/*
   Basic functions for basic parallel dense matrices.
*/

#include <../src/mat/impls/dense/mpi/mpidense.h> /*I "petscmat.h" I*/
#include <../src/mat/impls/aij/mpi/mpiaij.h>
#include <petscblaslapack.h>

/*@
      MatDenseGetLocalMatrix - For a `MATMPIDENSE` or `MATSEQDENSE` matrix returns the sequential
              matrix that represents the operator. For sequential matrices it returns itself.

    Input Parameter:
.      A - the sequential or MPI dense matrix

    Output Parameter:
.      B - the inner matrix

    Level: intermediate

.seealso: `MATDENSE`, `MATMPIDENSE`, `MATSEQDENSE`
@*/
PetscErrorCode MatDenseGetLocalMatrix(Mat A, Mat *B) {
  Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
  PetscBool     flg;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscValidPointer(B, 2);
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &flg));
  if (flg) *B = mat->A; /* MPI dense: hand back the per-rank inner sequential matrix */
  else {
    /* Otherwise only a sequential dense matrix is acceptable; it is its own local matrix */
    PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQDENSE, &flg));
    PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)A)->type_name);
    *B = A;
  }
  PetscFunctionReturn(0);
}

/* Copy A into B by delegating to the per-rank inner sequential matrices */
PetscErrorCode MatCopy_MPIDense(Mat A, Mat B, MatStructure s) {
  Mat_MPIDense *Amat = (Mat_MPIDense *)A->data;
  Mat_MPIDense *Bmat = (Mat_MPIDense *)B->data;

  PetscFunctionBegin;
  PetscCall(MatCopy(Amat->A, Bmat->A, s));
  PetscFunctionReturn(0);
}

/* Add alpha to each locally owned diagonal entry of A.
   The inner matrix stores all N columns for the local rows, so global diagonal
   entry (j,j) for an owned row j lives at local row j-rstart, column j. */
PetscErrorCode MatShift_MPIDense(Mat A, PetscScalar alpha) {
  Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
  PetscInt      j, lda, rstart = A->rmap->rstart, rend = A->rmap->rend, rend2;
  PetscScalar  *v;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArray(mat->A, &v));
  PetscCall(MatDenseGetLDA(mat->A, &lda));
  rend2 = PetscMin(rend, A->cmap->N); /* rectangular case: diagonal ends at min(M,N) */
  if (rend2 > rstart) {
    for (j = rstart; j < rend2; j++) v[j - rstart + j * lda] += alpha;
    PetscCall(PetscLogFlops(rend2 - rstart));
  }
  PetscCall(MatDenseRestoreArray(mat->A, &v));
  PetscFunctionReturn(0);
}

/* Get one (locally owned) row; forwards to the inner sequential matrix after
   shifting the global row index to the local numbering */
PetscErrorCode MatGetRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) {
  Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
  PetscInt      lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;

  PetscFunctionBegin;
  PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows");
  lrow = row - rstart;
  PetscCall(MatGetRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v));
  PetscFunctionReturn(0);
}

/* Companion to MatGetRow_MPIDense: release the row obtained above */
PetscErrorCode MatRestoreRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) {
  Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
  PetscInt      lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;

  PetscFunctionBegin;
  PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows");
  lrow = row - rstart;
  PetscCall(MatRestoreRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v));
  PetscFunctionReturn(0);
}

/* Return the local diagonal block as a sequential dense matrix that shares
   (borrows) the storage of the inner matrix; the block is cached on A under
   the name "DiagonalBlock" so repeated calls are cheap. */
PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A, Mat *a) {
  Mat_MPIDense *mdn = (Mat_MPIDense *)A->data;
  PetscInt      m = A->rmap->n, rstart = A->rmap->rstart;
  PetscScalar  *array;
  MPI_Comm      comm;
  PetscBool     flg;
  Mat           B;

  PetscFunctionBegin;
  PetscCall(MatHasCongruentLayouts(A, &flg));
  PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only square matrices supported.");
  PetscCall(PetscObjectQuery((PetscObject)A, "DiagonalBlock", (PetscObject *)&B));
  if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */

    PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSECUDA, &flg));
    PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSECUDA);
    PetscCall(PetscObjectGetComm((PetscObject)(mdn->A), &comm));
    PetscCall(MatCreate(comm, &B));
    PetscCall(MatSetSizes(B, m, m, m, m));
    PetscCall(MatSetType(B, ((PetscObject)mdn->A)->type_name));
    PetscCall(MatDenseGetArrayRead(mdn->A, (const PetscScalar **)&array));
    /* NOTE(review): offset m*rstart addresses column rstart of the inner matrix
       only when lda == m (no column padding) — verify against MatDenseSetLDA users */
    PetscCall(MatSeqDenseSetPreallocation(B, array + m * rstart));
    PetscCall(MatDenseRestoreArrayRead(mdn->A, (const PetscScalar **)&array));
    PetscCall(PetscObjectCompose((PetscObject)A, "DiagonalBlock", (PetscObject)B));
    *a = B;
    PetscCall(MatDestroy(&B)); /* the composed reference keeps B alive */
  } else *a = B;
  PetscFunctionReturn(0);
}

/* Insert/add values: locally owned rows go straight into the inner matrix,
   off-process rows are stashed for communication during assembly. */
PetscErrorCode MatSetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], const PetscScalar v[], InsertMode addv) {
  Mat_MPIDense *A = (Mat_MPIDense *)mat->data;
  PetscInt      i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;
  PetscBool     roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative indices are silently skipped by convention */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        PetscCall(MatSetValues(A->A, 1, &row, n, idxn, v + i * n, addv));
      } else {
        /* column-oriented input: v is m x n in column-major order */
        for (j = 0; j < n; j++) {
          if (idxn[j] < 0) continue;
          PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large");
          PetscCall(MatSetValues(A->A, 1, &row, 1, &idxn[j], v + i + j * m, addv));
        }
      }
    } else if (!A->donotstash) {
      /* off-process entry: mark unassembled and queue it in the stash */
      mat->assembled = PETSC_FALSE;
      if (roworiented) {
        PetscCall(MatStashValuesRow_Private(&mat->stash, idxm[i], n, idxn, v + i * n, PETSC_FALSE));
      } else {
        PetscCall(MatStashValuesCol_Private(&mat->stash, idxm[i], n, idxn, v + i, m, PETSC_FALSE));
      }
    }
  }
  PetscFunctionReturn(0);
}
/* Retrieve values; only locally owned rows are supported (no communication) */
PetscErrorCode MatGetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[]) {
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
  PetscInt      i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j = 0; j < n; j++) {
        if (idxn[j] < 0) continue; /* negative column */
        PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large");
        PetscCall(MatGetValues(mdn->A, 1, &row, 1, &idxn[j], v + i * n + j));
      }
    } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

/* Leading dimension of the inner (local) dense storage */
static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A, PetscInt *lda) {
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseGetLDA(a->A, lda));
  PetscFunctionReturn(0);
}

/* Set the leading dimension; lazily creates the inner matrix if preallocation
   has not happened yet (type chosen to match the outer CUDA/host type) */
static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A, PetscInt lda) {
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscBool     iscuda;

  PetscFunctionBegin;
  if (!a->A) {
    PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
    PetscCall(PetscLayoutSetUp(A->rmap));
    PetscCall(PetscLayoutSetUp(A->cmap));
    PetscCall(MatCreate(PETSC_COMM_SELF, &a->A));
    /* local storage holds the local rows but ALL global columns */
    PetscCall(MatSetSizes(a->A, A->rmap->n, A->cmap->N, A->rmap->n, A->cmap->N));
    PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSECUDA, &iscuda));
    PetscCall(MatSetType(a->A, iscuda ? MATSEQDENSECUDA : MATSEQDENSE));
  }
  PetscCall(MatDenseSetLDA(a->A, lda));
  PetscFunctionReturn(0);
}

/* Read/write access to the raw local array (delegates to the inner matrix) */
static PetscErrorCode MatDenseGetArray_MPIDense(Mat A, PetscScalar **array) {
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseGetArray(a->A, array));
  PetscFunctionReturn(0);
}

/* Read-only access to the raw local array */
static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A, const PetscScalar **array) {
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseGetArrayRead(a->A, array));
  PetscFunctionReturn(0);
}

/* Write-only access to the raw local array (current contents may be undefined) */
static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A, PetscScalar **array) {
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseGetArrayWrite(a->A, array));
  PetscFunctionReturn(0);
}

/* Temporarily substitute a user-provided array for the local storage */
static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A, const PetscScalar *array) {
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDensePlaceArray(a->A, array));
  PetscFunctionReturn(0);
}

/* Undo MatDensePlaceArray: restore the original local storage */
static PetscErrorCode MatDenseResetArray_MPIDense(Mat A) {
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseResetArray(a->A));
  PetscFunctionReturn(0);
}

/* Permanently replace the local storage with a user-provided array */
static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A, const PetscScalar *array) {
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseReplaceArray(a->A, array));
  PetscFunctionReturn(0);
}

/* Extract a submatrix; row distribution of the result must match that of A
   (no parallel redistribution is performed). Columns are gathered globally. */
static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A, IS isrow, IS iscol, MatReuse scall, Mat *B) {
  Mat_MPIDense      *mat = (Mat_MPIDense *)A->data, *newmatd;
  PetscInt           lda, i, j, rstart, rend, nrows, ncols, Ncols, nlrows, nlcols;
  const PetscInt    *irow, *icol;
  const PetscScalar *v;
  PetscScalar       *bv;
  Mat                newmat;
  IS                 iscol_local;
  MPI_Comm           comm_is, comm_mat;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm_mat));
  PetscCall(PetscObjectGetComm((PetscObject)iscol, &comm_is));
  PetscCheck(comm_mat == comm_is, PETSC_COMM_SELF, PETSC_ERR_ARG_NOTSAMECOMM, "IS communicator must match matrix communicator");

  PetscCall(ISAllGather(iscol, &iscol_local));
  PetscCall(ISGetIndices(isrow, &irow));
  PetscCall(ISGetIndices(iscol_local, &icol));
  PetscCall(ISGetLocalSize(isrow, &nrows));
  PetscCall(ISGetLocalSize(iscol, &ncols));
  PetscCall(ISGetSize(iscol, &Ncols)); /* global number of columns, size of iscol_local */

  /* No parallel redistribution currently supported! Should really check each index set
     to comfirm that it is OK.  ... Currently supports only submatrix same partitioning as
     original matrix! */

  PetscCall(MatGetLocalSize(A, &nlrows, &nlcols));
  PetscCall(MatGetOwnershipRange(A, &rstart, &rend));

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &newmat));
    PetscCall(MatSetSizes(newmat, nrows, ncols, PETSC_DECIDE, Ncols));
    PetscCall(MatSetType(newmat, ((PetscObject)A)->type_name));
    PetscCall(MatMPIDenseSetPreallocation(newmat, NULL));
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense *)newmat->data;
  PetscCall(MatDenseGetArray(newmatd->A, &bv));
  PetscCall(MatDenseGetArrayRead(mat->A, &v));
  PetscCall(MatDenseGetLDA(mat->A, &lda));
  for (i = 0; i < Ncols; i++) {
    const PetscScalar *av = v + lda * icol[i]; /* start of selected source column */
    for (j = 0; j < nrows; j++) *bv++ = av[irow[j] - rstart];
  }
  PetscCall(MatDenseRestoreArrayRead(mat->A, &v));
  PetscCall(MatDenseRestoreArray(newmatd->A, &bv));

  /* Assemble the matrices so that the correct flags are set */
  PetscCall(MatAssemblyBegin(newmat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(newmat, MAT_FINAL_ASSEMBLY));

  /* Free work space */
  PetscCall(ISRestoreIndices(isrow, &irow));
  PetscCall(ISRestoreIndices(iscol_local, &icol));
  PetscCall(ISDestroy(&iscol_local));
  *B = newmat;
  PetscFunctionReturn(0);
}

/* Release array access obtained with MatDenseGetArray */
PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A, PetscScalar **array) {
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseRestoreArray(a->A, array));
  PetscFunctionReturn(0);
}

/* Release array access obtained with MatDenseGetArrayRead */
PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A, const PetscScalar **array) {
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseRestoreArrayRead(a->A, array));
  PetscFunctionReturn(0);
}

/* Release array access obtained with MatDenseGetArrayWrite */
PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A, PetscScalar **array) {
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseRestoreArrayWrite(a->A, array));
  PetscFunctionReturn(0);
}

/* Start assembly: kick off communication of stashed off-process entries */
PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat, MatAssemblyType mode) {
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
  PetscInt      nstash, reallocs;

  PetscFunctionBegin;
  if (mdn->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
  PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
  PetscCall(PetscInfo(mdn->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscFunctionReturn(0);
}

/* Finish assembly: drain stash messages into local rows, then assemble the
   inner matrix */
PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat, MatAssemblyType mode) {
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
  PetscInt      i, *row, *col, flg, j, rstart, ncols;
  PetscMPIInt   n;
  PetscScalar  *val;

  PetscFunctionBegin;
  if (!mdn->donotstash && !mat->nooffprocentries) {
    /* wait on receives */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIDense(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
  }

  PetscCall(MatAssemblyBegin(mdn->A, mode));
  PetscCall(MatAssemblyEnd(mdn->A, mode));
  PetscFunctionReturn(0);
}

/* Zero all entries (local operation on the inner matrix) */
PetscErrorCode MatZeroEntries_MPIDense(Mat A) {
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatZeroEntries(l->A));
  PetscFunctionReturn(0);
}

/* Zero the given (global) rows; optionally set the diagonal to diag and fix
   the right-hand side b so that the solution keeps x on those rows */
PetscErrorCode MatZeroRows_MPIDense(Mat A, PetscInt n, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) {
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;
  PetscInt      i, len, *lrows;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(PetscLayoutMapLocal(A->rmap, n, rows, &len, &lrows, NULL));
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArrayWrite(b, &bb));
    for (i = 0; i < len; ++i) bb[lrows[i]] = diag * xx[lrows[i]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArrayWrite(b, &bb));
  }
  /* zero the rows first, then (re)insert the diagonal via MatDiagonalSet */
  PetscCall(MatZeroRows(l->A, len, lrows, 0.0, NULL, NULL));
  if (diag != 0.0) {
    Vec d;

    PetscCall(MatCreateVecs(A, NULL, &d));
    PetscCall(VecSet(d, diag));
    PetscCall(MatDiagonalSet(A, d, INSERT_VALUES));
    PetscCall(VecDestroy(&d));
  }
  PetscCall(PetscFree(lrows));
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat, Vec, Vec);
PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat, Vec, Vec, Vec);
PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat, Vec, Vec);
PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat, Vec, Vec, Vec);

/* y = A*x: broadcast the full x into lvec via the PetscSF, then local mult */
PetscErrorCode MatMult_MPIDense(Mat mat, Vec xx, Vec yy) {
  Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
  PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
  PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
  PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
  PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
  PetscCall((*mdn->A->ops->mult)(mdn->A, mdn->lvec, yy));
  PetscFunctionReturn(0);
}

/* z = y + A*x: same communication pattern as MatMult, local multadd */
PetscErrorCode MatMultAdd_MPIDense(Mat mat, Vec xx, Vec yy, Vec zz) {
  Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
  PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
  PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
  PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
  PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
  PetscCall((*mdn->A->ops->multadd)(mdn->A, mdn->lvec, yy, zz));
  PetscFunctionReturn(0);
}

/* y = A^T*x: local transpose-mult into lvec, then SF-reduce (sum) into y */
PetscErrorCode MatMultTranspose_MPIDense(Mat A, Vec xx, Vec yy) {
  Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
  PetscCall(VecSet(yy, 0.0)); /* reduce adds contributions, so start from zero */
  PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec));
  PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(yy, &ay, &aymtype));
  PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
  PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
  PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
  PetscCall(VecRestoreArrayAndMemType(yy, &ay));
  PetscFunctionReturn(0);
}

/* z = y + A^T*x: copy y into z, then reduce the local contributions into z */
PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A, Vec xx, Vec yy, Vec zz) {
  Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
  PetscCall(VecCopy(yy, zz));
  PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec));
  PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(zz, &ay, &aymtype));
  PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
  PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
  PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
  PetscCall(VecRestoreArrayAndMemType(zz, &ay));
  PetscFunctionReturn(0);
}

/* Copy the locally owned part of the diagonal into v */
PetscErrorCode MatGetDiagonal_MPIDense(Mat A, Vec v) {
  Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
  PetscInt           lda, len, i, n, m = A->rmap->n, radd;
  PetscScalar       *x, zero = 0.0;
  const PetscScalar *av;

  PetscFunctionBegin;
  PetscCall(VecSet(v, zero));
  PetscCall(VecGetArray(v, &x));
  PetscCall(VecGetSize(v, &n));
  PetscCheck(n == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming mat and vec");
  len = PetscMin(a->A->rmap->n, a->A->cmap->n);
  /* NOTE(review): radd is computed with m (local rows) while the stride below
     uses lda — appears to assume lda == m; verify with non-default LDA */
  radd = A->rmap->rstart * m;
  PetscCall(MatDenseGetArrayRead(a->A, &av));
  PetscCall(MatDenseGetLDA(a->A, &lda));
  for (i = 0; i < len; i++) x[i] = av[radd + i * lda + i];
  PetscCall(MatDenseRestoreArrayRead(a->A, &av));
  PetscCall(VecRestoreArray(v, &x));
  PetscFunctionReturn(0);
}

/* Destroy the matrix: free inner objects and unregister every composed method */
PetscErrorCode MatDestroy_MPIDense(Mat mat) {
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N);
#endif
  PetscCall(MatStashDestroy_Private(&mat->stash));
  PetscCheck(!mdn->vecinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!mdn->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDestroy(&mdn->A));
  PetscCall(VecDestroy(&mdn->lvec));
  PetscCall(PetscSFDestroy(&mdn->Mvctx));
  PetscCall(VecDestroy(&mdn->cvec));
  PetscCall(MatDestroy(&mdn->cmat));

  PetscCall(PetscFree(mat->data));
  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));

  /* remove every method composed by the MPIDENSE constructors */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", NULL));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", NULL));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", NULL));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensecuda_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAPlaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAResetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAReplaceArray_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", NULL));

  PetscCall(PetscObjectCompose((PetscObject)mat, "DiagonalBlock", NULL));
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatView_SeqDense(Mat, PetscViewer);

#include <petscdraw.h>
/* View helper: for INFO formats print per-rank stats; otherwise gather the
   whole matrix onto rank 0 and view it there sequentially */
static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat, PetscViewer viewer) {
  Mat_MPIDense     *mdn = (Mat_MPIDense *)mat->data;
  PetscMPIInt       rank;
  PetscViewerType   vtype;
  PetscBool         iascii, isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  if (iascii) {
    PetscCall(PetscViewerGetType(viewer, &vtype));
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "  [%d] local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %" PetscInt_FMT " \n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                   (PetscInt)info.memory));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      if (mdn->Mvctx) PetscCall(PetscSFView(mdn->Mvctx, viewer));
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;

    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawIsNull(draw, &isnull));
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat          A;
    PetscInt     M = mat->rmap->N, N = mat->cmap->N, m, row, i, nz;
    PetscInt    *cols;
    PetscScalar *vals;

    PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
    if (rank == 0) {
      PetscCall(MatSetSizes(A, M, N, M, N));
    } else {
      PetscCall(MatSetSizes(A, 0, 0, M, N));
    }
    /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
    PetscCall(MatSetType(A, MATMPIDENSE));
    PetscCall(MatMPIDenseSetPreallocation(A, NULL));

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    A->insertmode = INSERT_VALUES;

    row = mat->rmap->rstart;
    m   = mdn->A->rmap->n;
    for (i = 0; i < m; i++) {
      PetscCall(MatGetRow_MPIDense(mat, row, &nz, &cols, &vals));
      PetscCall(MatSetValues_MPIDense(A, 1, &row, nz, cols, vals, INSERT_VALUES));
      PetscCall(MatRestoreRow_MPIDense(mat, row, &nz, &cols, &vals));
      row++;
    }

    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    if (rank == 0) {
      PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIDense *)(A->data))->A, ((PetscObject)mat)->name));
      PetscCall(MatView_SeqDense(((Mat_MPIDense *)(A->data))->A, sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(PetscViewerFlush(viewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(0);
}

/* Dispatch viewing to the appropriate viewer-specific implementation */
PetscErrorCode MatView_MPIDense(Mat mat, PetscViewer viewer) {
  PetscBool iascii, isbinary, isdraw, issocket;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));

  if (iascii || issocket || isdraw) {
    PetscCall(MatView_MPIDense_ASCIIorDraworSocket(mat, viewer));
  } else if (isbinary) PetscCall(MatView_Dense_Binary(mat, viewer));
  PetscFunctionReturn(0);
}

/* Gather matrix statistics: local stats come from the inner matrix and are
   reduced (max or sum) across the communicator for the global flavors */
PetscErrorCode MatGetInfo_MPIDense(Mat A, MatInfoType flag, MatInfo *info) {
  Mat_MPIDense  *mat = (Mat_MPIDense *)A->data;
  Mat            mdn = mat->A;
  PetscLogDouble isend[5], irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;

  PetscCall(MatGetInfo(mdn, MAT_LOCAL, info));

  isend[0] = info->nz_used;
  isend[1] = info->nz_allocated;
  isend[2] = info->nz_unneeded;
  isend[3] = info->memory;
  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)A)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)A)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

/* Set a matrix option: some are forwarded to the inner matrix, some are stored
   locally, and the symmetry-type hints are deliberately ignored */
PetscErrorCode MatSetOption_MPIDense(Mat A, MatOption op, PetscBool flg) {
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A, 1);
    a->roworiented = flg;
    PetscCall(MatSetOption(a->A, op, flg));
    break;
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_USE_HASH_TABLE:
  case MAT_SORTED_FULL: PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); break;
  case MAT_IGNORE_OFF_PROC_ENTRIES: a->donotstash = flg; break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
  case MAT_SPD:
  case MAT_IGNORE_LOWER_TRIANGULAR:
  case MAT_IGNORE_ZERO_ENTRIES:
  case MAT_SPD_ETERNAL:
    /* if the diagonal matrix is square it inherits some of the properties above */
    PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
    break;
  default: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %s", MatOptions[op]);
  }
  PetscFunctionReturn(0);
}

/* Diagonal scaling A <- diag(ll) A diag(rr); ll is local, rr must be
   broadcast into lvec via the PetscSF before scaling columns.
   NOTE: this function continues beyond the end of this source chunk. */
PetscErrorCode MatDiagonalScale_MPIDense(Mat A, Vec ll, Vec rr) {
  Mat_MPIDense      *mdn = (Mat_MPIDense *)A->data;
  const PetscScalar *l;
  PetscScalar        x, *v, *vv, *r;
  PetscInt           i, j, s2a, s3a, s2, s3, m = mdn->A->rmap->n, n = mdn->A->cmap->n, lda;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArray(mdn->A, &vv));
  PetscCall(MatDenseGetLDA(mdn->A, &lda));
  PetscCall(MatGetLocalSize(A, &s2, &s3));
  if (ll) {
    PetscCall(VecGetLocalSize(ll, &s2a));
    PetscCheck(s2a == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left scaling vector non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT, s2a, s2);
    PetscCall(VecGetArrayRead(ll, &l));
    for (i = 0; i < m; i++) {
      x = l[i];
      v = vv + i;
      /* scale row i across all stored columns, stepping by the leading dimension */
      for (j = 0; j < n; j++) {
        (*v) *= x;
        v += lda;
      }
    }
    PetscCall(VecRestoreArrayRead(ll, &l));
    PetscCall(PetscLogFlops(1.0 * n * m));
  }
  if (rr) {
    const PetscScalar *ar;

    PetscCall(VecGetLocalSize(rr, &s3a));
    PetscCheck(s3a == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Right scaling vec non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT ".", s3a, s3);
    PetscCall(VecGetArrayRead(rr, &ar));
    if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
    PetscCall(VecGetArray(mdn->lvec, &r));
    PetscCall(PetscSFBcastBegin(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE));
820 PetscCall(VecRestoreArrayRead(rr, &ar)); 821 for (i = 0; i < n; i++) { 822 x = r[i]; 823 v = vv + i * lda; 824 for (j = 0; j < m; j++) (*v++) *= x; 825 } 826 PetscCall(VecRestoreArray(mdn->lvec, &r)); 827 PetscCall(PetscLogFlops(1.0 * n * m)); 828 } 829 PetscCall(MatDenseRestoreArray(mdn->A, &vv)); 830 PetscFunctionReturn(0); 831 } 832 833 PetscErrorCode MatNorm_MPIDense(Mat A, NormType type, PetscReal *nrm) { 834 Mat_MPIDense *mdn = (Mat_MPIDense *)A->data; 835 PetscInt i, j; 836 PetscMPIInt size; 837 PetscReal sum = 0.0; 838 const PetscScalar *av, *v; 839 840 PetscFunctionBegin; 841 PetscCall(MatDenseGetArrayRead(mdn->A, &av)); 842 v = av; 843 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 844 if (size == 1) { 845 PetscCall(MatNorm(mdn->A, type, nrm)); 846 } else { 847 if (type == NORM_FROBENIUS) { 848 for (i = 0; i < mdn->A->cmap->n * mdn->A->rmap->n; i++) { 849 sum += PetscRealPart(PetscConj(*v) * (*v)); 850 v++; 851 } 852 PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A))); 853 *nrm = PetscSqrtReal(*nrm); 854 PetscCall(PetscLogFlops(2.0 * mdn->A->cmap->n * mdn->A->rmap->n)); 855 } else if (type == NORM_1) { 856 PetscReal *tmp, *tmp2; 857 PetscCall(PetscCalloc2(A->cmap->N, &tmp, A->cmap->N, &tmp2)); 858 *nrm = 0.0; 859 v = av; 860 for (j = 0; j < mdn->A->cmap->n; j++) { 861 for (i = 0; i < mdn->A->rmap->n; i++) { 862 tmp[j] += PetscAbsScalar(*v); 863 v++; 864 } 865 } 866 PetscCall(MPIU_Allreduce(tmp, tmp2, A->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A))); 867 for (j = 0; j < A->cmap->N; j++) { 868 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 869 } 870 PetscCall(PetscFree2(tmp, tmp2)); 871 PetscCall(PetscLogFlops(A->cmap->n * A->rmap->n)); 872 } else if (type == NORM_INFINITY) { /* max row norm */ 873 PetscReal ntemp; 874 PetscCall(MatNorm(mdn->A, type, &ntemp)); 875 PetscCall(MPIU_Allreduce(&ntemp, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A))); 876 } else 
SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for two norm");
  }
  PetscCall(MatDenseRestoreArrayRead(mdn->A, &av));
  PetscFunctionReturn(0);
}

/* Builds the transpose by re-inserting each local column of A as a row of B,
   relying on the stash to route off-process values during assembly. Supports
   MAT_INITIAL_MATRIX, MAT_REUSE_MATRIX and MAT_INPLACE_MATRIX (header merge). */
PetscErrorCode MatTranspose_MPIDense(Mat A, MatReuse reuse, Mat *matout) {
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat           B;
  PetscInt      M = A->rmap->N, N = A->cmap->N, m, n, *rwork, rstart = A->rmap->rstart;
  PetscInt      j, i, lda;
  PetscScalar  *v;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    /* transpose swaps the row/column layouts */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
    PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
    PetscCall(MatMPIDenseSetPreallocation(B, NULL));
  } else B = *matout;

  m = a->A->rmap->n;
  n = a->A->cmap->n;
  PetscCall(MatDenseGetArrayRead(a->A, (const PetscScalar **)&v));
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(PetscMalloc1(m, &rwork));
  for (i = 0; i < m; i++) rwork[i] = rstart + i; /* global row indices of the local block */
  for (j = 0; j < n; j++) {
    /* local column j of A becomes (part of) global row j of B */
    PetscCall(MatSetValues(B, 1, &j, m, rwork, v, INSERT_VALUES));
    v += lda;
  }
  PetscCall(MatDenseRestoreArrayRead(a->A, (const PetscScalar **)&v));
  PetscCall(PetscFree(rwork));
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    PetscCall(MatHeaderMerge(A, &B));
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDuplicate_MPIDense(Mat, MatDuplicateOption, Mat *);
PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat, PetscScalar);

/* Sets up the layouts and triggers default preallocation when none was given. */
PetscErrorCode MatSetUp_MPIDense(Mat A) {
  PetscFunctionBegin;
  PetscCall(PetscLayoutSetUp(A->rmap));
  PetscCall(PetscLayoutSetUp(A->cmap));
  if (!A->preallocated) PetscCall(MatMPIDenseSetPreallocation(A, NULL));
  PetscFunctionReturn(0);
}

/* Y += alpha*X, delegated to the inner sequential matrices (layouts match). */
PetscErrorCode MatAXPY_MPIDense(Mat Y, PetscScalar alpha, Mat X, MatStructure str) {
  Mat_MPIDense *A = (Mat_MPIDense *)Y->data, *B = (Mat_MPIDense *)X->data;

  PetscFunctionBegin;
  PetscCall(MatAXPY(A->A, alpha, B->A, str));
  PetscFunctionReturn(0);
}

/* Complex-conjugates all entries; purely local, delegated to the inner matrix. */
PetscErrorCode MatConjugate_MPIDense(Mat mat) {
  Mat_MPIDense *a = (Mat_MPIDense *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatConjugate(a->A));
  PetscFunctionReturn(0);
}

/* Keeps only the real parts of the entries; delegated to the inner matrix. */
PetscErrorCode MatRealPart_MPIDense(Mat A) {
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatRealPart(a->A));
  PetscFunctionReturn(0);
}

/* Keeps only the imaginary parts of the entries; delegated to the inner matrix. */
PetscErrorCode MatImaginaryPart_MPIDense(Mat A) {
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatImaginaryPart(a->A));
  PetscFunctionReturn(0);
}

/* Extracts global column `col` into v by calling the inner matrix's op directly. */
static PetscErrorCode MatGetColumnVector_MPIDense(Mat A, Vec v, PetscInt col) {
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(a->A, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing local matrix");
  PetscCheck(a->A->ops->getcolumnvector, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing get column operation");
  PetscCall((*a->A->ops->getcolumnvector)(a->A, v, col));
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat, PetscInt, PetscReal *);

/* Per-column reductions (norms, sums, means). Local reductions come from the
   sequential kernel; MEAN is computed as SUM then divided by the global row
   count, and NORM_2 is squared before the SUM reduction and square-rooted
   after. NORM_INFINITY is the only MAX-combined case. */
PetscErrorCode MatGetColumnReductions_MPIDense(Mat A, PetscInt type, PetscReal *reductions) {
  PetscInt      i, m, n;
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscReal    *work;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A, &m, &n));
  PetscCall(PetscMalloc1(n, &work));
  if (type == REDUCTION_MEAN_REALPART) {
    PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_REALPART, work));
  } else if (type == REDUCTION_MEAN_IMAGINARYPART) {
    PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_IMAGINARYPART, work));
  } else {
    PetscCall(MatGetColumnReductions_SeqDense(a->A, type, work));
  }
  if (type == NORM_2) {
    /* square local 2-norms so they can be combined additively */
    for (i = 0; i < n; i++) work[i] *= work[i];
  }
  if (type == NORM_INFINITY) {
    PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, A->hdr.comm));
  } else {
    PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, A->hdr.comm));
  }
  PetscCall(PetscFree(work));
  if (type == NORM_2) {
    for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
  } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < n; i++) reductions[i] /= m;
  }
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_CUDA)
/* Adds alpha to the diagonal entirely on the GPU, avoiding a host copy. */
PetscErrorCode MatShift_MPIDenseCUDA(Mat A, PetscScalar alpha) {
  PetscScalar *da;
  PetscInt     lda;

  PetscFunctionBegin;
  PetscCall(MatDenseCUDAGetArray(A, &da));
  PetscCall(MatDenseGetLDA(A, &lda));
  PetscCall(PetscInfo(A, "Performing Shift on backend\n"));
  PetscCall(MatShift_DenseCUDA_Private(da, alpha, lda, A->rmap->rstart, A->rmap->rend, A->cmap->N));
  PetscCall(MatDenseCUDARestoreArray(A, &da));
  PetscFunctionReturn(0);
}

/* Exposes column `col` as a CUDA Vec aliasing the device storage; only one
   column may be checked out at a time (tracked via vecinuse). */
static PetscErrorCode MatDenseGetColumnVec_MPIDenseCUDA(Mat A, PetscInt col, Vec *v) {
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscInt      lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  /* create the reusable wrapper vector lazily, with no backing array yet */
  if (!a->cvec) { PetscCall(VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec)); }
  a->vecinuse = col + 1;
PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(MatDenseCUDAGetArray(a->A, (PetscScalar **)&a->ptrinuse));
  /* point the wrapper vector at column `col` of the device array */
  PetscCall(VecCUDAPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
  *v = a->cvec;
  PetscFunctionReturn(0);
}

/* Returns the column vector checked out by MatDenseGetColumnVec and releases
   the underlying device array. */
static PetscErrorCode MatDenseRestoreColumnVec_MPIDenseCUDA(Mat A, PetscInt col, Vec *v) {
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector");
  a->vecinuse = 0;
  PetscCall(MatDenseCUDARestoreArray(a->A, (PetscScalar **)&a->ptrinuse));
  PetscCall(VecCUDAResetArray(a->cvec));
  if (v) *v = NULL;
  PetscFunctionReturn(0);
}

/* Read-only variant: same aliasing as GetColumnVec, plus a read lock on the Vec. */
static PetscErrorCode MatDenseGetColumnVecRead_MPIDenseCUDA(Mat A, PetscInt col, Vec *v) {
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscInt      lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) { PetscCall(VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec)); }
  a->vecinuse = col + 1;
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(MatDenseCUDAGetArrayRead(a->A, &a->ptrinuse));
  PetscCall(VecCUDAPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
  PetscCall(VecLockReadPush(a->cvec));
  *v = a->cvec;
  PetscFunctionReturn(0);
}

/* Releases a read-only column: drop the read lock before detaching the array. */
static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDenseCUDA(Mat A, PetscInt col, Vec *v) {
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
  a->vecinuse = 0;
  PetscCall(MatDenseCUDARestoreArrayRead(a->A, &a->ptrinuse));
  PetscCall(VecLockReadPop(a->cvec));
  PetscCall(VecCUDAResetArray(a->cvec));
  if (v) *v = NULL;
  PetscFunctionReturn(0);
}

/* Write-only variant: uses GetArrayWrite so prior device contents need not be valid. */
static PetscErrorCode MatDenseGetColumnVecWrite_MPIDenseCUDA(Mat A, PetscInt col, Vec *v) {
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscInt      lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) { PetscCall(VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec)); }
  a->vecinuse = col + 1;
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(MatDenseCUDAGetArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
  PetscCall(VecCUDAPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
  *v = a->cvec;
  PetscFunctionReturn(0);
}

/* Releases a write-only column checked out by the routine above. */
static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDenseCUDA(Mat A, PetscInt col, Vec *v) {
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
  a->vecinuse = 0;
  PetscCall(MatDenseCUDARestoreArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
  PetscCall(VecCUDAResetArray(a->cvec));
  if (v) *v = NULL;
  PetscFunctionReturn(0);
}

/* Substitutes a user-provided device array for the inner matrix's storage;
   forbidden while a column vector or submatrix is checked out. */
static PetscErrorCode MatDenseCUDAPlaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a) {
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!l->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!l->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseCUDAPlaceArray(l->A, a));
  PetscFunctionReturn(0);
}

/* Undoes a prior MatDenseCUDAPlaceArray, restoring the original device storage. */
static PetscErrorCode MatDenseCUDAResetArray_MPIDenseCUDA(Mat A) {
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!l->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!l->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseCUDAResetArray(l->A));
  PetscFunctionReturn(0);
}

/* Permanently replaces the inner matrix's device array with a new one. */
static PetscErrorCode MatDenseCUDAReplaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a) {
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!l->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!l->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseCUDAReplaceArray(l->A, a));
  PetscFunctionReturn(0);
}

/* Thin forward to the inner matrix: write access to the device array. */
static PetscErrorCode MatDenseCUDAGetArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a) {
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseCUDAGetArrayWrite(l->A, a));
  PetscFunctionReturn(0);
}

/* Thin forward to the inner matrix: release write access to the device array. */
static PetscErrorCode MatDenseCUDARestoreArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a) {
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
PetscCall(MatDenseCUDARestoreArrayWrite(l->A, a));
  PetscFunctionReturn(0);
}

/* Thin forward to the inner matrix: read access to the device array. */
static PetscErrorCode MatDenseCUDAGetArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a) {
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseCUDAGetArrayRead(l->A, a));
  PetscFunctionReturn(0);
}

/* Thin forward to the inner matrix: release read access to the device array. */
static PetscErrorCode MatDenseCUDARestoreArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a) {
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseCUDARestoreArrayRead(l->A, a));
  PetscFunctionReturn(0);
}

/* Thin forward to the inner matrix: read/write access to the device array. */
static PetscErrorCode MatDenseCUDAGetArray_MPIDenseCUDA(Mat A, PetscScalar **a) {
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseCUDAGetArray(l->A, a));
  PetscFunctionReturn(0);
}

/* Thin forward to the inner matrix: release read/write access to the device array. */
static PetscErrorCode MatDenseCUDARestoreArray_MPIDenseCUDA(Mat A, PetscScalar **a) {
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseCUDARestoreArray(l->A, a));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat, PetscInt, Vec *);
static PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat, PetscInt, Vec *);
static PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat, PetscInt, Vec *);
static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat, PetscInt, Vec *);
static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat, PetscInt, Vec *);
static PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat, PetscInt, Vec *);
static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat, Mat *);

/* Toggles CPU/GPU execution: rebinds the inner matrix, swaps the composed
   column-vec accessors between CUDA and host versions, switches the default
   random type, destroys cached helper objects of the wrong backend, and
   re-points the shift op. */
static PetscErrorCode MatBindToCPU_MPIDenseCUDA(Mat mat, PetscBool bind) {
  Mat_MPIDense *d = (Mat_MPIDense *)mat->data;

  PetscFunctionBegin;
  PetscCheck(!d->vecinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!d->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  if (d->A) PetscCall(MatBindToCPU(d->A, bind));
  mat->boundtocpu = bind;
  if (!bind) {
    PetscBool iscuda;

    PetscCall(PetscFree(mat->defaultrandtype));
    PetscCall(PetscStrallocpy(PETSCCURAND, &mat->defaultrandtype));
    /* drop cached helpers that are not CUDA-backed so they get recreated */
    PetscCall(PetscObjectTypeCompare((PetscObject)d->cvec, VECMPICUDA, &iscuda));
    if (!iscuda) PetscCall(VecDestroy(&d->cvec));
    PetscCall(PetscObjectTypeCompare((PetscObject)d->cmat, MATMPIDENSECUDA, &iscuda));
    if (!iscuda) PetscCall(MatDestroy(&d->cmat));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDenseCUDA));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDenseCUDA));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDenseCUDA));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDenseCUDA));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDenseCUDA));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDenseCUDA));
    mat->ops->shift = MatShift_MPIDenseCUDA;
  } else {
    PetscCall(PetscFree(mat->defaultrandtype));
    PetscCall(PetscStrallocpy(PETSCRANDER48, &mat->defaultrandtype));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDense));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDense));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDense));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDense));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDense));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDense));
    mat->ops->shift = MatShift_MPIDense;
  }
  if (d->cmat) PetscCall(MatBindToCPU(d->cmat, bind));
  PetscFunctionReturn(0);
}

/* Preallocates the CUDA-backed inner sequential matrix, optionally wrapping a
   user-supplied device buffer d_data. No-op for non-CUDA matrices. */
PetscErrorCode MatMPIDenseCUDASetPreallocation(Mat A, PetscScalar *d_data) {
  Mat_MPIDense *d = (Mat_MPIDense *)A->data;
  PetscBool     iscuda;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSECUDA, &iscuda));
  if (!iscuda) PetscFunctionReturn(0);
  PetscCall(PetscLayoutSetUp(A->rmap));
  PetscCall(PetscLayoutSetUp(A->cmap));
  if (!d->A) {
    /* inner matrix holds all local rows and ALL global columns */
    PetscCall(MatCreate(PETSC_COMM_SELF, &d->A));
    PetscCall(MatSetSizes(d->A, A->rmap->n, A->cmap->N, A->rmap->n, A->cmap->N));
  }
  PetscCall(MatSetType(d->A, MATSEQDENSECUDA));
  PetscCall(MatSeqDenseCUDASetPreallocation(d->A, d_data));
  A->preallocated = PETSC_TRUE;
  A->assembled    = PETSC_TRUE;
  PetscFunctionReturn(0);
}
#endif

/* Fills the matrix with random values; offload state mirrors the inner matrix. */
static PetscErrorCode MatSetRandom_MPIDense(Mat x, PetscRandom rctx) {
  Mat_MPIDense *d = (Mat_MPIDense *)x->data;

  PetscFunctionBegin;
  PetscCall(MatSetRandom(d->A, rctx));
#if defined(PETSC_HAVE_DEVICE)
  x->offloadmask = d->A->offloadmask;
#endif
  PetscFunctionReturn(0);
}

/* A dense matrix always stores its diagonal, so nothing is ever missing.
   The `d` output index is intentionally left untouched. */
static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A, PetscBool *missing, PetscInt *d) {
  PetscFunctionBegin;
  *missing = PETSC_FALSE;
  PetscFunctionReturn(0);
}

static PetscErrorCode
MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat);
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat);
static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat);
static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat);
static PetscErrorCode MatEqual_MPIDense(Mat, Mat, PetscBool *);
static PetscErrorCode MatLoad_MPIDense(Mat, PetscViewer);

/* -------------------------------------------------------------------*/
/* Function table for MATMPIDENSE; slot indices follow struct _MatOps. */
static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
                                       MatGetRow_MPIDense,
                                       MatRestoreRow_MPIDense,
                                       MatMult_MPIDense,
                                       /* 4*/ MatMultAdd_MPIDense,
                                       MatMultTranspose_MPIDense,
                                       MatMultTransposeAdd_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 10*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatTranspose_MPIDense,
                                       /* 15*/ MatGetInfo_MPIDense,
                                       MatEqual_MPIDense,
                                       MatGetDiagonal_MPIDense,
                                       MatDiagonalScale_MPIDense,
                                       MatNorm_MPIDense,
                                       /* 20*/ MatAssemblyBegin_MPIDense,
                                       MatAssemblyEnd_MPIDense,
                                       MatSetOption_MPIDense,
                                       MatZeroEntries_MPIDense,
                                       /* 24*/ MatZeroRows_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 29*/ MatSetUp_MPIDense,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPIDense,
                                       NULL,
                                       /* 34*/ MatDuplicate_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 39*/ MatAXPY_MPIDense,
                                       MatCreateSubMatrices_MPIDense,
                                       NULL,
                                       MatGetValues_MPIDense,
                                       MatCopy_MPIDense,
                                       /* 44*/ NULL,
                                       MatScale_MPIDense,
                                       MatShift_MPIDense,
                                       NULL,
                                       NULL,
                                       /* 49*/ MatSetRandom_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 54*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 59*/ MatCreateSubMatrix_MPIDense,
                                       MatDestroy_MPIDense,
                                       MatView_MPIDense,
                                       NULL,
                                       NULL,
                                       /* 64*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 69*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 74*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 79*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 83*/ MatLoad_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 89*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 94*/ NULL,
                                       NULL,
                                       MatMatTransposeMultSymbolic_MPIDense_MPIDense,
                                       MatMatTransposeMultNumeric_MPIDense_MPIDense,
                                       NULL,
                                       /* 99*/ MatProductSetFromOptions_MPIDense,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPIDense,
                                       NULL,
                                       /*104*/ NULL,
                                       MatRealPart_MPIDense,
                                       MatImaginaryPart_MPIDense,
                                       NULL,
                                       NULL,
                                       /*109*/ NULL,
                                       NULL,
                                       NULL,
                                       MatGetColumnVector_MPIDense,
                                       MatMissingDiagonal_MPIDense,
                                       /*114*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*119*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*124*/ NULL,
                                       MatGetColumnReductions_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*129*/ NULL,
                                       NULL,
                                       MatTransposeMatMultSymbolic_MPIDense_MPIDense,
                                       MatTransposeMatMultNumeric_MPIDense_MPIDense,
                                       NULL,
                                       /*134*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*139*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatCreateMPIMatConcatenateSeqMat_MPIDense,
                                       /*145*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*150*/ NULL};

/* Allocates (or wraps the user array `data` as) the inner sequential matrix
   that stores the local rows against ALL global columns. The inner type is
   chosen to match the outer type (CUDA or host). */
PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat, PetscScalar *data) {
  Mat_MPIDense *a = (Mat_MPIDense *)mat->data;
  PetscBool     iscuda;

  PetscFunctionBegin;
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));
  if (!a->A) {
    PetscCall(MatCreate(PETSC_COMM_SELF, &a->A));
    PetscCall(MatSetSizes(a->A, mat->rmap->n, mat->cmap->N, mat->rmap->n, mat->cmap->N));
  }
  PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSECUDA, &iscuda));
  PetscCall(MatSetType(a->A, iscuda ? MATSEQDENSECUDA : MATSEQDENSE));
  PetscCall(MatSeqDenseSetPreallocation(a->A, data));
#if defined(PETSC_HAVE_DEVICE)
  mat->offloadmask = a->A->offloadmask;
#endif
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  PetscFunctionReturn(0);
}

/* MPIAIJ -> MPIDense: densify the local AIJ part, then stitch the per-rank
   sequential pieces back into one parallel dense matrix. */
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIDense(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) {
  Mat B, C;

  PetscFunctionBegin;
  PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &C));
  PetscCall(MatConvert_SeqAIJ_SeqDense(C, MATSEQDENSE, MAT_INITIAL_MATRIX, &B));
  PetscCall(MatDestroy(&C));
  if (reuse == MAT_REUSE_MATRIX) {
    C = *newmat;
  } else C = NULL;
  PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C));
  PetscCall(MatDestroy(&B));
  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A, &C));
  } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
  PetscFunctionReturn(0);
}

/* MPIDense -> MPIAIJ: sparsify the inner sequential matrix, then concatenate
   the pieces across the communicator. The local matrix is borrowed (not
   copied), so no destroy of C is needed before it is reused as output. */
PetscErrorCode MatConvert_MPIDense_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) {
  Mat B, C;

  PetscFunctionBegin;
  PetscCall(MatDenseGetLocalMatrix(A, &C));
  PetscCall(MatConvert_SeqDense_SeqAIJ(C, MATSEQAIJ, MAT_INITIAL_MATRIX, &B));
  if (reuse == MAT_REUSE_MATRIX) {
    C = *newmat;
  } else C = NULL;
  PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C));
  PetscCall(MatDestroy(&B));
  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A, &C));
  } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_ELEMENTAL)
/* MPIDense -> Elemental: pushes every local row (all global columns) into the
   Elemental matrix with ADD_VALUES, as required by the interface. */
PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) {
  Mat          mat_elemental;
  PetscScalar *v;
  PetscInt     m = A->rmap->n, N = A->cmap->N, rstart = A->rmap->rstart, i, *rows, *cols;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    mat_elemental = *newmat;
    PetscCall(MatZeroEntries(*newmat));
  } else {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental));
    PetscCall(MatSetSizes(mat_elemental, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N));
    PetscCall(MatSetType(mat_elemental, MATELEMENTAL));
    PetscCall(MatSetUp(mat_elemental));
    /* dense storage is column-major, hence column orientation */
    PetscCall(MatSetOption(mat_elemental, MAT_ROW_ORIENTED, PETSC_FALSE));
  }

  PetscCall(PetscMalloc2(m, &rows, N, &cols));
  for (i = 0; i < N; i++) cols[i] = i;
  for (i = 0; i < m; i++) rows[i] = rstart + i;

  /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
  PetscCall(MatDenseGetArray(A, &v));
  PetscCall(MatSetValues(mat_elemental, m, rows, N, cols, v, ADD_VALUES));
  PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY));
  PetscCall(MatDenseRestoreArray(A, &v));
  PetscCall(PetscFree2(rows, cols));

  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A, &mat_elemental));
  } else {
    *newmat = mat_elemental;
  }
  PetscFunctionReturn(0);
}
#endif

/* Gives direct access to the storage of global column `col` of the local block. */
static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A, PetscInt col, PetscScalar **vals) {
  Mat_MPIDense *mat = (Mat_MPIDense
*)A->data; 1533 1534 PetscFunctionBegin; 1535 PetscCall(MatDenseGetColumn(mat->A, col, vals)); 1536 PetscFunctionReturn(0); 1537 } 1538 1539 static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A, PetscScalar **vals) { 1540 Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 1541 1542 PetscFunctionBegin; 1543 PetscCall(MatDenseRestoreColumn(mat->A, vals)); 1544 PetscFunctionReturn(0); 1545 } 1546 1547 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) { 1548 Mat_MPIDense *mat; 1549 PetscInt m, nloc, N; 1550 1551 PetscFunctionBegin; 1552 PetscCall(MatGetSize(inmat, &m, &N)); 1553 PetscCall(MatGetLocalSize(inmat, NULL, &nloc)); 1554 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 1555 PetscInt sum; 1556 1557 if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N)); 1558 /* Check sum(n) = N */ 1559 PetscCall(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm)); 1560 PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N); 1561 1562 PetscCall(MatCreateDense(comm, m, n, PETSC_DETERMINE, N, NULL, outmat)); 1563 PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 1564 } 1565 1566 /* numeric phase */ 1567 mat = (Mat_MPIDense *)(*outmat)->data; 1568 PetscCall(MatCopy(inmat, mat->A, SAME_NONZERO_PATTERN)); 1569 PetscFunctionReturn(0); 1570 } 1571 1572 #if defined(PETSC_HAVE_CUDA) 1573 PetscErrorCode MatConvert_MPIDenseCUDA_MPIDense(Mat M, MatType type, MatReuse reuse, Mat *newmat) { 1574 Mat B; 1575 Mat_MPIDense *m; 1576 1577 PetscFunctionBegin; 1578 if (reuse == MAT_INITIAL_MATRIX) { 1579 PetscCall(MatDuplicate(M, MAT_COPY_VALUES, newmat)); 1580 } else if (reuse == MAT_REUSE_MATRIX) { 1581 PetscCall(MatCopy(M, *newmat, SAME_NONZERO_PATTERN)); 1582 } 1583 1584 B = *newmat; 1585 PetscCall(MatBindToCPU_MPIDenseCUDA(B, PETSC_TRUE)); 1586 PetscCall(PetscFree(B->defaultvectype)); 1587 
  /* (tail of MatConvert_MPIDenseCUDA_MPIDense, begun above) finish demoting B to a
     host MATMPIDENSE: restore the host vector type and name, then deregister every
     CUDA-specific composed method by composing NULL over it */
  PetscCall(PetscStrallocpy(VECSTANDARD, &B->defaultvectype));
  PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIDENSE));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpidensecuda_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAPlaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAResetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAReplaceArray_C", NULL));
  /* convert the inner (per-rank) matrix back to a host sequential dense matrix */
  m = (Mat_MPIDense *)(B)->data;
  if (m->A) PetscCall(MatConvert(m->A, MATSEQDENSE, MAT_INPLACE_MATRIX, &m->A));
  B->ops->bindtocpu = NULL;
  B->offloadmask    = PETSC_OFFLOAD_CPU;
  PetscFunctionReturn(0);
}

/* Convert a MATMPIDENSE matrix M into a MATMPIDENSECUDA matrix *newmat.
   For MAT_INITIAL_MATRIX a duplicate is made first; for MAT_REUSE_MATRIX the
   values are copied into *newmat; otherwise (MAT_INPLACE_MATRIX) M itself is
   retyped in place. The CUDA personality is installed by composing the
   device-specific methods over the generic dense ones. */
PetscErrorCode MatConvert_MPIDense_MPIDenseCUDA(Mat M, MatType type, MatReuse reuse, Mat *newmat) {
  Mat           B;
  Mat_MPIDense *m;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    PetscCall(MatDuplicate(M, MAT_COPY_VALUES, newmat));
  } else if (reuse == MAT_REUSE_MATRIX) {
    PetscCall(MatCopy(M, *newmat, SAME_NONZERO_PATTERN));
  }

  B = *newmat;
  /* switch the default vector type to CUDA and rename the object */
  PetscCall(PetscFree(B->defaultvectype));
  PetscCall(PetscStrallocpy(VECCUDA, &B->defaultvectype));
  PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIDENSECUDA));
  /* register the CUDA implementations of the composed-method interface */
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpidensecuda_mpidense_C", MatConvert_MPIDenseCUDA_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArray_C", MatDenseCUDAGetArray_MPIDenseCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArrayRead_C", MatDenseCUDAGetArrayRead_MPIDenseCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArrayWrite_C", MatDenseCUDAGetArrayWrite_MPIDenseCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArray_C", MatDenseCUDARestoreArray_MPIDenseCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArrayRead_C", MatDenseCUDARestoreArrayRead_MPIDenseCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArrayWrite_C", MatDenseCUDARestoreArrayWrite_MPIDenseCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAPlaceArray_C", MatDenseCUDAPlaceArray_MPIDenseCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAResetArray_C", MatDenseCUDAResetArray_MPIDenseCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAReplaceArray_C", MatDenseCUDAReplaceArray_MPIDenseCUDA));
  /* convert the inner (per-rank) matrix to its CUDA sequential counterpart */
  m = (Mat_MPIDense *)(B->data);
  if (m->A) {
    PetscCall(MatConvert(m->A, MATSEQDENSECUDA, MAT_INPLACE_MATRIX, &m->A));
    B->offloadmask = PETSC_OFFLOAD_BOTH;
  } else {
    B->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
  }
  PetscCall(MatBindToCPU_MPIDenseCUDA(B, PETSC_FALSE));

  B->ops->bindtocpu = MatBindToCPU_MPIDenseCUDA;
  PetscFunctionReturn(0);
}
#endif

/* Expose column col of the parallel dense matrix A as a borrowed MPI vector *v.
   The vector wraps the inner matrix's storage (no copy); it must be returned with
   MatDenseRestoreColumnVec_MPIDense(). Only one column vector or submatrix may be
   outstanding at a time (tracked via a->vecinuse / a->matinuse). */
PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A, PetscInt col, Vec *v) {
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscInt      lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  /* lazily create the reusable wrapper vector with no backing array */
  if (!a->cvec) PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec));
  a->vecinuse = col + 1; /* offset by 1 so column 0 still reads as "in use" */
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(MatDenseGetArray(a->A, (PetscScalar **)&a->ptrinuse));
  PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
  *v = a->cvec;
  PetscFunctionReturn(0);
}

/* Return the column vector obtained with MatDenseGetColumnVec_MPIDense and
   release the underlying array access. */
PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A, PetscInt col, Vec *v) {
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector");
  a->vecinuse = 0;
  PetscCall(MatDenseRestoreArray(a->A, (PetscScalar **)&a->ptrinuse));
  PetscCall(VecResetArray(a->cvec));
  if (v) *v = NULL;
  PetscFunctionReturn(0);
}

/* Read-only variant of MatDenseGetColumnVec_MPIDense: the borrowed vector is
   additionally lock-pushed so callers cannot write through it. */
PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v) {
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscInt      lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec));
  a->vecinuse = col + 1;
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(MatDenseGetArrayRead(a->A, &a->ptrinuse));
  PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
  PetscCall(VecLockReadPush(a->cvec));
  *v = a->cvec;
  PetscFunctionReturn(0);
}

/* Release a column vector obtained with MatDenseGetColumnVecRead_MPIDense
   (pops the read lock before resetting the wrapped array). */
PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v) {
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
  a->vecinuse = 0;
  PetscCall(MatDenseRestoreArrayRead(a->A, &a->ptrinuse));
  PetscCall(VecLockReadPop(a->cvec));
  PetscCall(VecResetArray(a->cvec));
  if (v) *v = NULL;
  PetscFunctionReturn(0);
}

/* Write-only variant of MatDenseGetColumnVec_MPIDense: uses the write-only
   array accessor, so existing values need not be valid. */
PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v) {
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscInt      lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec));
  a->vecinuse = col + 1;
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(MatDenseGetArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
  PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
  *v = a->cvec;
  PetscFunctionReturn(0);
}

/* Release a column vector obtained with MatDenseGetColumnVecWrite_MPIDense. */
PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v) {
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
  a->vecinuse = 0;
  PetscCall(MatDenseRestoreArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
  PetscCall(VecResetArray(a->cvec));
  if (v) *v = NULL;
  PetscFunctionReturn(0);
}

/* Expose the submatrix A[rbegin:rend, cbegin:cend] as a borrowed matrix *v
   wrapping the inner storage (no copy). A single cached wrapper matrix a->cmat
   is reused; its layouts are rebuilt only when the requested extents change. */
PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A, PetscInt rbegin, PetscInt rend, PetscInt cbegin, PetscInt cend, Mat *v) {
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat_MPIDense *c;
  MPI_Comm      comm;
  PetscInt      pbegin, pend;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCheck(!a->vecinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  /* clip the requested global row range [rbegin,rend) to this rank's local rows */
  pbegin = PetscMax(0, PetscMin(A->rmap->rend, rbegin) - A->rmap->rstart);
  pend   = PetscMin(A->rmap->n, PetscMax(0, rend - A->rmap->rstart));
  if (!a->cmat) {
    PetscCall(MatCreate(comm, &a->cmat));
    PetscCall(MatSetType(a->cmat, ((PetscObject)A)->type_name));
    /* full row range: share A's row layout; otherwise build a custom one */
    if (rend - rbegin == A->rmap->N) PetscCall(PetscLayoutReference(A->rmap, &a->cmat->rmap));
    else {
      PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin));
      PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin));
      PetscCall(PetscLayoutSetUp(a->cmat->rmap));
    }
    PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin));
    PetscCall(PetscLayoutSetUp(a->cmat->cmap));
  } else {
    /* cached wrapper exists: rebuild its layouts only if the extents changed.
       The local-size comparison must agree on every rank, hence the allreduce. */
    PetscBool same = (PetscBool)(rend - rbegin == a->cmat->rmap->N);
    if (same && a->cmat->rmap->N != A->rmap->N) {
      same = (PetscBool)(pend - pbegin == a->cmat->rmap->n);
      PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &same, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
    }
    if (!same) {
      PetscCall(PetscLayoutDestroy(&a->cmat->rmap));
      PetscCall(PetscLayoutCreate(comm, &a->cmat->rmap));
      PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin));
      PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin));
      PetscCall(PetscLayoutSetUp(a->cmat->rmap));
    }
    if (cend - cbegin != a->cmat->cmap->N) {
      PetscCall(PetscLayoutDestroy(&a->cmat->cmap));
      PetscCall(PetscLayoutCreate(comm, &a->cmat->cmap));
      PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin));
      PetscCall(PetscLayoutSetUp(a->cmat->cmap));
    }
  }
  c = (Mat_MPIDense *)a->cmat->data;
  PetscCheck(!c->A, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  /* borrow the corresponding block of the inner sequential matrix */
  PetscCall(MatDenseGetSubMatrix(a->A, pbegin, pend, cbegin, cend, &c->A));

  a->cmat->preallocated = PETSC_TRUE;
  a->cmat->assembled    = PETSC_TRUE;
#if defined(PETSC_HAVE_DEVICE)
  a->cmat->offloadmask = c->A->offloadmask;
#endif
  a->matinuse = cbegin + 1; /* offset by 1 so cbegin == 0 still reads as "in use" */
  *v          = a->cmat;
  PetscFunctionReturn(0);
}

/* Return the submatrix obtained with MatDenseGetSubMatrix_MPIDense and release
   the borrowed inner storage. */
PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A, Mat *v) {
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat_MPIDense *c;

  PetscFunctionBegin;
  PetscCheck(a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetSubMatrix() first");
  PetscCheck(a->cmat, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal matrix");
  PetscCheck(*v == a->cmat, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Not the matrix obtained from MatDenseGetSubMatrix()");
  a->matinuse = 0;
  c           = (Mat_MPIDense *)a->cmat->data;
  PetscCall(MatDenseRestoreSubMatrix(a->A, &c->A));
  if (v) *v = NULL;
#if defined(PETSC_HAVE_DEVICE)
  /* the inner matrix may have migrated while borrowed; mirror its offload state */
  A->offloadmask = a->A->offloadmask;
#endif
  PetscFunctionReturn(0);
}

/*MC
   MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.

   Options Database Keys:
. -mat_type mpidense - sets the matrix type to `MATMPIDENSE` during a call to `MatSetFromOptions()`

  Level: beginner

.seealso: `MatCreateDense()`, `MATSEQDENSE`, `MATDENSE`
M*/
/* Type constructor for MATMPIDENSE: allocates the implementation data, installs
   the function table, and registers the composed-method interface. */
PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat) {
  Mat_MPIDense *a;

  PetscFunctionBegin;
  PetscCall(PetscNew(&a));
  mat->data = (void *)a;
  PetscCall(PetscMemcpy(mat->ops, &MatOps_Values, sizeof(struct _MatOps)));

  mat->insertmode = NOT_SET_VALUES;

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;

  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)mat), 1, &mat->stash));

  /* stuff used for matrix vector multiply */
  a->lvec        = NULL;
  a->Mvctx       = NULL;
  a->roworiented = PETSC_TRUE;

  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", MatDenseGetLDA_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", MatDenseSetLDA_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", MatDenseGetArray_MPIDense));
  /* (MatCreate_MPIDense, continued) register the remaining dense-array and
     column/submatrix accessors for this type */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", MatDenseRestoreArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", MatDenseGetArrayRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", MatDenseRestoreArrayRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", MatDenseGetArrayWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", MatDenseRestoreArrayWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", MatDensePlaceArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", MatDenseResetArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", MatDenseReplaceArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", MatDenseGetSubMatrix_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", MatDenseRestoreSubMatrix_MPIDense));
  /* (MatCreate_MPIDense, continued) conversion and product hooks; the
     package-conditional ones are compiled in only when PETSc was configured
     with the corresponding library */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", MatConvert_MPIDense_MPIAIJ));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", MatConvert_MPIDense_Elemental));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", MatConvert_Dense_ScaLAPACK));
#endif
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", MatConvert_MPIDense_MPIDenseCUDA));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", MatMPIDenseSetPreallocation_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
#endif

  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", MatDenseGetColumn_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", MatDenseRestoreColumn_MPIDense));
  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, MATMPIDENSE));
  PetscFunctionReturn(0);
}

/*MC
   MATMPIDENSECUDA - MATMPIDENSECUDA = "mpidensecuda" - A matrix type to be used for distributed dense matrices on GPUs.

   Options Database Keys:
. -mat_type mpidensecuda - sets the matrix type to `MATMPIDENSECUDA` during a call to `MatSetFromOptions()`

  Level: beginner

.seealso: `MATMPIDENSE`, `MATSEQDENSE`, `MATSEQDENSECUDA`
M*/
#if defined(PETSC_HAVE_CUDA)
#include <petsc/private/deviceimpl.h>
/* Type constructor for MATMPIDENSECUDA: build a host MATMPIDENSE, then convert
   it in place to the CUDA type. */
PETSC_EXTERN PetscErrorCode MatCreate_MPIDenseCUDA(Mat B) {
  PetscFunctionBegin;
  PetscCall(PetscDeviceInitialize(PETSC_DEVICE_CUDA));
  PetscCall(MatCreate_MPIDense(B));
  PetscCall(MatConvert_MPIDense_MPIDenseCUDA(B, MATMPIDENSECUDA, MAT_INPLACE_MATRIX, &B));
  PetscFunctionReturn(0);
}
#endif

/*MC
   MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.

   This matrix type is identical to `MATSEQDENSE` when constructed with a single process communicator,
   and `MATMPIDENSE` otherwise.

   Options Database Keys:
. -mat_type dense - sets the matrix type to `MATDENSE` during a call to `MatSetFromOptions()`

  Level: beginner

.seealso: `MATSEQDENSE`, `MATMPIDENSE`, `MATDENSECUDA`
M*/

/*MC
   MATDENSECUDA - MATDENSECUDA = "densecuda" - A matrix type to be used for dense matrices on GPUs.

   This matrix type is identical to `MATSEQDENSECUDA` when constructed with a single process communicator,
   and `MATMPIDENSECUDA` otherwise.

   Options Database Keys:
. -mat_type densecuda - sets the matrix type to `MATDENSECUDA` during a call to `MatSetFromOptions()`

  Level: beginner

.seealso: `MATSEQDENSECUDA`, `MATMPIDENSECUDA`, `MATDENSE`
M*/

/*@C
   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

   Collective

   Input Parameters:
+  B - the matrix
-  data - optional location of matrix data.  Set data=NULL for PETSc
   to control all matrix memory allocation.

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=NULL.

   Level: intermediate

.seealso: `MATMPIDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
@*/
PetscErrorCode MatMPIDenseSetPreallocation(Mat B, PetscScalar *data) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  /* Try: a no-op when B is not an MPI dense matrix, so this is safe to call
     unconditionally (e.g. from MatCreateDense) */
  PetscTryMethod(B, "MatMPIDenseSetPreallocation_C", (Mat, PetscScalar *), (B, data));
  PetscFunctionReturn(0);
}

/*@
   MatDensePlaceArray - Allows one to replace the array in a `MATDENSE` matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Note:
   You can return to the original array with a call to `MatDenseResetArray()`. The user is responsible for freeing this array; it will not be
   freed when the matrix is destroyed.

   Level: developer

.seealso: `MATDENSE`, `MatDenseGetArray()`, `MatDenseResetArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`,
          `MatDenseReplaceArray()`
@*/
PetscErrorCode MatDensePlaceArray(Mat mat, const PetscScalar *array) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  PetscUseMethod(mat, "MatDensePlaceArray_C", (Mat, const PetscScalar *), (mat, array));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
#if defined(PETSC_HAVE_DEVICE)
  /* the placed array lives on the host, so the host copy is now authoritative */
  mat->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(0);
}

/*@
   MatDenseResetArray - Resets the matrix array to that it previously had before the call to `MatDensePlaceArray()`

   Not Collective

   Input Parameter:
.  mat - the matrix

   Note:
   You can only call this after a call to `MatDensePlaceArray()`

   Level: developer

.seealso: `MATDENSE`, `MatDenseGetArray()`, `MatDensePlaceArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`
@*/
PetscErrorCode MatDenseResetArray(Mat mat) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  PetscUseMethod(mat, "MatDenseResetArray_C", (Mat), (mat));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  PetscFunctionReturn(0);
}

/*@
   MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Note:
   The memory passed in MUST be obtained with `PetscMalloc()` and CANNOT be
   freed by the user. It will be freed when the matrix is destroyed.

   Level: developer

.seealso: `MatDensePlaceArray()`, `MatDenseGetArray()`, `VecReplaceArray()`
@*/
PetscErrorCode MatDenseReplaceArray(Mat mat, const PetscScalar *array) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  PetscUseMethod(mat, "MatDenseReplaceArray_C", (Mat, const PetscScalar *), (mat, array));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
#if defined(PETSC_HAVE_DEVICE)
  /* the replacement array lives on the host, so the host copy is authoritative */
  mat->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_CUDA)
/*@C
   MatDenseCUDAPlaceArray - Allows one to replace the GPU array in a `MATDENSECUDA` matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Note:
   You can return to the original array with a call to `MatDenseCUDAResetArray()`. The user is responsible for freeing this array; it will not be
   freed when the matrix is destroyed. The array must have been allocated with cudaMalloc().

   Level: developer

.seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDAResetArray()`, `MatDenseCUDAReplaceArray()`
@*/
PetscErrorCode MatDenseCUDAPlaceArray(Mat mat, const PetscScalar *array) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  PetscUseMethod(mat, "MatDenseCUDAPlaceArray_C", (Mat, const PetscScalar *), (mat, array));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  /* the placed array lives on the device, so the GPU copy is authoritative */
  mat->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDAResetArray - Resets the matrix array to that it previously had before the call to `MatDenseCUDAPlaceArray()`

   Not Collective

   Input Parameter:
.  mat - the matrix

   Note:
   You can only call this after a call to `MatDenseCUDAPlaceArray()`

   Level: developer

.seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDAPlaceArray()`
@*/
PetscErrorCode MatDenseCUDAResetArray(Mat mat) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  PetscUseMethod(mat, "MatDenseCUDAResetArray_C", (Mat), (mat));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDAReplaceArray - Allows one to replace the GPU array in a `MATDENSECUDA` matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Note:
   This permanently replaces the GPU array and frees the memory associated with the old GPU array.
   The memory passed in CANNOT be freed by the user. It will be freed
   when the matrix is destroyed. The array should respect the matrix leading dimension.

   Level: developer

.seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDAPlaceArray()`, `MatDenseCUDAResetArray()`
@*/
PetscErrorCode MatDenseCUDAReplaceArray(Mat mat, const PetscScalar *array) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  PetscUseMethod(mat, "MatDenseCUDAReplaceArray_C", (Mat, const PetscScalar *), (mat, array));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  mat->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDAGetArrayWrite - Provides write access to the CUDA buffer inside a `MATDENSECUDA` matrix.

   Not Collective

   Input Parameter:
.  A - the matrix

   Output Parameter:
.  array - the GPU array in column major order

   Notes:
   The data on the GPU may not be updated due to operations done on the CPU. If you need updated data, use `MatDenseCUDAGetArray()`.

   The array must be restored with `MatDenseCUDARestoreArrayWrite()` when no longer needed.

   Level: developer

.seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayRead()`, `MatDenseCUDARestoreArrayRead()`
@*/
PetscErrorCode MatDenseCUDAGetArrayWrite(Mat A, PetscScalar **a) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscUseMethod(A, "MatDenseCUDAGetArrayWrite_C", (Mat, PetscScalar **), (A, a));
  PetscCall(PetscObjectStateIncrease((PetscObject)A));
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDARestoreArrayWrite - Restore write access to the CUDA buffer inside a `MATDENSECUDA` matrix previously obtained with `MatDenseCUDAGetArrayWrite()`.

   Not Collective

   Input Parameters:
+  A - the matrix
-  array - the GPU array in column major order

   Level: developer

.seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`, `MatDenseCUDAGetArrayRead()`
@*/
PetscErrorCode MatDenseCUDARestoreArrayWrite(Mat A, PetscScalar **a) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscUseMethod(A, "MatDenseCUDARestoreArrayWrite_C", (Mat, PetscScalar **), (A, a));
  PetscCall(PetscObjectStateIncrease((PetscObject)A));
  /* the buffer was written on the device, so the GPU copy is authoritative */
  A->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDAGetArrayRead - Provides read-only access to the CUDA buffer inside a `MATDENSECUDA` matrix. The array must be restored with `MatDenseCUDARestoreArrayRead()` when no longer needed.
2186 2187 Not Collective 2188 2189 Input Parameters: 2190 . A - the matrix 2191 2192 Output Parameters 2193 . array - the GPU array in column major order 2194 2195 Note: 2196 Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use `MatDenseCUDAGetArrayWrite()`. 2197 2198 Level: developer 2199 2200 .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()` 2201 @*/ 2202 PetscErrorCode MatDenseCUDAGetArrayRead(Mat A, const PetscScalar **a) { 2203 PetscFunctionBegin; 2204 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2205 PetscUseMethod(A, "MatDenseCUDAGetArrayRead_C", (Mat, const PetscScalar **), (A, a)); 2206 PetscFunctionReturn(0); 2207 } 2208 2209 /*@C 2210 MatDenseCUDARestoreArrayRead - Restore read-only access to the CUDA buffer inside a `MATDENSECUDA` matrix previously obtained with a call to `MatDenseCUDAGetArrayRead()`. 2211 2212 Not Collective 2213 2214 Input Parameters: 2215 + A - the matrix 2216 - array - the GPU array in column major order 2217 2218 Note: 2219 Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use `MatDenseCUDAGetArrayWrite()`. 2220 2221 Level: developer 2222 2223 .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDAGetArrayRead()` 2224 @*/ 2225 PetscErrorCode MatDenseCUDARestoreArrayRead(Mat A, const PetscScalar **a) { 2226 PetscFunctionBegin; 2227 PetscUseMethod(A, "MatDenseCUDARestoreArrayRead_C", (Mat, const PetscScalar **), (A, a)); 2228 PetscFunctionReturn(0); 2229 } 2230 2231 /*@C 2232 MatDenseCUDAGetArray - Provides access to the CUDA buffer inside a `MATDENSECUDA` matrix. The array must be restored with `MatDenseCUDARestoreArray()` when no longer needed. 2233 2234 Not Collective 2235 2236 Input Parameters: 2237 . 
A - the matrix 2238 2239 Output Parameters 2240 . array - the GPU array in column major order 2241 2242 Note: 2243 Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use `MatDenseCUDAGetArrayWrite()`. For read-only access, use `MatDenseCUDAGetArrayRead()`. 2244 2245 Level: developer 2246 2247 .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArrayRead()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()` 2248 @*/ 2249 PetscErrorCode MatDenseCUDAGetArray(Mat A, PetscScalar **a) { 2250 PetscFunctionBegin; 2251 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2252 PetscUseMethod(A, "MatDenseCUDAGetArray_C", (Mat, PetscScalar **), (A, a)); 2253 PetscCall(PetscObjectStateIncrease((PetscObject)A)); 2254 PetscFunctionReturn(0); 2255 } 2256 2257 /*@C 2258 MatDenseCUDARestoreArray - Restore access to the CUDA buffer inside a `MATDENSECUDA` matrix previously obtained with `MatDenseCUDAGetArray()`. 2259 2260 Not Collective 2261 2262 Input Parameters: 2263 + A - the matrix 2264 - array - the GPU array in column major order 2265 2266 Level: developer 2267 2268 .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`, `MatDenseCUDAGetArrayRead()` 2269 @*/ 2270 PetscErrorCode MatDenseCUDARestoreArray(Mat A, PetscScalar **a) { 2271 PetscFunctionBegin; 2272 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2273 PetscUseMethod(A, "MatDenseCUDARestoreArray_C", (Mat, PetscScalar **), (A, a)); 2274 PetscCall(PetscObjectStateIncrease((PetscObject)A)); 2275 A->offloadmask = PETSC_OFFLOAD_GPU; 2276 PetscFunctionReturn(0); 2277 } 2278 #endif 2279 2280 /*@C 2281 MatCreateDense - Creates a matrix in `MATDENSE` format. 2282 2283 Collective 2284 2285 Input Parameters: 2286 + comm - MPI communicator 2287 . m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given) 2288 . 
n - number of local columns (or `PETSC_DECIDE` to have calculated if N is given) 2289 . M - number of global rows (or `PETSC_DECIDE` to have calculated if m is given) 2290 . N - number of global columns (or `PETSC_DECIDE` to have calculated if n is given) 2291 - data - optional location of matrix data. Set data to NULL (`PETSC_NULL_SCALAR` for Fortran users) for PETSc 2292 to control all matrix memory allocation. 2293 2294 Output Parameter: 2295 . A - the matrix 2296 2297 Notes: 2298 The dense format is fully compatible with standard Fortran 77 2299 storage by columns. 2300 2301 Although local portions of the matrix are stored in column-major 2302 order, the matrix is partitioned across MPI ranks by row. 2303 2304 The data input variable is intended primarily for Fortran programmers 2305 who wish to allocate their own matrix memory space. Most users should 2306 set data=NULL (`PETSC_NULL_SCALAR` for Fortran users). 2307 2308 The user MUST specify either the local or global matrix dimensions 2309 (possibly both). 2310 2311 Level: intermediate 2312 2313 .seealso: `MATDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()` 2314 @*/ 2315 PetscErrorCode MatCreateDense(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar *data, Mat *A) { 2316 PetscFunctionBegin; 2317 PetscCall(MatCreate(comm, A)); 2318 PetscCall(MatSetSizes(*A, m, n, M, N)); 2319 PetscCall(MatSetType(*A, MATDENSE)); 2320 PetscCall(MatSeqDenseSetPreallocation(*A, data)); 2321 PetscCall(MatMPIDenseSetPreallocation(*A, data)); 2322 PetscFunctionReturn(0); 2323 } 2324 2325 #if defined(PETSC_HAVE_CUDA) 2326 /*@C 2327 MatCreateDenseCUDA - Creates a matrix in `MATDENSECUDA` format using CUDA. 2328 2329 Collective 2330 2331 Input Parameters: 2332 + comm - MPI communicator 2333 . m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given) 2334 . n - number of local columns (or `PETSC_DECIDE` to have calculated if N is given) 2335 . 
M - number of global rows (or `PETSC_DECIDE` to have calculated if m is given) 2336 . N - number of global columns (or `PETSC_DECIDE` to have calculated if n is given) 2337 - data - optional location of GPU matrix data. Set data=NULL for PETSc 2338 to control matrix memory allocation. 2339 2340 Output Parameter: 2341 . A - the matrix 2342 2343 Level: intermediate 2344 2345 .seealso: `MATDENSECUDA`, `MatCreate()`, `MatCreateDense()` 2346 @*/ 2347 PetscErrorCode MatCreateDenseCUDA(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar *data, Mat *A) { 2348 PetscFunctionBegin; 2349 PetscCall(MatCreate(comm, A)); 2350 PetscValidLogicalCollectiveBool(*A, !!data, 6); 2351 PetscCall(MatSetSizes(*A, m, n, M, N)); 2352 PetscCall(MatSetType(*A, MATDENSECUDA)); 2353 PetscCall(MatSeqDenseCUDASetPreallocation(*A, data)); 2354 PetscCall(MatMPIDenseCUDASetPreallocation(*A, data)); 2355 PetscFunctionReturn(0); 2356 } 2357 #endif 2358 2359 static PetscErrorCode MatDuplicate_MPIDense(Mat A, MatDuplicateOption cpvalues, Mat *newmat) { 2360 Mat mat; 2361 Mat_MPIDense *a, *oldmat = (Mat_MPIDense *)A->data; 2362 2363 PetscFunctionBegin; 2364 *newmat = NULL; 2365 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat)); 2366 PetscCall(MatSetSizes(mat, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 2367 PetscCall(MatSetType(mat, ((PetscObject)A)->type_name)); 2368 a = (Mat_MPIDense *)mat->data; 2369 2370 mat->factortype = A->factortype; 2371 mat->assembled = PETSC_TRUE; 2372 mat->preallocated = PETSC_TRUE; 2373 2374 mat->insertmode = NOT_SET_VALUES; 2375 a->donotstash = oldmat->donotstash; 2376 2377 PetscCall(PetscLayoutReference(A->rmap, &mat->rmap)); 2378 PetscCall(PetscLayoutReference(A->cmap, &mat->cmap)); 2379 2380 PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A)); 2381 2382 *newmat = mat; 2383 PetscFunctionReturn(0); 2384 } 2385 2386 PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer) { 2387 PetscBool isbinary; 2388 #if 
defined(PETSC_HAVE_HDF5) 2389 PetscBool ishdf5; 2390 #endif 2391 2392 PetscFunctionBegin; 2393 PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1); 2394 PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2); 2395 /* force binary viewer to load .info file if it has not yet done so */ 2396 PetscCall(PetscViewerSetUp(viewer)); 2397 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 2398 #if defined(PETSC_HAVE_HDF5) 2399 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5)); 2400 #endif 2401 if (isbinary) { 2402 PetscCall(MatLoad_Dense_Binary(newMat, viewer)); 2403 #if defined(PETSC_HAVE_HDF5) 2404 } else if (ishdf5) { 2405 PetscCall(MatLoad_Dense_HDF5(newMat, viewer)); 2406 #endif 2407 } else SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name); 2408 PetscFunctionReturn(0); 2409 } 2410 2411 static PetscErrorCode MatEqual_MPIDense(Mat A, Mat B, PetscBool *flag) { 2412 Mat_MPIDense *matB = (Mat_MPIDense *)B->data, *matA = (Mat_MPIDense *)A->data; 2413 Mat a, b; 2414 2415 PetscFunctionBegin; 2416 a = matA->A; 2417 b = matB->A; 2418 PetscCall(MatEqual(a, b, flag)); 2419 PetscCall(MPIU_Allreduce(MPI_IN_PLACE, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A))); 2420 PetscFunctionReturn(0); 2421 } 2422 2423 PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data) { 2424 Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data; 2425 2426 PetscFunctionBegin; 2427 PetscCall(PetscFree2(atb->sendbuf, atb->recvcounts)); 2428 PetscCall(MatDestroy(&atb->atb)); 2429 PetscCall(PetscFree(atb)); 2430 PetscFunctionReturn(0); 2431 } 2432 2433 PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data) { 2434 Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data; 2435 2436 PetscFunctionBegin; 2437 PetscCall(PetscFree2(abt->buf[0], abt->buf[1])); 
2438 PetscCall(PetscFree2(abt->recvcounts, abt->recvdispls)); 2439 PetscCall(PetscFree(abt)); 2440 PetscFunctionReturn(0); 2441 } 2442 2443 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) { 2444 Mat_MPIDense *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data; 2445 Mat_TransMatMultDense *atb; 2446 MPI_Comm comm; 2447 PetscMPIInt size, *recvcounts; 2448 PetscScalar *carray, *sendbuf; 2449 const PetscScalar *atbarray; 2450 PetscInt i, cN = C->cmap->N, proc, k, j, lda; 2451 const PetscInt *ranges; 2452 2453 PetscFunctionBegin; 2454 MatCheckProduct(C, 3); 2455 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 2456 atb = (Mat_TransMatMultDense *)C->product->data; 2457 recvcounts = atb->recvcounts; 2458 sendbuf = atb->sendbuf; 2459 2460 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 2461 PetscCallMPI(MPI_Comm_size(comm, &size)); 2462 2463 /* compute atbarray = aseq^T * bseq */ 2464 PetscCall(MatTransposeMatMult(a->A, b->A, atb->atb ? 
MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX, PETSC_DEFAULT, &atb->atb)); 2465 2466 PetscCall(MatGetOwnershipRanges(C, &ranges)); 2467 2468 /* arrange atbarray into sendbuf */ 2469 PetscCall(MatDenseGetArrayRead(atb->atb, &atbarray)); 2470 PetscCall(MatDenseGetLDA(atb->atb, &lda)); 2471 for (proc = 0, k = 0; proc < size; proc++) { 2472 for (j = 0; j < cN; j++) { 2473 for (i = ranges[proc]; i < ranges[proc + 1]; i++) sendbuf[k++] = atbarray[i + j * lda]; 2474 } 2475 } 2476 PetscCall(MatDenseRestoreArrayRead(atb->atb, &atbarray)); 2477 2478 /* sum all atbarray to local values of C */ 2479 PetscCall(MatDenseGetArrayWrite(c->A, &carray)); 2480 PetscCallMPI(MPI_Reduce_scatter(sendbuf, carray, recvcounts, MPIU_SCALAR, MPIU_SUM, comm)); 2481 PetscCall(MatDenseRestoreArrayWrite(c->A, &carray)); 2482 PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 2483 PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 2484 PetscFunctionReturn(0); 2485 } 2486 2487 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C) { 2488 MPI_Comm comm; 2489 PetscMPIInt size; 2490 PetscInt cm = A->cmap->n, cM, cN = B->cmap->N; 2491 Mat_TransMatMultDense *atb; 2492 PetscBool cisdense; 2493 PetscInt i; 2494 const PetscInt *ranges; 2495 2496 PetscFunctionBegin; 2497 MatCheckProduct(C, 4); 2498 PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 2499 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 2500 if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) { 2501 SETERRQ(comm, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend); 2502 } 2503 2504 /* create matrix product C */ 2505 PetscCall(MatSetSizes(C, cm, B->cmap->n, A->cmap->N, B->cmap->N)); 2506 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, 
MATMPIDENSECUDA, "")); 2507 if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 2508 PetscCall(MatSetUp(C)); 2509 2510 /* create data structure for reuse C */ 2511 PetscCallMPI(MPI_Comm_size(comm, &size)); 2512 PetscCall(PetscNew(&atb)); 2513 cM = C->rmap->N; 2514 PetscCall(PetscMalloc2(cM * cN, &atb->sendbuf, size, &atb->recvcounts)); 2515 PetscCall(MatGetOwnershipRanges(C, &ranges)); 2516 for (i = 0; i < size; i++) atb->recvcounts[i] = (ranges[i + 1] - ranges[i]) * cN; 2517 2518 C->product->data = atb; 2519 C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense; 2520 PetscFunctionReturn(0); 2521 } 2522 2523 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C) { 2524 MPI_Comm comm; 2525 PetscMPIInt i, size; 2526 PetscInt maxRows, bufsiz; 2527 PetscMPIInt tag; 2528 PetscInt alg; 2529 Mat_MatTransMultDense *abt; 2530 Mat_Product *product = C->product; 2531 PetscBool flg; 2532 2533 PetscFunctionBegin; 2534 MatCheckProduct(C, 4); 2535 PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 2536 /* check local size of A and B */ 2537 PetscCheck(A->cmap->n == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local column dimensions are incompatible, A (%" PetscInt_FMT ") != B (%" PetscInt_FMT ")", A->cmap->n, B->cmap->n); 2538 2539 PetscCall(PetscStrcmp(product->alg, "allgatherv", &flg)); 2540 alg = flg ? 
0 : 1; 2541 2542 /* setup matrix product C */ 2543 PetscCall(MatSetSizes(C, A->rmap->n, B->rmap->n, A->rmap->N, B->rmap->N)); 2544 PetscCall(MatSetType(C, MATMPIDENSE)); 2545 PetscCall(MatSetUp(C)); 2546 PetscCall(PetscObjectGetNewTag((PetscObject)C, &tag)); 2547 2548 /* create data structure for reuse C */ 2549 PetscCall(PetscObjectGetComm((PetscObject)C, &comm)); 2550 PetscCallMPI(MPI_Comm_size(comm, &size)); 2551 PetscCall(PetscNew(&abt)); 2552 abt->tag = tag; 2553 abt->alg = alg; 2554 switch (alg) { 2555 case 1: /* alg: "cyclic" */ 2556 for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i])); 2557 bufsiz = A->cmap->N * maxRows; 2558 PetscCall(PetscMalloc2(bufsiz, &(abt->buf[0]), bufsiz, &(abt->buf[1]))); 2559 break; 2560 default: /* alg: "allgatherv" */ 2561 PetscCall(PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1]))); 2562 PetscCall(PetscMalloc2(size, &(abt->recvcounts), size + 1, &(abt->recvdispls))); 2563 for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N; 2564 for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i]; 2565 break; 2566 } 2567 2568 C->product->data = abt; 2569 C->product->destroy = MatDestroy_MatMatTransMult_MPIDense_MPIDense; 2570 C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense; 2571 PetscFunctionReturn(0); 2572 } 2573 2574 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C) { 2575 Mat_MPIDense *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data; 2576 Mat_MatTransMultDense *abt; 2577 MPI_Comm comm; 2578 PetscMPIInt rank, size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom; 2579 PetscScalar *sendbuf, *recvbuf = NULL, *cv; 2580 PetscInt i, cK = A->cmap->N, k, j, bn; 2581 PetscScalar _DOne = 1.0, _DZero = 0.0; 2582 const PetscScalar *av, *bv; 2583 PetscBLASInt cm, cn, ck, 
alda, blda = 0, clda; 2584 MPI_Request reqs[2]; 2585 const PetscInt *ranges; 2586 2587 PetscFunctionBegin; 2588 MatCheckProduct(C, 3); 2589 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 2590 abt = (Mat_MatTransMultDense *)C->product->data; 2591 PetscCall(PetscObjectGetComm((PetscObject)C, &comm)); 2592 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 2593 PetscCallMPI(MPI_Comm_size(comm, &size)); 2594 PetscCall(MatDenseGetArrayRead(a->A, &av)); 2595 PetscCall(MatDenseGetArrayRead(b->A, &bv)); 2596 PetscCall(MatDenseGetArrayWrite(c->A, &cv)); 2597 PetscCall(MatDenseGetLDA(a->A, &i)); 2598 PetscCall(PetscBLASIntCast(i, &alda)); 2599 PetscCall(MatDenseGetLDA(b->A, &i)); 2600 PetscCall(PetscBLASIntCast(i, &blda)); 2601 PetscCall(MatDenseGetLDA(c->A, &i)); 2602 PetscCall(PetscBLASIntCast(i, &clda)); 2603 PetscCall(MatGetOwnershipRanges(B, &ranges)); 2604 bn = B->rmap->n; 2605 if (blda == bn) { 2606 sendbuf = (PetscScalar *)bv; 2607 } else { 2608 sendbuf = abt->buf[0]; 2609 for (k = 0, i = 0; i < cK; i++) { 2610 for (j = 0; j < bn; j++, k++) sendbuf[k] = bv[i * blda + j]; 2611 } 2612 } 2613 if (size > 1) { 2614 sendto = (rank + size - 1) % size; 2615 recvfrom = (rank + size + 1) % size; 2616 } else { 2617 sendto = recvfrom = 0; 2618 } 2619 PetscCall(PetscBLASIntCast(cK, &ck)); 2620 PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm)); 2621 recvisfrom = rank; 2622 for (i = 0; i < size; i++) { 2623 /* we have finished receiving in sending, bufs can be read/modified */ 2624 PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */ 2625 PetscInt nextbn = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom]; 2626 2627 if (nextrecvisfrom != rank) { 2628 /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */ 2629 sendsiz = cK * bn; 2630 recvsiz = cK * nextbn; 2631 recvbuf = (i & 1) ? 
abt->buf[0] : abt->buf[1]; 2632 PetscCallMPI(MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0])); 2633 PetscCallMPI(MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1])); 2634 } 2635 2636 /* local aseq * sendbuf^T */ 2637 PetscCall(PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn)); 2638 if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &cm, &cn, &ck, &_DOne, av, &alda, sendbuf, &cn, &_DZero, cv + clda * ranges[recvisfrom], &clda)); 2639 2640 if (nextrecvisfrom != rank) { 2641 /* wait for the sends and receives to complete, swap sendbuf and recvbuf */ 2642 PetscCallMPI(MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE)); 2643 } 2644 bn = nextbn; 2645 recvisfrom = nextrecvisfrom; 2646 sendbuf = recvbuf; 2647 } 2648 PetscCall(MatDenseRestoreArrayRead(a->A, &av)); 2649 PetscCall(MatDenseRestoreArrayRead(b->A, &bv)); 2650 PetscCall(MatDenseRestoreArrayWrite(c->A, &cv)); 2651 PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 2652 PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 2653 PetscFunctionReturn(0); 2654 } 2655 2656 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C) { 2657 Mat_MPIDense *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data; 2658 Mat_MatTransMultDense *abt; 2659 MPI_Comm comm; 2660 PetscMPIInt size; 2661 PetscScalar *cv, *sendbuf, *recvbuf; 2662 const PetscScalar *av, *bv; 2663 PetscInt blda, i, cK = A->cmap->N, k, j, bn; 2664 PetscScalar _DOne = 1.0, _DZero = 0.0; 2665 PetscBLASInt cm, cn, ck, alda, clda; 2666 2667 PetscFunctionBegin; 2668 MatCheckProduct(C, 3); 2669 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 2670 abt = (Mat_MatTransMultDense *)C->product->data; 2671 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 2672 PetscCallMPI(MPI_Comm_size(comm, &size)); 2673 PetscCall(MatDenseGetArrayRead(a->A, &av)); 2674 
PetscCall(MatDenseGetArrayRead(b->A, &bv)); 2675 PetscCall(MatDenseGetArrayWrite(c->A, &cv)); 2676 PetscCall(MatDenseGetLDA(a->A, &i)); 2677 PetscCall(PetscBLASIntCast(i, &alda)); 2678 PetscCall(MatDenseGetLDA(b->A, &blda)); 2679 PetscCall(MatDenseGetLDA(c->A, &i)); 2680 PetscCall(PetscBLASIntCast(i, &clda)); 2681 /* copy transpose of B into buf[0] */ 2682 bn = B->rmap->n; 2683 sendbuf = abt->buf[0]; 2684 recvbuf = abt->buf[1]; 2685 for (k = 0, j = 0; j < bn; j++) { 2686 for (i = 0; i < cK; i++, k++) sendbuf[k] = bv[i * blda + j]; 2687 } 2688 PetscCall(MatDenseRestoreArrayRead(b->A, &bv)); 2689 PetscCallMPI(MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm)); 2690 PetscCall(PetscBLASIntCast(cK, &ck)); 2691 PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm)); 2692 PetscCall(PetscBLASIntCast(c->A->cmap->n, &cn)); 2693 if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &cm, &cn, &ck, &_DOne, av, &alda, recvbuf, &ck, &_DZero, cv, &clda)); 2694 PetscCall(MatDenseRestoreArrayRead(a->A, &av)); 2695 PetscCall(MatDenseRestoreArrayRead(b->A, &bv)); 2696 PetscCall(MatDenseRestoreArrayWrite(c->A, &cv)); 2697 PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 2698 PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 2699 PetscFunctionReturn(0); 2700 } 2701 2702 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) { 2703 Mat_MatTransMultDense *abt; 2704 2705 PetscFunctionBegin; 2706 MatCheckProduct(C, 3); 2707 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 2708 abt = (Mat_MatTransMultDense *)C->product->data; 2709 switch (abt->alg) { 2710 case 1: PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C)); break; 2711 default: PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C)); break; 2712 } 2713 PetscFunctionReturn(0); 2714 } 2715 2716 PetscErrorCode 
MatDestroy_MatMatMult_MPIDense_MPIDense(void *data) { 2717 Mat_MatMultDense *ab = (Mat_MatMultDense *)data; 2718 2719 PetscFunctionBegin; 2720 PetscCall(MatDestroy(&ab->Ce)); 2721 PetscCall(MatDestroy(&ab->Ae)); 2722 PetscCall(MatDestroy(&ab->Be)); 2723 PetscCall(PetscFree(ab)); 2724 PetscFunctionReturn(0); 2725 } 2726 2727 #if defined(PETSC_HAVE_ELEMENTAL) 2728 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) { 2729 Mat_MatMultDense *ab; 2730 2731 PetscFunctionBegin; 2732 MatCheckProduct(C, 3); 2733 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Missing product data"); 2734 ab = (Mat_MatMultDense *)C->product->data; 2735 PetscCall(MatConvert_MPIDense_Elemental(A, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Ae)); 2736 PetscCall(MatConvert_MPIDense_Elemental(B, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Be)); 2737 PetscCall(MatMatMultNumeric_Elemental(ab->Ae, ab->Be, ab->Ce)); 2738 PetscCall(MatConvert(ab->Ce, MATMPIDENSE, MAT_REUSE_MATRIX, &C)); 2739 PetscFunctionReturn(0); 2740 } 2741 2742 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C) { 2743 Mat Ae, Be, Ce; 2744 Mat_MatMultDense *ab; 2745 2746 PetscFunctionBegin; 2747 MatCheckProduct(C, 4); 2748 PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 2749 /* check local size of A and B */ 2750 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 2751 SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend); 2752 } 2753 2754 /* create elemental matrices Ae and Be */ 2755 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &Ae)); 2756 PetscCall(MatSetSizes(Ae, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N)); 2757 PetscCall(MatSetType(Ae, MATELEMENTAL)); 
2758 PetscCall(MatSetUp(Ae)); 2759 PetscCall(MatSetOption(Ae, MAT_ROW_ORIENTED, PETSC_FALSE)); 2760 2761 PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &Be)); 2762 PetscCall(MatSetSizes(Be, PETSC_DECIDE, PETSC_DECIDE, B->rmap->N, B->cmap->N)); 2763 PetscCall(MatSetType(Be, MATELEMENTAL)); 2764 PetscCall(MatSetUp(Be)); 2765 PetscCall(MatSetOption(Be, MAT_ROW_ORIENTED, PETSC_FALSE)); 2766 2767 /* compute symbolic Ce = Ae*Be */ 2768 PetscCall(MatCreate(PetscObjectComm((PetscObject)C), &Ce)); 2769 PetscCall(MatMatMultSymbolic_Elemental(Ae, Be, fill, Ce)); 2770 2771 /* setup C */ 2772 PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, PETSC_DECIDE, PETSC_DECIDE)); 2773 PetscCall(MatSetType(C, MATDENSE)); 2774 PetscCall(MatSetUp(C)); 2775 2776 /* create data structure for reuse Cdense */ 2777 PetscCall(PetscNew(&ab)); 2778 ab->Ae = Ae; 2779 ab->Be = Be; 2780 ab->Ce = Ce; 2781 2782 C->product->data = ab; 2783 C->product->destroy = MatDestroy_MatMatMult_MPIDense_MPIDense; 2784 C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense; 2785 PetscFunctionReturn(0); 2786 } 2787 #endif 2788 /* ----------------------------------------------- */ 2789 #if defined(PETSC_HAVE_ELEMENTAL) 2790 static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C) { 2791 PetscFunctionBegin; 2792 C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense; 2793 C->ops->productsymbolic = MatProductSymbolic_AB; 2794 PetscFunctionReturn(0); 2795 } 2796 #endif 2797 2798 static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C) { 2799 Mat_Product *product = C->product; 2800 Mat A = product->A, B = product->B; 2801 2802 PetscFunctionBegin; 2803 if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) 2804 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend); 2805 
C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense; 2806 C->ops->productsymbolic = MatProductSymbolic_AtB; 2807 PetscFunctionReturn(0); 2808 } 2809 2810 static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C) { 2811 Mat_Product *product = C->product; 2812 const char *algTypes[2] = {"allgatherv", "cyclic"}; 2813 PetscInt alg, nalg = 2; 2814 PetscBool flg = PETSC_FALSE; 2815 2816 PetscFunctionBegin; 2817 /* Set default algorithm */ 2818 alg = 0; /* default is allgatherv */ 2819 PetscCall(PetscStrcmp(product->alg, "default", &flg)); 2820 if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg])); 2821 2822 /* Get runtime option */ 2823 if (product->api_user) { 2824 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatTransposeMult", "Mat"); 2825 PetscCall(PetscOptionsEList("-matmattransmult_mpidense_mpidense_via", "Algorithmic approach", "MatMatTransposeMult", algTypes, nalg, algTypes[alg], &alg, &flg)); 2826 PetscOptionsEnd(); 2827 } else { 2828 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_ABt", "Mat"); 2829 PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_ABt", algTypes, nalg, algTypes[alg], &alg, &flg)); 2830 PetscOptionsEnd(); 2831 } 2832 if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg])); 2833 2834 C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense; 2835 C->ops->productsymbolic = MatProductSymbolic_ABt; 2836 PetscFunctionReturn(0); 2837 } 2838 2839 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C) { 2840 Mat_Product *product = C->product; 2841 2842 PetscFunctionBegin; 2843 switch (product->type) { 2844 #if defined(PETSC_HAVE_ELEMENTAL) 2845 case MATPRODUCT_AB: PetscCall(MatProductSetFromOptions_MPIDense_AB(C)); break; 2846 #endif 2847 case MATPRODUCT_AtB: PetscCall(MatProductSetFromOptions_MPIDense_AtB(C)); 
break; 2848 case MATPRODUCT_ABt: PetscCall(MatProductSetFromOptions_MPIDense_ABt(C)); break; 2849 default: break; 2850 } 2851 PetscFunctionReturn(0); 2852 } 2853