/*
   Basic functions for basic parallel dense matrices.
*/

#include <../src/mat/impls/dense/mpi/mpidense.h> /*I "petscmat.h" I*/
#include <../src/mat/impls/aij/mpi/mpiaij.h>
#include <petscblaslapack.h>

/*@
   MatDenseGetLocalMatrix - For a `MATMPIDENSE` or `MATSEQDENSE` matrix returns the sequential
   matrix that represents the operator. For sequential matrices it returns itself.

   Input Parameter:
.  A - the sequential or MPI dense matrix

   Output Parameter:
.  B - the inner matrix

   Level: intermediate

.seealso: `MATDENSE`, `MATMPIDENSE`, `MATSEQDENSE`
@*/
PetscErrorCode MatDenseGetLocalMatrix(Mat A, Mat *B) {
  Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
  PetscBool     flg;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscValidPointer(B, 2);
  /* For the MPI type, return the local (diagonal-portion) sequential matrix stored in the data struct */
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &flg));
  if (flg) *B = mat->A;
  else {
    /* Otherwise only sequential dense is acceptable; it is its own "local matrix" */
    PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQDENSE, &flg));
    PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)A)->type_name);
    *B = A;
  }
  PetscFunctionReturn(0);
}

/* Copies the local dense block of A into the local dense block of B (same layout assumed by caller) */
PetscErrorCode MatCopy_MPIDense(Mat A, Mat B, MatStructure s) {
  Mat_MPIDense *Amat = (Mat_MPIDense *)A->data;
  Mat_MPIDense *Bmat = (Mat_MPIDense *)B->data;

  PetscFunctionBegin;
  PetscCall(MatCopy(Amat->A, Bmat->A, s));
  PetscFunctionReturn(0);
}

/* Adds alpha to the diagonal entries owned by this process.
   Works for rectangular matrices: only global diagonal entries with index < cmap->N exist. */
PetscErrorCode MatShift_MPIDense(Mat A, PetscScalar alpha) {
  Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
  PetscInt      j, lda, rstart = A->rmap->rstart, rend = A->rmap->rend, rend2;
  PetscScalar  *v;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArray(mat->A, &v));
  PetscCall(MatDenseGetLDA(mat->A, &lda));
  /* Clip the owned row range against the global column count so we never index past the last column */
  rend2 = PetscMin(rend, A->cmap->N);
  if (rend2 > rstart) {
    /* Global row j is local row j - rstart; column index j of the local block addresses column j of A */
    for (j = rstart; j < rend2; j++) v[j - rstart + j * lda] += alpha;
    PetscCall(PetscLogFlops(rend2 - rstart));
  }
  PetscCall(MatDenseRestoreArray(mat->A, &v));
  PetscFunctionReturn(0);
}

/* Returns a row of the matrix; only rows owned by this process are supported */
PetscErrorCode MatGetRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) {
  Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
  PetscInt      lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;

  PetscFunctionBegin;
  PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows");
  lrow = row - rstart; /* convert global row to local row of the inner sequential matrix */
  PetscCall(MatGetRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v));
  PetscFunctionReturn(0);
}

/* Releases resources obtained with MatGetRow_MPIDense(); mirrors its local-row restriction */
PetscErrorCode MatRestoreRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) {
  Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
  PetscInt      lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;

  PetscFunctionBegin;
  PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows");
  lrow = row - rstart;
  PetscCall(MatRestoreRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v));
  PetscFunctionReturn(0);
}

/* Returns the square diagonal block (rows and columns owned by this process) as a sequential
   dense matrix. The block is cached on A as the composed object "DiagonalBlock"; the cached
   matrix shares storage with A's local array (no copy). */
PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A, Mat *a) {
  Mat_MPIDense *mdn = (Mat_MPIDense *)A->data;
  PetscInt      m = A->rmap->n, rstart = A->rmap->rstart;
  PetscScalar  *array;
  MPI_Comm      comm;
  PetscBool     flg;
  Mat           B;

  PetscFunctionBegin;
  PetscCall(MatHasCongruentLayouts(A, &flg));
  PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only square matrices supported.");
  PetscCall(PetscObjectQuery((PetscObject)A, "DiagonalBlock", (PetscObject *)&B));
  if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */
    PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSECUDA, &flg));
    PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSECUDA);
    PetscCall(PetscObjectGetComm((PetscObject)(mdn->A), &comm));
    PetscCall(MatCreate(comm, &B));
    PetscCall(MatSetSizes(B, m, m, m, m));
    PetscCall(MatSetType(B, ((PetscObject)mdn->A)->type_name));
    /* Alias the local array starting at the column block owned by this rank.
       NOTE(review): the offset m * rstart assumes column-major storage with lda == m — confirm
       this routine is never reached when the inner matrix has a larger LDA. */
    PetscCall(MatDenseGetArrayRead(mdn->A, (const PetscScalar **)&array));
    PetscCall(MatSeqDenseSetPreallocation(B, array + m * rstart));
    PetscCall(MatDenseRestoreArrayRead(mdn->A, (const PetscScalar **)&array));
    PetscCall(PetscObjectCompose((PetscObject)A, "DiagonalBlock", (PetscObject)B));
    *a = B;
    /* Composing took a reference; drop ours so A owns the cached block */
    PetscCall(MatDestroy(&B));
  } else *a = B;
  PetscFunctionReturn(0);
}

/* Inserts/adds an m-by-n logically dense sub-block of values. Locally owned rows go straight
   into the inner sequential matrix; off-process rows are queued in the stash (unless stashing
   is disabled) to be communicated during assembly. */
PetscErrorCode MatSetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], const PetscScalar v[], InsertMode addv) {
  Mat_MPIDense *A = (Mat_MPIDense *)mat->data;
  PetscInt      i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;
  PetscBool     roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative rows are silently skipped, per MatSetValues() convention */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        /* v is row-major: row i of the block starts at v + i*n */
        PetscCall(MatSetValues(A->A, 1, &row, n, idxn, v + i * n, addv));
      } else {
        /* v is column-major: entry (i,j) of the block is v[i + j*m] */
        for (j = 0; j < n; j++) {
          if (idxn[j] < 0) continue;
          PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large");
          PetscCall(MatSetValues(A->A, 1, &row, 1, &idxn[j], v + i + j * m, addv));
        }
      }
    } else if (!A->donotstash) {
      /* Off-process entry: mark unassembled and stash for communication in MatAssemblyBegin/End */
      mat->assembled = PETSC_FALSE;
      if (roworiented) {
        PetscCall(MatStashValuesRow_Private(&mat->stash, idxm[i], n, idxn, v + i * n, PETSC_FALSE));
      } else {
        PetscCall(MatStashValuesCol_Private(&mat->stash, idxm[i], n, idxn, v + i, m, PETSC_FALSE));
      }
    }
  }
  PetscFunctionReturn(0);
}
150 151 PetscErrorCode MatGetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[]) { 152 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 153 PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row; 154 155 PetscFunctionBegin; 156 for (i = 0; i < m; i++) { 157 if (idxm[i] < 0) continue; /* negative row */ 158 PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large"); 159 if (idxm[i] >= rstart && idxm[i] < rend) { 160 row = idxm[i] - rstart; 161 for (j = 0; j < n; j++) { 162 if (idxn[j] < 0) continue; /* negative column */ 163 PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large"); 164 PetscCall(MatGetValues(mdn->A, 1, &row, 1, &idxn[j], v + i * n + j)); 165 } 166 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported"); 167 } 168 PetscFunctionReturn(0); 169 } 170 171 static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A, PetscInt *lda) { 172 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 173 174 PetscFunctionBegin; 175 PetscCall(MatDenseGetLDA(a->A, lda)); 176 PetscFunctionReturn(0); 177 } 178 179 static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A, PetscInt lda) { 180 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 181 PetscBool iscuda; 182 183 PetscFunctionBegin; 184 if (!a->A) { 185 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 186 PetscCall(PetscLayoutSetUp(A->rmap)); 187 PetscCall(PetscLayoutSetUp(A->cmap)); 188 PetscCall(MatCreate(PETSC_COMM_SELF, &a->A)); 189 PetscCall(PetscLogObjectParent((PetscObject)A, (PetscObject)a->A)); 190 PetscCall(MatSetSizes(a->A, A->rmap->n, A->cmap->N, A->rmap->n, A->cmap->N)); 191 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSECUDA, &iscuda)); 192 PetscCall(MatSetType(a->A, iscuda ? 
MATSEQDENSECUDA : MATSEQDENSE)); 193 } 194 PetscCall(MatDenseSetLDA(a->A, lda)); 195 PetscFunctionReturn(0); 196 } 197 198 static PetscErrorCode MatDenseGetArray_MPIDense(Mat A, PetscScalar **array) { 199 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 200 201 PetscFunctionBegin; 202 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 203 PetscCall(MatDenseGetArray(a->A, array)); 204 PetscFunctionReturn(0); 205 } 206 207 static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A, const PetscScalar **array) { 208 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 209 210 PetscFunctionBegin; 211 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 212 PetscCall(MatDenseGetArrayRead(a->A, array)); 213 PetscFunctionReturn(0); 214 } 215 216 static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A, PetscScalar **array) { 217 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 218 219 PetscFunctionBegin; 220 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 221 PetscCall(MatDenseGetArrayWrite(a->A, array)); 222 PetscFunctionReturn(0); 223 } 224 225 static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A, const PetscScalar *array) { 226 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 227 228 PetscFunctionBegin; 229 PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 230 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 231 PetscCall(MatDensePlaceArray(a->A, array)); 232 PetscFunctionReturn(0); 233 } 234 235 static PetscErrorCode MatDenseResetArray_MPIDense(Mat A) { 236 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 237 238 PetscFunctionBegin; 239 PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call 
MatDenseRestoreColumnVec() first"); 240 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 241 PetscCall(MatDenseResetArray(a->A)); 242 PetscFunctionReturn(0); 243 } 244 245 static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A, const PetscScalar *array) { 246 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 247 248 PetscFunctionBegin; 249 PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 250 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 251 PetscCall(MatDenseReplaceArray(a->A, array)); 252 PetscFunctionReturn(0); 253 } 254 255 static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A, IS isrow, IS iscol, MatReuse scall, Mat *B) { 256 Mat_MPIDense *mat = (Mat_MPIDense *)A->data, *newmatd; 257 PetscInt lda, i, j, rstart, rend, nrows, ncols, Ncols, nlrows, nlcols; 258 const PetscInt *irow, *icol; 259 const PetscScalar *v; 260 PetscScalar *bv; 261 Mat newmat; 262 IS iscol_local; 263 MPI_Comm comm_is, comm_mat; 264 265 PetscFunctionBegin; 266 PetscCall(PetscObjectGetComm((PetscObject)A, &comm_mat)); 267 PetscCall(PetscObjectGetComm((PetscObject)iscol, &comm_is)); 268 PetscCheck(comm_mat == comm_is, PETSC_COMM_SELF, PETSC_ERR_ARG_NOTSAMECOMM, "IS communicator must match matrix communicator"); 269 270 PetscCall(ISAllGather(iscol, &iscol_local)); 271 PetscCall(ISGetIndices(isrow, &irow)); 272 PetscCall(ISGetIndices(iscol_local, &icol)); 273 PetscCall(ISGetLocalSize(isrow, &nrows)); 274 PetscCall(ISGetLocalSize(iscol, &ncols)); 275 PetscCall(ISGetSize(iscol, &Ncols)); /* global number of columns, size of iscol_local */ 276 277 /* No parallel redistribution currently supported! Should really check each index set 278 to comfirm that it is OK. ... Currently supports only submatrix same partitioning as 279 original matrix! 
*/ 280 281 PetscCall(MatGetLocalSize(A, &nlrows, &nlcols)); 282 PetscCall(MatGetOwnershipRange(A, &rstart, &rend)); 283 284 /* Check submatrix call */ 285 if (scall == MAT_REUSE_MATRIX) { 286 /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */ 287 /* Really need to test rows and column sizes! */ 288 newmat = *B; 289 } else { 290 /* Create and fill new matrix */ 291 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &newmat)); 292 PetscCall(MatSetSizes(newmat, nrows, ncols, PETSC_DECIDE, Ncols)); 293 PetscCall(MatSetType(newmat, ((PetscObject)A)->type_name)); 294 PetscCall(MatMPIDenseSetPreallocation(newmat, NULL)); 295 } 296 297 /* Now extract the data pointers and do the copy, column at a time */ 298 newmatd = (Mat_MPIDense *)newmat->data; 299 PetscCall(MatDenseGetArray(newmatd->A, &bv)); 300 PetscCall(MatDenseGetArrayRead(mat->A, &v)); 301 PetscCall(MatDenseGetLDA(mat->A, &lda)); 302 for (i = 0; i < Ncols; i++) { 303 const PetscScalar *av = v + lda * icol[i]; 304 for (j = 0; j < nrows; j++) *bv++ = av[irow[j] - rstart]; 305 } 306 PetscCall(MatDenseRestoreArrayRead(mat->A, &v)); 307 PetscCall(MatDenseRestoreArray(newmatd->A, &bv)); 308 309 /* Assemble the matrices so that the correct flags are set */ 310 PetscCall(MatAssemblyBegin(newmat, MAT_FINAL_ASSEMBLY)); 311 PetscCall(MatAssemblyEnd(newmat, MAT_FINAL_ASSEMBLY)); 312 313 /* Free work space */ 314 PetscCall(ISRestoreIndices(isrow, &irow)); 315 PetscCall(ISRestoreIndices(iscol_local, &icol)); 316 PetscCall(ISDestroy(&iscol_local)); 317 *B = newmat; 318 PetscFunctionReturn(0); 319 } 320 321 PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A, PetscScalar **array) { 322 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 323 324 PetscFunctionBegin; 325 PetscCall(MatDenseRestoreArray(a->A, array)); 326 PetscFunctionReturn(0); 327 } 328 329 PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A, const PetscScalar **array) { 330 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 331 332 
PetscFunctionBegin; 333 PetscCall(MatDenseRestoreArrayRead(a->A, array)); 334 PetscFunctionReturn(0); 335 } 336 337 PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A, PetscScalar **array) { 338 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 339 340 PetscFunctionBegin; 341 PetscCall(MatDenseRestoreArrayWrite(a->A, array)); 342 PetscFunctionReturn(0); 343 } 344 345 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat, MatAssemblyType mode) { 346 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 347 PetscInt nstash, reallocs; 348 349 PetscFunctionBegin; 350 if (mdn->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 351 352 PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range)); 353 PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs)); 354 PetscCall(PetscInfo(mdn->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs)); 355 PetscFunctionReturn(0); 356 } 357 358 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat, MatAssemblyType mode) { 359 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 360 PetscInt i, *row, *col, flg, j, rstart, ncols; 361 PetscMPIInt n; 362 PetscScalar *val; 363 364 PetscFunctionBegin; 365 if (!mdn->donotstash && !mat->nooffprocentries) { 366 /* wait on receives */ 367 while (1) { 368 PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg)); 369 if (!flg) break; 370 371 for (i = 0; i < n;) { 372 /* Now identify the consecutive vals belonging to the same row */ 373 for (j = i, rstart = row[j]; j < n; j++) { 374 if (row[j] != rstart) break; 375 } 376 if (j < n) ncols = j - i; 377 else ncols = n - i; 378 /* Now assemble all these values with a single function call */ 379 PetscCall(MatSetValues_MPIDense(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode)); 380 i = j; 381 } 382 } 383 PetscCall(MatStashScatterEnd_Private(&mat->stash)); 384 } 385 386 PetscCall(MatAssemblyBegin(mdn->A, mode)); 387 PetscCall(MatAssemblyEnd(mdn->A, mode)); 388 
PetscFunctionReturn(0); 389 } 390 391 PetscErrorCode MatZeroEntries_MPIDense(Mat A) { 392 Mat_MPIDense *l = (Mat_MPIDense *)A->data; 393 394 PetscFunctionBegin; 395 PetscCall(MatZeroEntries(l->A)); 396 PetscFunctionReturn(0); 397 } 398 399 PetscErrorCode MatZeroRows_MPIDense(Mat A, PetscInt n, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) { 400 Mat_MPIDense *l = (Mat_MPIDense *)A->data; 401 PetscInt i, len, *lrows; 402 403 PetscFunctionBegin; 404 /* get locally owned rows */ 405 PetscCall(PetscLayoutMapLocal(A->rmap, n, rows, &len, &lrows, NULL)); 406 /* fix right hand side if needed */ 407 if (x && b) { 408 const PetscScalar *xx; 409 PetscScalar *bb; 410 411 PetscCall(VecGetArrayRead(x, &xx)); 412 PetscCall(VecGetArrayWrite(b, &bb)); 413 for (i = 0; i < len; ++i) bb[lrows[i]] = diag * xx[lrows[i]]; 414 PetscCall(VecRestoreArrayRead(x, &xx)); 415 PetscCall(VecRestoreArrayWrite(b, &bb)); 416 } 417 PetscCall(MatZeroRows(l->A, len, lrows, 0.0, NULL, NULL)); 418 if (diag != 0.0) { 419 Vec d; 420 421 PetscCall(MatCreateVecs(A, NULL, &d)); 422 PetscCall(VecSet(d, diag)); 423 PetscCall(MatDiagonalSet(A, d, INSERT_VALUES)); 424 PetscCall(VecDestroy(&d)); 425 } 426 PetscCall(PetscFree(lrows)); 427 PetscFunctionReturn(0); 428 } 429 430 PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat, Vec, Vec); 431 PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat, Vec, Vec, Vec); 432 PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat, Vec, Vec); 433 PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat, Vec, Vec, Vec); 434 435 PetscErrorCode MatMult_MPIDense(Mat mat, Vec xx, Vec yy) { 436 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 437 const PetscScalar *ax; 438 PetscScalar *ay; 439 PetscMemType axmtype, aymtype; 440 441 PetscFunctionBegin; 442 if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat)); 443 PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype)); 444 PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype)); 445 
PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE)); 446 PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE)); 447 PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay)); 448 PetscCall(VecRestoreArrayReadAndMemType(xx, &ax)); 449 PetscCall((*mdn->A->ops->mult)(mdn->A, mdn->lvec, yy)); 450 PetscFunctionReturn(0); 451 } 452 453 PetscErrorCode MatMultAdd_MPIDense(Mat mat, Vec xx, Vec yy, Vec zz) { 454 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 455 const PetscScalar *ax; 456 PetscScalar *ay; 457 PetscMemType axmtype, aymtype; 458 459 PetscFunctionBegin; 460 if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat)); 461 PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype)); 462 PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype)); 463 PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE)); 464 PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE)); 465 PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay)); 466 PetscCall(VecRestoreArrayReadAndMemType(xx, &ax)); 467 PetscCall((*mdn->A->ops->multadd)(mdn->A, mdn->lvec, yy, zz)); 468 PetscFunctionReturn(0); 469 } 470 471 PetscErrorCode MatMultTranspose_MPIDense(Mat A, Vec xx, Vec yy) { 472 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 473 const PetscScalar *ax; 474 PetscScalar *ay; 475 PetscMemType axmtype, aymtype; 476 477 PetscFunctionBegin; 478 if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); 479 PetscCall(VecSet(yy, 0.0)); 480 PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec)); 481 PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype)); 482 PetscCall(VecGetArrayAndMemType(yy, &ay, &aymtype)); 483 PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM)); 484 PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM)); 485 PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax)); 486 
PetscCall(VecRestoreArrayAndMemType(yy, &ay)); 487 PetscFunctionReturn(0); 488 } 489 490 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A, Vec xx, Vec yy, Vec zz) { 491 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 492 const PetscScalar *ax; 493 PetscScalar *ay; 494 PetscMemType axmtype, aymtype; 495 496 PetscFunctionBegin; 497 if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); 498 PetscCall(VecCopy(yy, zz)); 499 PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec)); 500 PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype)); 501 PetscCall(VecGetArrayAndMemType(zz, &ay, &aymtype)); 502 PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM)); 503 PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM)); 504 PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax)); 505 PetscCall(VecRestoreArrayAndMemType(zz, &ay)); 506 PetscFunctionReturn(0); 507 } 508 509 PetscErrorCode MatGetDiagonal_MPIDense(Mat A, Vec v) { 510 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 511 PetscInt lda, len, i, n, m = A->rmap->n, radd; 512 PetscScalar *x, zero = 0.0; 513 const PetscScalar *av; 514 515 PetscFunctionBegin; 516 PetscCall(VecSet(v, zero)); 517 PetscCall(VecGetArray(v, &x)); 518 PetscCall(VecGetSize(v, &n)); 519 PetscCheck(n == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming mat and vec"); 520 len = PetscMin(a->A->rmap->n, a->A->cmap->n); 521 radd = A->rmap->rstart * m; 522 PetscCall(MatDenseGetArrayRead(a->A, &av)); 523 PetscCall(MatDenseGetLDA(a->A, &lda)); 524 for (i = 0; i < len; i++) x[i] = av[radd + i * lda + i]; 525 PetscCall(MatDenseRestoreArrayRead(a->A, &av)); 526 PetscCall(VecRestoreArray(v, &x)); 527 PetscFunctionReturn(0); 528 } 529 530 PetscErrorCode MatDestroy_MPIDense(Mat mat) { 531 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 532 533 PetscFunctionBegin; 534 #if defined(PETSC_USE_LOG) 535 PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, 
mat->cmap->N); 536 #endif 537 PetscCall(MatStashDestroy_Private(&mat->stash)); 538 PetscCheck(!mdn->vecinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 539 PetscCheck(!mdn->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 540 PetscCall(MatDestroy(&mdn->A)); 541 PetscCall(VecDestroy(&mdn->lvec)); 542 PetscCall(PetscSFDestroy(&mdn->Mvctx)); 543 PetscCall(VecDestroy(&mdn->cvec)); 544 PetscCall(MatDestroy(&mdn->cmat)); 545 546 PetscCall(PetscFree(mat->data)); 547 PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL)); 548 549 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", NULL)); 550 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", NULL)); 551 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", NULL)); 552 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", NULL)); 553 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", NULL)); 554 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", NULL)); 555 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", NULL)); 556 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", NULL)); 557 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", NULL)); 558 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", NULL)); 559 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", NULL)); 560 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL)); 561 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", NULL)); 562 #if defined(PETSC_HAVE_ELEMENTAL) 563 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", 
NULL)); 564 #endif 565 #if defined(PETSC_HAVE_SCALAPACK) 566 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", NULL)); 567 #endif 568 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", NULL)); 569 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", NULL)); 570 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", NULL)); 571 #if defined(PETSC_HAVE_CUDA) 572 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", NULL)); 573 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", NULL)); 574 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", NULL)); 575 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensecuda_mpidense_C", NULL)); 576 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", NULL)); 577 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", NULL)); 578 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", NULL)); 579 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", NULL)); 580 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArray_C", NULL)); 581 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayRead_C", NULL)); 582 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayWrite_C", NULL)); 583 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArray_C", NULL)); 584 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayRead_C", NULL)); 585 
PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayWrite_C", NULL)); 586 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAPlaceArray_C", NULL)); 587 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAResetArray_C", NULL)); 588 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAReplaceArray_C", NULL)); 589 #endif 590 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", NULL)); 591 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", NULL)); 592 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", NULL)); 593 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", NULL)); 594 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", NULL)); 595 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", NULL)); 596 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", NULL)); 597 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", NULL)); 598 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", NULL)); 599 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", NULL)); 600 601 PetscCall(PetscObjectCompose((PetscObject)mat, "DiagonalBlock", NULL)); 602 PetscFunctionReturn(0); 603 } 604 605 PETSC_INTERN PetscErrorCode MatView_SeqDense(Mat, PetscViewer); 606 607 #include <petscdraw.h> 608 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat, PetscViewer viewer) { 609 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 610 PetscMPIInt rank; 611 PetscViewerType vtype; 612 PetscBool iascii, isdraw; 613 PetscViewer sviewer; 614 PetscViewerFormat format; 615 616 PetscFunctionBegin; 617 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 618 
PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 619 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 620 if (iascii) { 621 PetscCall(PetscViewerGetType(viewer, &vtype)); 622 PetscCall(PetscViewerGetFormat(viewer, &format)); 623 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 624 MatInfo info; 625 PetscCall(MatGetInfo(mat, MAT_LOCAL, &info)); 626 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 627 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %" PetscInt_FMT " \n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, 628 (PetscInt)info.memory)); 629 PetscCall(PetscViewerFlush(viewer)); 630 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 631 if (mdn->Mvctx) PetscCall(PetscSFView(mdn->Mvctx, viewer)); 632 PetscFunctionReturn(0); 633 } else if (format == PETSC_VIEWER_ASCII_INFO) { 634 PetscFunctionReturn(0); 635 } 636 } else if (isdraw) { 637 PetscDraw draw; 638 PetscBool isnull; 639 640 PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw)); 641 PetscCall(PetscDrawIsNull(draw, &isnull)); 642 if (isnull) PetscFunctionReturn(0); 643 } 644 645 { 646 /* assemble the entire matrix onto first processor. */ 647 Mat A; 648 PetscInt M = mat->rmap->N, N = mat->cmap->N, m, row, i, nz; 649 PetscInt *cols; 650 PetscScalar *vals; 651 652 PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A)); 653 if (rank == 0) { 654 PetscCall(MatSetSizes(A, M, N, M, N)); 655 } else { 656 PetscCall(MatSetSizes(A, 0, 0, M, N)); 657 } 658 /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */ 659 PetscCall(MatSetType(A, MATMPIDENSE)); 660 PetscCall(MatMPIDenseSetPreallocation(A, NULL)); 661 PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)A)); 662 663 /* Copy the matrix ... 
This isn't the most efficient means, 664 but it's quick for now */ 665 A->insertmode = INSERT_VALUES; 666 667 row = mat->rmap->rstart; 668 m = mdn->A->rmap->n; 669 for (i = 0; i < m; i++) { 670 PetscCall(MatGetRow_MPIDense(mat, row, &nz, &cols, &vals)); 671 PetscCall(MatSetValues_MPIDense(A, 1, &row, nz, cols, vals, INSERT_VALUES)); 672 PetscCall(MatRestoreRow_MPIDense(mat, row, &nz, &cols, &vals)); 673 row++; 674 } 675 676 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 677 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 678 PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 679 if (rank == 0) { 680 PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIDense *)(A->data))->A, ((PetscObject)mat)->name)); 681 PetscCall(MatView_SeqDense(((Mat_MPIDense *)(A->data))->A, sviewer)); 682 } 683 PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 684 PetscCall(PetscViewerFlush(viewer)); 685 PetscCall(MatDestroy(&A)); 686 } 687 PetscFunctionReturn(0); 688 } 689 690 PetscErrorCode MatView_MPIDense(Mat mat, PetscViewer viewer) { 691 PetscBool iascii, isbinary, isdraw, issocket; 692 693 PetscFunctionBegin; 694 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 695 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 696 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket)); 697 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 698 699 if (iascii || issocket || isdraw) { 700 PetscCall(MatView_MPIDense_ASCIIorDraworSocket(mat, viewer)); 701 } else if (isbinary) PetscCall(MatView_Dense_Binary(mat, viewer)); 702 PetscFunctionReturn(0); 703 } 704 705 PetscErrorCode MatGetInfo_MPIDense(Mat A, MatInfoType flag, MatInfo *info) { 706 Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 707 Mat mdn = mat->A; 708 PetscLogDouble isend[5], irecv[5]; 709 710 PetscFunctionBegin; 711 info->block_size = 1.0; 712 713 
PetscCall(MatGetInfo(mdn, MAT_LOCAL, info)); 714 715 isend[0] = info->nz_used; 716 isend[1] = info->nz_allocated; 717 isend[2] = info->nz_unneeded; 718 isend[3] = info->memory; 719 isend[4] = info->mallocs; 720 if (flag == MAT_LOCAL) { 721 info->nz_used = isend[0]; 722 info->nz_allocated = isend[1]; 723 info->nz_unneeded = isend[2]; 724 info->memory = isend[3]; 725 info->mallocs = isend[4]; 726 } else if (flag == MAT_GLOBAL_MAX) { 727 PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)A))); 728 729 info->nz_used = irecv[0]; 730 info->nz_allocated = irecv[1]; 731 info->nz_unneeded = irecv[2]; 732 info->memory = irecv[3]; 733 info->mallocs = irecv[4]; 734 } else if (flag == MAT_GLOBAL_SUM) { 735 PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)A))); 736 737 info->nz_used = irecv[0]; 738 info->nz_allocated = irecv[1]; 739 info->nz_unneeded = irecv[2]; 740 info->memory = irecv[3]; 741 info->mallocs = irecv[4]; 742 } 743 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 744 info->fill_ratio_needed = 0; 745 info->factor_mallocs = 0; 746 PetscFunctionReturn(0); 747 } 748 749 PetscErrorCode MatSetOption_MPIDense(Mat A, MatOption op, PetscBool flg) { 750 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 751 752 PetscFunctionBegin; 753 switch (op) { 754 case MAT_NEW_NONZERO_LOCATIONS: 755 case MAT_NEW_NONZERO_LOCATION_ERR: 756 case MAT_NEW_NONZERO_ALLOCATION_ERR: 757 MatCheckPreallocated(A, 1); 758 PetscCall(MatSetOption(a->A, op, flg)); 759 break; 760 case MAT_ROW_ORIENTED: 761 MatCheckPreallocated(A, 1); 762 a->roworiented = flg; 763 PetscCall(MatSetOption(a->A, op, flg)); 764 break; 765 case MAT_FORCE_DIAGONAL_ENTRIES: 766 case MAT_KEEP_NONZERO_PATTERN: 767 case MAT_USE_HASH_TABLE: 768 case MAT_SORTED_FULL: PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); break; 769 case MAT_IGNORE_OFF_PROC_ENTRIES: a->donotstash = flg; break; 770 case MAT_SYMMETRIC: 
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
  case MAT_SPD:
  case MAT_IGNORE_LOWER_TRIANGULAR:
  case MAT_IGNORE_ZERO_ENTRIES:
  case MAT_SPD_ETERNAL:
    /* if the diagonal matrix is square it inherits some of the properties above */
    PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
    break;
  default: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %s", MatOptions[op]);
  }
  PetscFunctionReturn(0);
}

/* A <- diag(ll) * A * diag(rr). Either vector may be NULL to skip that side.
   Left scaling is purely local (row distribution matches ll); right scaling first
   broadcasts rr into the local work vector lvec via the Mvctx star forest, since
   every rank stores all N columns of its row block. */
PetscErrorCode MatDiagonalScale_MPIDense(Mat A, Vec ll, Vec rr) {
  Mat_MPIDense      *mdn = (Mat_MPIDense *)A->data;
  const PetscScalar *l;
  PetscScalar        x, *v, *vv, *r;
  PetscInt           i, j, s2a, s3a, s2, s3, m = mdn->A->rmap->n, n = mdn->A->cmap->n, lda;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArray(mdn->A, &vv));
  PetscCall(MatDenseGetLDA(mdn->A, &lda)); /* column stride; may exceed m */
  PetscCall(MatGetLocalSize(A, &s2, &s3));
  if (ll) {
    PetscCall(VecGetLocalSize(ll, &s2a));
    PetscCheck(s2a == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left scaling vector non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT, s2a, s2);
    PetscCall(VecGetArrayRead(ll, &l));
    for (i = 0; i < m; i++) { /* scale row i: walk along the row with stride lda */
      x = l[i];
      v = vv + i;
      for (j = 0; j < n; j++) {
        (*v) *= x;
        v += lda;
      }
    }
    PetscCall(VecRestoreArrayRead(ll, &l));
    PetscCall(PetscLogFlops(1.0 * n * m));
  }
  if (rr) {
    const PetscScalar *ar;

    PetscCall(VecGetLocalSize(rr, &s3a));
    PetscCheck(s3a == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Right scaling vec non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT ".", s3a, s3);
    PetscCall(VecGetArrayRead(rr, &ar));
    if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); /* lazily build the column scatter */
    PetscCall(VecGetArray(mdn->lvec, &r));
    PetscCall(PetscSFBcastBegin(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE));
    PetscCall(VecRestoreArrayRead(rr, &ar));
    for (i = 0; i < n; i++) { /* scale column i, which is contiguous in memory */
      x = r[i];
      v = vv + i * lda;
      for (j = 0; j < m; j++) (*v++) *= x;
    }
    PetscCall(VecRestoreArray(mdn->lvec, &r));
    PetscCall(PetscLogFlops(1.0 * n * m));
  }
  PetscCall(MatDenseRestoreArray(mdn->A, &vv));
  PetscFunctionReturn(0);
}

/* Matrix norms. Single-rank case defers entirely to the sequential block.
   In parallel: Frobenius sums |a_ij|^2 locally then Allreduce+sqrt; NORM_1 reduces
   per-column absolute sums and takes the max; NORM_INFINITY reduces the local max
   row sums. NOTE(review): the Frobenius loop iterates n*m entries of the raw array,
   which assumes lda == local row count — confirm against how the local block is allocated. */
PetscErrorCode MatNorm_MPIDense(Mat A, NormType type, PetscReal *nrm) {
  Mat_MPIDense      *mdn = (Mat_MPIDense *)A->data;
  PetscInt           i, j;
  PetscMPIInt        size;
  PetscReal          sum = 0.0;
  const PetscScalar *av, *v;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArrayRead(mdn->A, &av));
  v = av;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    PetscCall(MatNorm(mdn->A, type, nrm));
  } else {
    if (type == NORM_FROBENIUS) {
      for (i = 0; i < mdn->A->cmap->n * mdn->A->rmap->n; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
      *nrm = PetscSqrtReal(*nrm);
      PetscCall(PetscLogFlops(2.0 * mdn->A->cmap->n * mdn->A->rmap->n));
    } else if (type == NORM_1) {
      PetscReal *tmp, *tmp2;
      PetscCall(PetscCalloc2(A->cmap->N, &tmp, A->cmap->N, &tmp2));
      *nrm = 0.0;
      v    = av;
      for (j = 0; j < mdn->A->cmap->n; j++) { /* local contribution to each column sum */
        for (i = 0; i < mdn->A->rmap->n; i++) {
          tmp[j] += PetscAbsScalar(*v);
          v++;
        }
      }
      PetscCall(MPIU_Allreduce(tmp, tmp2, A->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
      for (j = 0; j < A->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      PetscCall(PetscFree2(tmp, tmp2));
      PetscCall(PetscLogFlops(A->cmap->n * A->rmap->n));
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp;
      PetscCall(MatNorm(mdn->A, type, &ntemp));
      PetscCall(MPIU_Allreduce(&ntemp, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
    } else
      SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for two norm");
  }
  PetscCall(MatDenseRestoreArrayRead(mdn->A, &av));
  PetscFunctionReturn(0);
}

/* B = A^T. Builds (or reuses) a matrix with swapped layout, then inserts each local
   column of A as a row of B via MatSetValues (off-process entries are stashed and
   resolved during assembly). MAT_INPLACE_MATRIX merges the result back into A's header. */
PetscErrorCode MatTranspose_MPIDense(Mat A, MatReuse reuse, Mat *matout) {
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat           B;
  PetscInt      M = A->rmap->N, N = A->cmap->N, m, n, *rwork, rstart = A->rmap->rstart;
  PetscInt      j, i, lda;
  PetscScalar  *v;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M)); /* local and global sizes swapped */
    PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
    PetscCall(MatMPIDenseSetPreallocation(B, NULL));
  } else B = *matout;

  m = a->A->rmap->n;
  n = a->A->cmap->n;
  PetscCall(MatDenseGetArrayRead(a->A, (const PetscScalar **)&v));
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(PetscMalloc1(m, &rwork));
  for (i = 0; i < m; i++) rwork[i] = rstart + i; /* global row indices of the local block */
  for (j = 0; j < n; j++) {
    /* column j of A (contiguous, length m) becomes global row j of B */
    PetscCall(MatSetValues(B, 1, &j, m, rwork, v, INSERT_VALUES));
    v += lda;
  }
  PetscCall(MatDenseRestoreArrayRead(a->A, (const PetscScalar **)&v));
  PetscCall(PetscFree(rwork));
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    PetscCall(MatHeaderMerge(A, &B)); /* in-place: A takes over B's innards */
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDuplicate_MPIDense(Mat, MatDuplicateOption, Mat *);
PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat, PetscScalar);

/* Finalize layouts and preallocate the local block if the user has not done so. */
PetscErrorCode MatSetUp_MPIDense(Mat A) {
  PetscFunctionBegin;
  PetscCall(PetscLayoutSetUp(A->rmap));
  PetscCall(PetscLayoutSetUp(A->cmap));
  if (!A->preallocated) PetscCall(MatMPIDenseSetPreallocation(A, NULL));
  PetscFunctionReturn(0);
}

/* Y += alpha*X, performed entirely on the local sequential blocks (layouts match). */
PetscErrorCode MatAXPY_MPIDense(Mat Y, PetscScalar alpha, Mat X, MatStructure str) {
  Mat_MPIDense *A = (Mat_MPIDense *)Y->data, *B = (Mat_MPIDense *)X->data;

  PetscFunctionBegin;
  PetscCall(MatAXPY(A->A, alpha, B->A, str));
  PetscFunctionReturn(0);
}

/* Complex-conjugate every entry; delegates to the local block. */
PetscErrorCode MatConjugate_MPIDense(Mat mat) {
  Mat_MPIDense *a = (Mat_MPIDense *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatConjugate(a->A));
  PetscFunctionReturn(0);
}

/* Keep only the real part of every entry; delegates to the local block. */
PetscErrorCode MatRealPart_MPIDense(Mat A) {
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatRealPart(a->A));
  PetscFunctionReturn(0);
}

/* Keep only the imaginary part of every entry; delegates to the local block. */
PetscErrorCode MatImaginaryPart_MPIDense(Mat A) {
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatImaginaryPart(a->A));
  PetscFunctionReturn(0);
}

/* Extract global column col into v by invoking the local block's op directly. */
static PetscErrorCode MatGetColumnVector_MPIDense(Mat A, Vec v, PetscInt col) {
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(a->A, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing local matrix");
  PetscCheck(a->A->ops->getcolumnvector, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing get column operation");
  PetscCall((*a->A->ops->getcolumnvector)(a->A, v, col));
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat, PetscInt, PetscReal *);

/* Per-column reductions (norms, sums, means). Local reductions are computed on the
   sequential block, then combined across ranks: MAX for infinity norm, SUM otherwise.
   MEAN variants are computed as SUM then divided by the global row count; NORM_2 is
   squared before the reduction and square-rooted after. */
PetscErrorCode MatGetColumnReductions_MPIDense(Mat A, PetscInt type, PetscReal *reductions) {
  PetscInt      i, m, n;
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscReal    *work;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A, &m, &n));
  PetscCall(PetscMalloc1(n, &work));
  if (type == REDUCTION_MEAN_REALPART) {
    PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_REALPART, work));
  } else if
(type == REDUCTION_MEAN_IMAGINARYPART) {
    PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_IMAGINARYPART, work));
  } else {
    PetscCall(MatGetColumnReductions_SeqDense(a->A, type, work));
  }
  if (type == NORM_2) {
    for (i = 0; i < n; i++) work[i] *= work[i]; /* square local norms so SUM reduction is valid */
  }
  if (type == NORM_INFINITY) {
    PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, A->hdr.comm));
  } else {
    PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, A->hdr.comm));
  }
  PetscCall(PetscFree(work));
  if (type == NORM_2) {
    for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
  } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < n; i++) reductions[i] /= m; /* sum -> mean over global row count */
  }
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_CUDA)
/* Shift the global diagonal on the device: delegates the entry updates to the
   CUDA kernel wrapper, bounded by this rank's row range. */
PetscErrorCode MatShift_MPIDenseCUDA(Mat A, PetscScalar alpha) {
  PetscScalar *da;
  PetscInt     lda;

  PetscFunctionBegin;
  PetscCall(MatDenseCUDAGetArray(A, &da));
  PetscCall(MatDenseGetLDA(A, &lda));
  PetscCall(PetscInfo(A, "Performing Shift on backend\n"));
  PetscCall(MatShift_DenseCUDA_Private(da, alpha, lda, A->rmap->rstart, A->rmap->rend, A->cmap->N));
  PetscCall(MatDenseCUDARestoreArray(A, &da));
  PetscFunctionReturn(0);
}

/* Expose column col as a CUDA Vec backed by the matrix storage (no copy).
   Only one column vector may be outstanding; vecinuse records col+1 until restore. */
static PetscErrorCode MatDenseGetColumnVec_MPIDenseCUDA(Mat A, PetscInt col, Vec *v) {
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscInt      lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) { /* lazily create the reusable wrapper vector (no backing array yet) */
    PetscCall(VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec));
    PetscCall(PetscLogObjectParent((PetscObject)A, (PetscObject)a->cvec));
  }
  a->vecinuse = col + 1;
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(MatDenseCUDAGetArray(a->A, (PetscScalar **)&a->ptrinuse));
  PetscCall(VecCUDAPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
  *v = a->cvec;
  PetscFunctionReturn(0);
}

/* Release the column vector handed out by MatDenseGetColumnVec_MPIDenseCUDA. */
static PetscErrorCode MatDenseRestoreColumnVec_MPIDenseCUDA(Mat A, PetscInt col, Vec *v) {
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector");
  a->vecinuse = 0;
  PetscCall(MatDenseCUDARestoreArray(a->A, (PetscScalar **)&a->ptrinuse));
  PetscCall(VecCUDAResetArray(a->cvec));
  if (v) *v = NULL;
  PetscFunctionReturn(0);
}

/* Read-only variant: same as above but the returned vector is read-locked. */
static PetscErrorCode MatDenseGetColumnVecRead_MPIDenseCUDA(Mat A, PetscInt col, Vec *v) {
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscInt      lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    PetscCall(VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec));
    PetscCall(PetscLogObjectParent((PetscObject)A, (PetscObject)a->cvec));
  }
  a->vecinuse = col + 1;
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(MatDenseCUDAGetArrayRead(a->A, &a->ptrinuse));
  PetscCall(VecCUDAPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
  PetscCall(VecLockReadPush(a->cvec));
  *v = a->cvec;
  PetscFunctionReturn(0);
}

/* Release the read-only column vector; pops the read lock before resetting the array. */
static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDenseCUDA(Mat A, PetscInt col, Vec *v) {
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
  a->vecinuse = 0;
  PetscCall(MatDenseCUDARestoreArrayRead(a->A, &a->ptrinuse));
  PetscCall(VecLockReadPop(a->cvec));
  PetscCall(VecCUDAResetArray(a->cvec));
  if (v) *v = NULL;
  PetscFunctionReturn(0);
}

/* Write-only variant: the device array is fetched with write intent (contents undefined). */
static PetscErrorCode MatDenseGetColumnVecWrite_MPIDenseCUDA(Mat A, PetscInt col, Vec *v) {
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscInt      lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    PetscCall(VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec));
    PetscCall(PetscLogObjectParent((PetscObject)A, (PetscObject)a->cvec));
  }
  a->vecinuse = col + 1;
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(MatDenseCUDAGetArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
  PetscCall(VecCUDAPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
  *v = a->cvec;
  PetscFunctionReturn(0);
}

/* Release the write-only column vector. */
static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDenseCUDA(Mat A, PetscInt col, Vec *v) {
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
  a->vecinuse = 0;
  PetscCall(MatDenseCUDARestoreArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
  PetscCall(VecCUDAResetArray(a->cvec));
  if (v) *v = NULL;
  PetscFunctionReturn(0);
}

/* The following thin wrappers forward the MatDenseCUDA array-management API to the
   local sequential CUDA block; place/reset/replace additionally guard against an
   outstanding column vector or submatrix borrow. */
static PetscErrorCode MatDenseCUDAPlaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a) {
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!l->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!l->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseCUDAPlaceArray(l->A, a));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDAResetArray_MPIDenseCUDA(Mat A) {
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!l->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!l->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseCUDAResetArray(l->A));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDAReplaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a) {
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!l->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!l->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseCUDAReplaceArray(l->A, a));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDAGetArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a) {
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseCUDAGetArrayWrite(l->A, a));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDARestoreArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a) {
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseCUDARestoreArrayWrite(l->A, a));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDAGetArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a) {
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseCUDAGetArrayRead(l->A, a));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDARestoreArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a) {
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseCUDARestoreArrayRead(l->A, a));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDAGetArray_MPIDenseCUDA(Mat A, PetscScalar **a) {
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseCUDAGetArray(l->A, a));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDARestoreArray_MPIDenseCUDA(Mat A, PetscScalar **a) {
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseCUDARestoreArray(l->A, a));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat, PetscInt, Vec *);
static PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat, PetscInt, Vec *);
static PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat, PetscInt, Vec *);
static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat, PetscInt, Vec *);
static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat, PetscInt, Vec *);
static PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat, PetscInt, Vec *);

/* Switch the matrix between CPU and GPU execution. Rebinds the local block,
   swaps the composed column-vector accessors and the shift op between the
   _MPIDense and _MPIDenseCUDA implementations, and discards any cached column
   vector / submatrix whose type no longer matches the new binding. */
static PetscErrorCode MatBindToCPU_MPIDenseCUDA(Mat mat, PetscBool bind) {
  Mat_MPIDense *d = (Mat_MPIDense *)mat->data;

  PetscFunctionBegin;
  PetscCheck(!d->vecinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!d->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  if (d->A) PetscCall(MatBindToCPU(d->A, bind));
  mat->boundtocpu = bind;
  if (!bind) {
    PetscBool iscuda;

    /* drop cached helpers that are not CUDA-typed; they are recreated on demand */
    PetscCall(PetscObjectTypeCompare((PetscObject)d->cvec, VECMPICUDA, &iscuda));
    if (!iscuda) PetscCall(VecDestroy(&d->cvec));
    PetscCall(PetscObjectTypeCompare((PetscObject)d->cmat, MATMPIDENSECUDA, &iscuda));
    if (!iscuda) PetscCall(MatDestroy(&d->cmat));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDenseCUDA));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDenseCUDA));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDenseCUDA));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDenseCUDA));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDenseCUDA));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDenseCUDA));
    mat->ops->shift = MatShift_MPIDenseCUDA;
  } else {
    PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDense));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDense));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDense));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDense));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDense));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDense));
    mat->ops->shift = MatShift_MPIDense;
  }
  if (d->cmat) PetscCall(MatBindToCPU(d->cmat, bind));
  PetscFunctionReturn(0);
}

/* Preallocate the local MATSEQDENSECUDA block, optionally adopting a user-provided
   device array d_data. Silently returns if the matrix is not MATMPIDENSECUDA. */
PetscErrorCode MatMPIDenseCUDASetPreallocation(Mat A, PetscScalar *d_data) {
  Mat_MPIDense *d = (Mat_MPIDense *)A->data;
  PetscBool     iscuda;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSECUDA, &iscuda));
  if (!iscuda) PetscFunctionReturn(0);
  PetscCall(PetscLayoutSetUp(A->rmap));
  PetscCall(PetscLayoutSetUp(A->cmap));
  if (!d->A) {
    PetscCall(MatCreate(PETSC_COMM_SELF, &d->A));
    PetscCall(PetscLogObjectParent((PetscObject)A, (PetscObject)d->A));
    /* local block is (local rows) x (ALL global columns) */
    PetscCall(MatSetSizes(d->A, A->rmap->n, A->cmap->N, A->rmap->n, A->cmap->N));
  }
  PetscCall(MatSetType(d->A, MATSEQDENSECUDA));
  PetscCall(MatSeqDenseCUDASetPreallocation(d->A, d_data));
  A->preallocated = PETSC_TRUE;
  A->assembled    = PETSC_TRUE;
  PetscFunctionReturn(0);
}
#endif

/* Fill with random values; delegates to the local block's generator. */
static PetscErrorCode MatSetRandom_MPIDense(Mat x, PetscRandom rctx) {
  Mat_MPIDense *d = (Mat_MPIDense *)x->data;

  PetscFunctionBegin;
  PetscCall(MatSetRandom(d->A, rctx));
  PetscFunctionReturn(0);
}

/* Dense storage always has every diagonal entry present; d is never written. */
static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A, PetscBool *missing, PetscInt *d) {
  PetscFunctionBegin;
  *missing = PETSC_FALSE;
  PetscFunctionReturn(0);
}

static PetscErrorCode
MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat);
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat);
static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat);
static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat);
static PetscErrorCode MatEqual_MPIDense(Mat, Mat, PetscBool *);
static PetscErrorCode MatLoad_MPIDense(Mat, PetscViewer);

/* -------------------------------------------------------------------*/
/* Operation table for MATMPIDENSE; the numbered comments index into struct _MatOps.
   NULL slots are operations this type does not implement. */
static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
                                       MatGetRow_MPIDense,
                                       MatRestoreRow_MPIDense,
                                       MatMult_MPIDense,
                                       /* 4*/ MatMultAdd_MPIDense,
                                       MatMultTranspose_MPIDense,
                                       MatMultTransposeAdd_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 10*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatTranspose_MPIDense,
                                       /* 15*/ MatGetInfo_MPIDense,
                                       MatEqual_MPIDense,
                                       MatGetDiagonal_MPIDense,
                                       MatDiagonalScale_MPIDense,
                                       MatNorm_MPIDense,
                                       /* 20*/ MatAssemblyBegin_MPIDense,
                                       MatAssemblyEnd_MPIDense,
                                       MatSetOption_MPIDense,
                                       MatZeroEntries_MPIDense,
                                       /* 24*/ MatZeroRows_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 29*/ MatSetUp_MPIDense,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPIDense,
                                       NULL,
                                       /* 34*/ MatDuplicate_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 39*/ MatAXPY_MPIDense,
                                       MatCreateSubMatrices_MPIDense,
                                       NULL,
                                       MatGetValues_MPIDense,
                                       MatCopy_MPIDense,
                                       /* 44*/ NULL,
                                       MatScale_MPIDense,
                                       MatShift_MPIDense,
                                       NULL,
                                       NULL,
                                       /* 49*/ MatSetRandom_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 54*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 59*/ MatCreateSubMatrix_MPIDense,
                                       MatDestroy_MPIDense,
                                       MatView_MPIDense,
                                       NULL,
                                       NULL,
                                       /* 64*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 69*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 74*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 79*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 83*/ MatLoad_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 89*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 94*/ NULL,
                                       NULL,
                                       MatMatTransposeMultSymbolic_MPIDense_MPIDense,
                                       MatMatTransposeMultNumeric_MPIDense_MPIDense,
                                       NULL,
                                       /* 99*/ MatProductSetFromOptions_MPIDense,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPIDense,
                                       NULL,
                                       /*104*/ NULL,
                                       MatRealPart_MPIDense,
                                       MatImaginaryPart_MPIDense,
                                       NULL,
                                       NULL,
                                       /*109*/ NULL,
                                       NULL,
                                       NULL,
                                       MatGetColumnVector_MPIDense,
                                       MatMissingDiagonal_MPIDense,
                                       /*114*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*119*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*124*/ NULL,
                                       MatGetColumnReductions_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*129*/ NULL,
                                       NULL,
                                       MatTransposeMatMultSymbolic_MPIDense_MPIDense,
                                       MatTransposeMatMultNumeric_MPIDense_MPIDense,
                                       NULL,
                                       /*134*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*139*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatCreateMPIMatConcatenateSeqMat_MPIDense,
                                       /*145*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*150*/ NULL};

/* Create (if needed) and preallocate the local sequential block: (local rows) x (ALL
   global columns), optionally wrapping a user-supplied array. Chooses MATSEQDENSE or
   MATSEQDENSECUDA to match the parallel type. Marks the matrix preallocated/assembled. */
PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat, PetscScalar *data) {
  Mat_MPIDense *a = (Mat_MPIDense *)mat->data;
  PetscBool     iscuda;

  PetscFunctionBegin;
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));
  if (!a->A) {
    PetscCall(MatCreate(PETSC_COMM_SELF, &a->A));
    PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)a->A));
    PetscCall(MatSetSizes(a->A,
mat->rmap->n, mat->cmap->N, mat->rmap->n, mat->cmap->N));
  }
  PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSECUDA, &iscuda));
  PetscCall(MatSetType(a->A, iscuda ? MATSEQDENSECUDA : MATSEQDENSE));
  PetscCall(MatSeqDenseSetPreallocation(a->A, data));
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  PetscFunctionReturn(0);
}

/* MPIAIJ -> MPIDense: densify the merged local AIJ rows, then stitch the sequential
   pieces back into a parallel dense matrix with the same column layout. */
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIDense(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) {
  Mat B, C;

  PetscFunctionBegin;
  PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &C));
  PetscCall(MatConvert_SeqAIJ_SeqDense(C, MATSEQDENSE, MAT_INITIAL_MATRIX, &B));
  PetscCall(MatDestroy(&C));
  if (reuse == MAT_REUSE_MATRIX) {
    C = *newmat;
  } else C = NULL;
  PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C));
  PetscCall(MatDestroy(&B));
  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A, &C));
  } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
  PetscFunctionReturn(0);
}

/* MPIDense -> MPIAIJ: sparsify the local dense block, then concatenate across ranks. */
PetscErrorCode MatConvert_MPIDense_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) {
  Mat B, C;

  PetscFunctionBegin;
  PetscCall(MatDenseGetLocalMatrix(A, &C));
  PetscCall(MatConvert_SeqDense_SeqAIJ(C, MATSEQAIJ, MAT_INITIAL_MATRIX, &B));
  if (reuse == MAT_REUSE_MATRIX) {
    C = *newmat;
  } else C = NULL;
  PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C));
  PetscCall(MatDestroy(&B));
  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A, &C));
  } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_ELEMENTAL)
/* MPIDense -> Elemental: push the local row block into an Elemental matrix via
   MatSetValues with ADD_VALUES (the interface requirement noted below). */
PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) {
  Mat          mat_elemental;
  PetscScalar *v;
  PetscInt     m = A->rmap->n, N = A->cmap->N, rstart = A->rmap->rstart, i, *rows, *cols;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    mat_elemental = *newmat;
    PetscCall(MatZeroEntries(*newmat)); /* required because entries below are ADDed */
  } else {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental));
    PetscCall(MatSetSizes(mat_elemental, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N));
    PetscCall(MatSetType(mat_elemental, MATELEMENTAL));
    PetscCall(MatSetUp(mat_elemental));
    PetscCall(MatSetOption(mat_elemental, MAT_ROW_ORIENTED, PETSC_FALSE));
  }

  PetscCall(PetscMalloc2(m, &rows, N, &cols));
  for (i = 0; i < N; i++) cols[i] = i;
  for (i = 0; i < m; i++) rows[i] = rstart + i;

  /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
  PetscCall(MatDenseGetArray(A, &v));
  PetscCall(MatSetValues(mat_elemental, m, rows, N, cols, v, ADD_VALUES));
  PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY));
  PetscCall(MatDenseRestoreArray(A, &v));
  PetscCall(PetscFree2(rows, cols));

  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A, &mat_elemental));
  } else {
    *newmat = mat_elemental;
  }
  PetscFunctionReturn(0);
}
#endif

/* Borrow a pointer to the storage of column col of the local block. */
static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A, PetscInt col, PetscScalar **vals) {
  Mat_MPIDense *mat = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseGetColumn(mat->A, col, vals));
  PetscFunctionReturn(0);
}

/* Return the pointer borrowed by MatDenseGetColumn_MPIDense. */
static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A, PetscScalar **vals) {
  Mat_MPIDense *mat = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseRestoreColumn(mat->A, vals));
  PetscFunctionReturn(0);
}

/* Stack per-rank sequential matrices into one parallel dense matrix on comm.
   MAT_INITIAL_MATRIX creates the output (validating that the local column counts
   sum to the global count); the numeric phase copies inmat into the local block. */
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) {
  Mat_MPIDense *mat;
  PetscInt      m, nloc, N;

  PetscFunctionBegin;
  PetscCall(MatGetSize(inmat, &m, &N));
  PetscCall(MatGetLocalSize(inmat, NULL, &nloc));
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    PetscInt sum;

    if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N));
    /* Check sum(n) = N */
    PetscCall(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm));
    PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N);

    PetscCall(MatCreateDense(comm, m, n, PETSC_DETERMINE, N, NULL, outmat));
    PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  }

  /* numeric phase */
  mat = (Mat_MPIDense *)(*outmat)->data;
  PetscCall(MatCopy(inmat, mat->A, SAME_NONZERO_PATTERN));
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_CUDA)
/* MPIDENSECUDA -> MPIDENSE: rebind to CPU, then strip the CUDA-specific composed
   functions and type name so the matrix behaves as a plain host dense matrix. */
PetscErrorCode MatConvert_MPIDenseCUDA_MPIDense(Mat M, MatType type, MatReuse reuse, Mat *newmat) {
  Mat           B;
  Mat_MPIDense *m;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    PetscCall(MatDuplicate(M, MAT_COPY_VALUES, newmat));
  } else if (reuse == MAT_REUSE_MATRIX) {
    PetscCall(MatCopy(M, *newmat, SAME_NONZERO_PATTERN));
  }

  B = *newmat;
  PetscCall(MatBindToCPU_MPIDenseCUDA(B, PETSC_TRUE));
  PetscCall(PetscFree(B->defaultvectype));
  PetscCall(PetscStrallocpy(VECSTANDARD, &B->defaultvectype));
  PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIDENSE));
  /* remove all the CUDA-specific method slots so the matrix behaves as a plain host MPIDENSE */
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpidensecuda_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAPlaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAResetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAReplaceArray_C", NULL));
  m = (Mat_MPIDense *)(B)->data;
  /* bring the inner (local) matrix back to the host sequential dense type */
  if (m->A) PetscCall(MatConvert(m->A, MATSEQDENSE, MAT_INPLACE_MATRIX, &m->A));
  B->ops->bindtocpu = NULL;
  B->offloadmask    = PETSC_OFFLOAD_CPU;
  PetscFunctionReturn(0);
}

/* Convert a MATMPIDENSE matrix to MATMPIDENSECUDA: installs the CUDA method slots
   and converts the inner sequential matrix to its CUDA counterpart. Handles
   MAT_INITIAL_MATRIX, MAT_REUSE_MATRIX, and (implicitly) MAT_INPLACE_MATRIX, where
   the conversion is applied directly to M via *newmat == M. */
PetscErrorCode MatConvert_MPIDense_MPIDenseCUDA(Mat M, MatType type, MatReuse reuse, Mat *newmat) {
  Mat           B;
  Mat_MPIDense *m;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    PetscCall(MatDuplicate(M, MAT_COPY_VALUES, newmat));
  } else if (reuse == MAT_REUSE_MATRIX) {
    PetscCall(MatCopy(M, *newmat, SAME_NONZERO_PATTERN));
  }

  B = *newmat;
  PetscCall(PetscFree(B->defaultvectype));
  PetscCall(PetscStrallocpy(VECCUDA, &B->defaultvectype));
  PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIDENSECUDA));
  /* install the CUDA-specific implementations of the composed methods */
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpidensecuda_mpidense_C", MatConvert_MPIDenseCUDA_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArray_C", MatDenseCUDAGetArray_MPIDenseCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArrayRead_C", MatDenseCUDAGetArrayRead_MPIDenseCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArrayWrite_C", MatDenseCUDAGetArrayWrite_MPIDenseCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArray_C", MatDenseCUDARestoreArray_MPIDenseCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArrayRead_C", MatDenseCUDARestoreArrayRead_MPIDenseCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArrayWrite_C", MatDenseCUDARestoreArrayWrite_MPIDenseCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAPlaceArray_C", MatDenseCUDAPlaceArray_MPIDenseCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAResetArray_C", MatDenseCUDAResetArray_MPIDenseCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAReplaceArray_C", MatDenseCUDAReplaceArray_MPIDenseCUDA));
  m = (Mat_MPIDense *)(B->data);
  if (m->A) {
    /* inner matrix exists: convert it to the CUDA type; both copies are then in sync */
    PetscCall(MatConvert(m->A, MATSEQDENSECUDA, MAT_INPLACE_MATRIX, &m->A));
    B->offloadmask = PETSC_OFFLOAD_BOTH;
  } else {
    B->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
  }
  PetscCall(MatBindToCPU_MPIDenseCUDA(B, PETSC_FALSE));

  B->ops->bindtocpu = MatBindToCPU_MPIDenseCUDA;
  PetscFunctionReturn(0);
}
#endif

/* Expose column `col` of the local dense block as an MPI Vec with read/write access.
   The vector is cached in a->cvec and its storage is placed on top of the matrix
   array; MatDenseRestoreColumnVec_MPIDense() must be called before any other
   column/submatrix access (enforced by the vecinuse/matinuse guards). */
PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A, PetscInt col, Vec *v) {
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscInt      lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  /* lazily create the cached column vector (no array yet; it is placed below) */
  if (!a->cvec) PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec));
  a->vecinuse = col + 1; /* col + 1 so that column 0 still reads as "in use" */
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(MatDenseGetArray(a->A, (PetscScalar **)&a->ptrinuse));
  PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
  *v = a->cvec;
  PetscFunctionReturn(0);
}

/* Release the column vector obtained with MatDenseGetColumnVec_MPIDense(). */
PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A, PetscInt col, Vec *v) {
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector");
  a->vecinuse = 0;
  PetscCall(MatDenseRestoreArray(a->A, (PetscScalar **)&a->ptrinuse));
  PetscCall(VecResetArray(a->cvec));
  if (v) *v = NULL;
  PetscFunctionReturn(0);
}

/* Read-only variant of MatDenseGetColumnVec_MPIDense(): uses the read-only array
   accessor and additionally pushes a read lock on the cached vector. */
PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v) {
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscInt      lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec));
  a->vecinuse = col + 1;
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(MatDenseGetArrayRead(a->A, &a->ptrinuse));
  PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
  PetscCall(VecLockReadPush(a->cvec));
  *v = a->cvec;
  PetscFunctionReturn(0);
}

/* Release the read-only column vector; pops the read lock before resetting the array. */
PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v) {
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
  a->vecinuse = 0;
  PetscCall(MatDenseRestoreArrayRead(a->A, &a->ptrinuse));
  PetscCall(VecLockReadPop(a->cvec));
  PetscCall(VecResetArray(a->cvec));
  if (v) *v = NULL;
  PetscFunctionReturn(0);
}

/* Write-only variant of MatDenseGetColumnVec_MPIDense(): uses the write-only array accessor. */
PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v) {
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscInt      lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec));
  a->vecinuse = col + 1;
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(MatDenseGetArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
  PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
  *v = a->cvec;
  PetscFunctionReturn(0);
}

/* Release the write-only column vector obtained with MatDenseGetColumnVecWrite_MPIDense(). */
PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v) {
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
  a->vecinuse = 0;
  PetscCall(MatDenseRestoreArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
  PetscCall(VecResetArray(a->cvec));
  if (v) *v = NULL;
  PetscFunctionReturn(0);
}

/* Expose the submatrix [rbegin,rend) x [cbegin,cend) of A as a cached MATMPIDENSE
   matrix whose inner block views (does not copy) A's local storage. The cached
   matrix a->cmat is reused across calls; its layouts are rebuilt only when the
   requested sizes differ from the previous request. */
PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A, PetscInt rbegin, PetscInt rend, PetscInt cbegin, PetscInt cend, Mat *v) {
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat_MPIDense *c;
  MPI_Comm      comm;
  PetscInt      pbegin, pend;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCheck(!a->vecinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  /* clip the requested global row range [rbegin,rend) to this rank's local rows */
  pbegin = PetscMax(0, PetscMin(A->rmap->rend, rbegin) - A->rmap->rstart);
  pend   = PetscMin(A->rmap->n, PetscMax(0, rend - A->rmap->rstart));
  if (!a->cmat) {
    PetscCall(MatCreate(comm, &a->cmat));
    PetscCall(PetscLogObjectParent((PetscObject)A, (PetscObject)a->cmat));
    PetscCall(MatSetType(a->cmat, ((PetscObject)A)->type_name));
    /* full row range: share A's row layout instead of building a new one */
    if (rend - rbegin == A->rmap->N) PetscCall(PetscLayoutReference(A->rmap, &a->cmat->rmap));
    else {
      PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin));
      PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin));
      PetscCall(PetscLayoutSetUp(a->cmat->rmap));
    }
    PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin));
    PetscCall(PetscLayoutSetUp(a->cmat->cmap));
  } else {
    /* decide (collectively, if local sizes can differ) whether the cached layouts still match */
    PetscBool same = (PetscBool)(rend - rbegin == a->cmat->rmap->N);
    if (same && a->cmat->rmap->N != A->rmap->N) {
      same = (PetscBool)(pend - pbegin == a->cmat->rmap->n);
      PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &same, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
    }
    if (!same) {
      PetscCall(PetscLayoutDestroy(&a->cmat->rmap));
      PetscCall(PetscLayoutCreate(comm, &a->cmat->rmap));
      PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin));
      PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin));
      PetscCall(PetscLayoutSetUp(a->cmat->rmap));
    }
    if (cend - cbegin != a->cmat->cmap->N) {
      PetscCall(PetscLayoutDestroy(&a->cmat->cmap));
      PetscCall(PetscLayoutCreate(comm, &a->cmat->cmap));
      PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin));
      PetscCall(PetscLayoutSetUp(a->cmat->cmap));
    }
  }
  c = (Mat_MPIDense *)a->cmat->data;
  PetscCheck(!c->A, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseGetSubMatrix(a->A, pbegin, pend, cbegin, cend, &c->A));
  a->cmat->preallocated = PETSC_TRUE;
  a->cmat->assembled    = PETSC_TRUE;
  a->matinuse           = cbegin + 1; /* cbegin + 1 so that 0 still reads as "in use" */
  *v                    = a->cmat;
  PetscFunctionReturn(0);
}

/* Release the cached submatrix obtained with MatDenseGetSubMatrix_MPIDense(). */
PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A, Mat *v) {
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat_MPIDense *c;
  PetscFunctionBegin;
  PetscCheck(a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetSubMatrix() first");
  PetscCheck(a->cmat, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal matrix");
  PetscCheck(*v == a->cmat, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Not the matrix obtained from MatDenseGetSubMatrix()");
  a->matinuse = 0;
  c = (Mat_MPIDense *)a->cmat->data;
  PetscCall(MatDenseRestoreSubMatrix(a->A, &c->A));
  if (v) *v = NULL;
  PetscFunctionReturn(0);
}

/*MC
   MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.

   Options Database Keys:
. -mat_type mpidense - sets the matrix type to `MATMPIDENSE` during a call to `MatSetFromOptions()`

  Level: beginner

.seealso: `MatCreateDense()`, `MATSEQDENSE`, `MATDENSE`
M*/
/* Constructor for MATMPIDENSE: allocates the type-private data, installs the
   function table, and registers all composed (queryable) methods. */
PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat) {
  Mat_MPIDense *a;

  PetscFunctionBegin;
  PetscCall(PetscNewLog(mat, &a));
  mat->data = (void *)a;
  PetscCall(PetscMemcpy(mat->ops, &MatOps_Values, sizeof(struct _MatOps)));

  mat->insertmode = NOT_SET_VALUES;

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;

  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)mat), 1, &mat->stash));

  /* stuff used for matrix vector multiply */
  a->lvec        = NULL;
  a->Mvctx       = NULL;
  a->roworiented = PETSC_TRUE;

  /* register the dense-array accessors */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", MatDenseGetLDA_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", MatDenseSetLDA_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", MatDenseGetArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", MatDenseRestoreArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", MatDenseGetArrayRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", MatDenseRestoreArrayRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", MatDenseGetArrayWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", MatDenseRestoreArrayWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", MatDensePlaceArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", MatDenseResetArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", MatDenseReplaceArray_MPIDense));
  /* register the column-vector and submatrix views */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", MatDenseGetSubMatrix_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", MatDenseRestoreSubMatrix_MPIDense));
  /* register type conversions (optional backends are compile-time guarded) */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", MatConvert_MPIDense_MPIAIJ));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", MatConvert_MPIDense_Elemental));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", MatConvert_Dense_ScaLAPACK));
#endif
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", MatConvert_MPIDense_MPIDenseCUDA));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", MatMPIDenseSetPreallocation_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
#endif

  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", MatDenseGetColumn_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", MatDenseRestoreColumn_MPIDense));
  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, MATMPIDENSE));
  PetscFunctionReturn(0);
}

/*MC
   MATMPIDENSECUDA - MATMPIDENSECUDA = "mpidensecuda" - A matrix type to be used for distributed dense matrices on GPUs.

   Options Database Keys:
.
-mat_type mpidensecuda - sets the matrix type to `MATMPIDENSECUDA` during a call to `MatSetFromOptions()`

  Level: beginner

.seealso: `MATMPIDENSE`, `MATSEQDENSE`, `MATSEQDENSECUDA`
M*/
#if defined(PETSC_HAVE_CUDA)
#include <petsc/private/deviceimpl.h>
/* Constructor for MATMPIDENSECUDA: builds a plain MPIDENSE matrix and then
   converts it in place to the CUDA type. */
PETSC_EXTERN PetscErrorCode MatCreate_MPIDenseCUDA(Mat B) {
  PetscFunctionBegin;
  /* make sure the CUDA device is initialized before any device allocation can happen */
  PetscCall(PetscDeviceInitialize(PETSC_DEVICE_CUDA));
  PetscCall(MatCreate_MPIDense(B));
  PetscCall(MatConvert_MPIDense_MPIDenseCUDA(B, MATMPIDENSECUDA, MAT_INPLACE_MATRIX, &B));
  PetscFunctionReturn(0);
}
#endif

/*MC
   MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.

   This matrix type is identical to `MATSEQDENSE` when constructed with a single process communicator,
   and `MATMPIDENSE` otherwise.

   Options Database Keys:
. -mat_type dense - sets the matrix type to `MATDENSE` during a call to `MatSetFromOptions()`

  Level: beginner

.seealso: `MATSEQDENSE`, `MATMPIDENSE`, `MATDENSECUDA`
M*/

/*MC
   MATDENSECUDA - MATDENSECUDA = "densecuda" - A matrix type to be used for dense matrices on GPUs.

   This matrix type is identical to `MATSEQDENSECUDA` when constructed with a single process communicator,
   and `MATMPIDENSECUDA` otherwise.

   Options Database Keys:
. -mat_type densecuda - sets the matrix type to `MATDENSECUDA` during a call to `MatSetFromOptions()`

  Level: beginner

.seealso: `MATSEQDENSECUDA`, `MATMPIDENSECUDA`, `MATDENSE`
M*/

/*@C
   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

   Collective

   Input Parameters:
+  B - the matrix
-  data - optional location of matrix data. Set data=NULL for PETSc
          to control all matrix memory allocation.

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.
   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space. Most users should
   set data=NULL.

   Level: intermediate

.seealso: `MATMPIDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
@*/
PetscErrorCode MatMPIDenseSetPreallocation(Mat B, PetscScalar *data) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  /* try-method dispatch: silently a no-op when B is not an MPIDENSE matrix */
  PetscTryMethod(B, "MatMPIDenseSetPreallocation_C", (Mat, PetscScalar *), (B, data));
  PetscFunctionReturn(0);
}

/*@
   MatDensePlaceArray - Allows one to replace the array in a `MATDENSE` matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Note:
   You can return to the original array with a call to `MatDenseResetArray()`. The user is responsible for freeing this array; it will not be
   freed when the matrix is destroyed.

   Level: developer

.seealso: `MATDENSE`, `MatDenseGetArray()`, `MatDenseResetArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`,
          `MatDenseReplaceArray()`
@*/
PetscErrorCode MatDensePlaceArray(Mat mat, const PetscScalar *array) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  PetscUseMethod(mat, "MatDensePlaceArray_C", (Mat, const PetscScalar *), (mat, array));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
#if defined(PETSC_HAVE_CUDA)
  /* the placed array lives on the host, so the CPU copy becomes authoritative */
  mat->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(0);
}

/*@
   MatDenseResetArray - Resets the matrix array to the one it previously had before the call to `MatDensePlaceArray()`

   Not Collective

   Input Parameter:
.
mat - the matrix

   Note:
   You can only call this after a call to `MatDensePlaceArray()`

   Level: developer

.seealso: `MATDENSE`, `MatDenseGetArray()`, `MatDensePlaceArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`
@*/
PetscErrorCode MatDenseResetArray(Mat mat) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  PetscUseMethod(mat, "MatDenseResetArray_C", (Mat), (mat));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  PetscFunctionReturn(0);
}

/*@
   MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Note:
   The memory passed in MUST be obtained with `PetscMalloc()` and CANNOT be
   freed by the user. It will be freed when the matrix is destroyed.

   Level: developer

.seealso: `MatDensePlaceArray()`, `MatDenseGetArray()`, `VecReplaceArray()`
@*/
PetscErrorCode MatDenseReplaceArray(Mat mat, const PetscScalar *array) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  PetscUseMethod(mat, "MatDenseReplaceArray_C", (Mat, const PetscScalar *), (mat, array));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
#if defined(PETSC_HAVE_CUDA)
  /* the replacement array lives on the host, so the CPU copy becomes authoritative */
  mat->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_CUDA)
/*@C
   MatDenseCUDAPlaceArray - Allows one to replace the GPU array in a `MATDENSECUDA` matrix with an
   array provided by the user.
This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Note:
   You can return to the original array with a call to `MatDenseCUDAResetArray()`. The user is responsible for freeing this array; it will not be
   freed when the matrix is destroyed. The array must have been allocated with cudaMalloc().

   Level: developer

.seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDAResetArray()`, `MatDenseCUDAReplaceArray()`
@*/
PetscErrorCode MatDenseCUDAPlaceArray(Mat mat, const PetscScalar *array) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  PetscUseMethod(mat, "MatDenseCUDAPlaceArray_C", (Mat, const PetscScalar *), (mat, array));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  /* the placed array lives on the device, so the GPU copy becomes authoritative */
  mat->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDAResetArray - Resets the matrix array to the one it previously had before the call to `MatDenseCUDAPlaceArray()`

   Not Collective

   Input Parameter:
.  mat - the matrix

   Note:
   You can only call this after a call to `MatDenseCUDAPlaceArray()`

   Level: developer

.seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDAPlaceArray()`
@*/
PetscErrorCode MatDenseCUDAResetArray(Mat mat) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  PetscUseMethod(mat, "MatDenseCUDAResetArray_C", (Mat), (mat));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDAReplaceArray - Allows one to replace the GPU array in a `MATDENSECUDA` matrix with an
   array provided by the user.
This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Note:
   This permanently replaces the GPU array and frees the memory associated with the old GPU array.
   The memory passed in CANNOT be freed by the user. It will be freed
   when the matrix is destroyed. The array should respect the matrix leading dimension.

   Level: developer

.seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDAPlaceArray()`, `MatDenseCUDAResetArray()`
@*/
PetscErrorCode MatDenseCUDAReplaceArray(Mat mat, const PetscScalar *array) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  PetscUseMethod(mat, "MatDenseCUDAReplaceArray_C", (Mat, const PetscScalar *), (mat, array));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  /* the replacement array lives on the device, so the GPU copy becomes authoritative */
  mat->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDAGetArrayWrite - Provides write access to the CUDA buffer inside a `MATDENSECUDA` matrix.

   Not Collective

   Input Parameter:
.  A - the matrix

   Output Parameter:
.  array - the GPU array in column major order

   Notes:
   The data on the GPU may not be updated due to operations done on the CPU. If you need updated data, use `MatDenseCUDAGetArray()`.

   The array must be restored with `MatDenseCUDARestoreArrayWrite()` when no longer needed.
   Level: developer

.seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayRead()`, `MatDenseCUDARestoreArrayRead()`
@*/
PetscErrorCode MatDenseCUDAGetArrayWrite(Mat A, PetscScalar **a) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscUseMethod(A, "MatDenseCUDAGetArrayWrite_C", (Mat, PetscScalar **), (A, a));
  /* the caller is expected to modify the buffer, so bump the object state */
  PetscCall(PetscObjectStateIncrease((PetscObject)A));
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDARestoreArrayWrite - Restore write access to the CUDA buffer inside a `MATDENSECUDA` matrix previously obtained with `MatDenseCUDAGetArrayWrite()`.

   Not Collective

   Input Parameters:
+  A - the matrix
-  array - the GPU array in column major order

   Level: developer

.seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`, `MatDenseCUDAGetArrayRead()`
@*/
PetscErrorCode MatDenseCUDARestoreArrayWrite(Mat A, PetscScalar **a) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscUseMethod(A, "MatDenseCUDARestoreArrayWrite_C", (Mat, PetscScalar **), (A, a));
  PetscCall(PetscObjectStateIncrease((PetscObject)A));
  /* the device buffer was just written, so it is now the up-to-date copy */
  A->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDAGetArrayRead - Provides read-only access to the CUDA buffer inside a `MATDENSECUDA` matrix. The array must be restored with `MatDenseCUDARestoreArrayRead()` when no longer needed.

   Not Collective

   Input Parameter:
.  A - the matrix

   Output Parameter:
.  array - the GPU array in column major order

   Note:
   Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use `MatDenseCUDAGetArrayWrite()`.
2194 2195 Level: developer 2196 2197 .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()` 2198 @*/ 2199 PetscErrorCode MatDenseCUDAGetArrayRead(Mat A, const PetscScalar **a) { 2200 PetscFunctionBegin; 2201 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2202 PetscUseMethod(A, "MatDenseCUDAGetArrayRead_C", (Mat, const PetscScalar **), (A, a)); 2203 PetscFunctionReturn(0); 2204 } 2205 2206 /*@C 2207 MatDenseCUDARestoreArrayRead - Restore read-only access to the CUDA buffer inside a `MATDENSECUDA` matrix previously obtained with a call to `MatDenseCUDAGetArrayRead()`. 2208 2209 Not Collective 2210 2211 Input Parameters: 2212 + A - the matrix 2213 - array - the GPU array in column major order 2214 2215 Note: 2216 Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use `MatDenseCUDAGetArrayWrite()`. 2217 2218 Level: developer 2219 2220 .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDAGetArrayRead()` 2221 @*/ 2222 PetscErrorCode MatDenseCUDARestoreArrayRead(Mat A, const PetscScalar **a) { 2223 PetscFunctionBegin; 2224 PetscUseMethod(A, "MatDenseCUDARestoreArrayRead_C", (Mat, const PetscScalar **), (A, a)); 2225 PetscFunctionReturn(0); 2226 } 2227 2228 /*@C 2229 MatDenseCUDAGetArray - Provides access to the CUDA buffer inside a `MATDENSECUDA` matrix. The array must be restored with `MatDenseCUDARestoreArray()` when no longer needed. 2230 2231 Not Collective 2232 2233 Input Parameters: 2234 . A - the matrix 2235 2236 Output Parameters 2237 . array - the GPU array in column major order 2238 2239 Note: 2240 Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use `MatDenseCUDAGetArrayWrite()`. For read-only access, use `MatDenseCUDAGetArrayRead()`. 
   Level: developer

.seealso: `MATDENSECUDA`, `MatDenseCUDAGetArrayRead()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`
@*/
PetscErrorCode MatDenseCUDAGetArray(Mat A, PetscScalar **a) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscUseMethod(A, "MatDenseCUDAGetArray_C", (Mat, PetscScalar **), (A, a));
  /* read-write access: assume the caller may modify the buffer */
  PetscCall(PetscObjectStateIncrease((PetscObject)A));
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDARestoreArray - Restore access to the CUDA buffer inside a `MATDENSECUDA` matrix previously obtained with `MatDenseCUDAGetArray()`.

   Not Collective

   Input Parameters:
+  A - the matrix
-  array - the GPU array in column major order

   Level: developer

.seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`, `MatDenseCUDAGetArrayRead()`
@*/
PetscErrorCode MatDenseCUDARestoreArray(Mat A, PetscScalar **a) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscUseMethod(A, "MatDenseCUDARestoreArray_C", (Mat, PetscScalar **), (A, a));
  PetscCall(PetscObjectStateIncrease((PetscObject)A));
  /* mutable access may have modified the device buffer */
  A->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}
#endif

/*@C
   MatCreateDense - Creates a matrix in `MATDENSE` format.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
.  n - number of local columns (or `PETSC_DECIDE` to have calculated if N is given)
.  M - number of global rows (or `PETSC_DECIDE` to have calculated if m is given)
.  N - number of global columns (or `PETSC_DECIDE` to have calculated if n is given)
-  data - optional location of matrix data.
Set data to NULL (`PETSC_NULL_SCALAR` for Fortran users) for PETSc
   to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   Although local portions of the matrix are stored in column-major
   order, the matrix is partitioned across MPI ranks by row.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space. Most users should
   set data=NULL (`PETSC_NULL_SCALAR` for Fortran users).

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Level: intermediate

.seealso: `MATDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
@*/
PetscErrorCode MatCreateDense(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar *data, Mat *A) {
  PetscFunctionBegin;
  PetscCall(MatCreate(comm, A));
  PetscCall(MatSetSizes(*A, m, n, M, N));
  PetscCall(MatSetType(*A, MATDENSE));
  /* both preallocation calls are issued; presumably each is a try-method that only
     takes effect on the matching (seq or mpi) actual type -- the MPI variant is
     (see MatMPIDenseSetPreallocation in this file) */
  PetscCall(MatSeqDenseSetPreallocation(*A, data));
  PetscCall(MatMPIDenseSetPreallocation(*A, data));
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_CUDA)
/*@C
   MatCreateDenseCUDA - Creates a matrix in `MATDENSECUDA` format using CUDA.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
.  n - number of local columns (or `PETSC_DECIDE` to have calculated if N is given)
.  M - number of global rows (or `PETSC_DECIDE` to have calculated if m is given)
.  N - number of global columns (or `PETSC_DECIDE` to have calculated if n is given)
-  data - optional location of GPU matrix data. Set data=NULL for PETSc
          to control matrix memory allocation.

   Output Parameter:
.
A - the matrix 2339 2340 Level: intermediate 2341 2342 .seealso: `MATDENSECUDA`, `MatCreate()`, `MatCreateDense()` 2343 @*/ 2344 PetscErrorCode MatCreateDenseCUDA(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar *data, Mat *A) { 2345 PetscFunctionBegin; 2346 PetscCall(MatCreate(comm, A)); 2347 PetscValidLogicalCollectiveBool(*A, !!data, 6); 2348 PetscCall(MatSetSizes(*A, m, n, M, N)); 2349 PetscCall(MatSetType(*A, MATDENSECUDA)); 2350 PetscCall(MatSeqDenseCUDASetPreallocation(*A, data)); 2351 PetscCall(MatMPIDenseCUDASetPreallocation(*A, data)); 2352 PetscFunctionReturn(0); 2353 } 2354 #endif 2355 2356 static PetscErrorCode MatDuplicate_MPIDense(Mat A, MatDuplicateOption cpvalues, Mat *newmat) { 2357 Mat mat; 2358 Mat_MPIDense *a, *oldmat = (Mat_MPIDense *)A->data; 2359 2360 PetscFunctionBegin; 2361 *newmat = NULL; 2362 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat)); 2363 PetscCall(MatSetSizes(mat, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 2364 PetscCall(MatSetType(mat, ((PetscObject)A)->type_name)); 2365 a = (Mat_MPIDense *)mat->data; 2366 2367 mat->factortype = A->factortype; 2368 mat->assembled = PETSC_TRUE; 2369 mat->preallocated = PETSC_TRUE; 2370 2371 mat->insertmode = NOT_SET_VALUES; 2372 a->donotstash = oldmat->donotstash; 2373 2374 PetscCall(PetscLayoutReference(A->rmap, &mat->rmap)); 2375 PetscCall(PetscLayoutReference(A->cmap, &mat->cmap)); 2376 2377 PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A)); 2378 PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)a->A)); 2379 2380 *newmat = mat; 2381 PetscFunctionReturn(0); 2382 } 2383 2384 PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer) { 2385 PetscBool isbinary; 2386 #if defined(PETSC_HAVE_HDF5) 2387 PetscBool ishdf5; 2388 #endif 2389 2390 PetscFunctionBegin; 2391 PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1); 2392 PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2); 2393 /* force binary viewer to load .info file if it has not 
yet done so */ 2394 PetscCall(PetscViewerSetUp(viewer)); 2395 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 2396 #if defined(PETSC_HAVE_HDF5) 2397 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5)); 2398 #endif 2399 if (isbinary) { 2400 PetscCall(MatLoad_Dense_Binary(newMat, viewer)); 2401 #if defined(PETSC_HAVE_HDF5) 2402 } else if (ishdf5) { 2403 PetscCall(MatLoad_Dense_HDF5(newMat, viewer)); 2404 #endif 2405 } else SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name); 2406 PetscFunctionReturn(0); 2407 } 2408 2409 static PetscErrorCode MatEqual_MPIDense(Mat A, Mat B, PetscBool *flag) { 2410 Mat_MPIDense *matB = (Mat_MPIDense *)B->data, *matA = (Mat_MPIDense *)A->data; 2411 Mat a, b; 2412 2413 PetscFunctionBegin; 2414 a = matA->A; 2415 b = matB->A; 2416 PetscCall(MatEqual(a, b, flag)); 2417 PetscCall(MPIU_Allreduce(MPI_IN_PLACE, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A))); 2418 PetscFunctionReturn(0); 2419 } 2420 2421 PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data) { 2422 Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data; 2423 2424 PetscFunctionBegin; 2425 PetscCall(PetscFree2(atb->sendbuf, atb->recvcounts)); 2426 PetscCall(MatDestroy(&atb->atb)); 2427 PetscCall(PetscFree(atb)); 2428 PetscFunctionReturn(0); 2429 } 2430 2431 PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data) { 2432 Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data; 2433 2434 PetscFunctionBegin; 2435 PetscCall(PetscFree2(abt->buf[0], abt->buf[1])); 2436 PetscCall(PetscFree2(abt->recvcounts, abt->recvdispls)); 2437 PetscCall(PetscFree(abt)); 2438 PetscFunctionReturn(0); 2439 } 2440 2441 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) { 2442 Mat_MPIDense *a = (Mat_MPIDense 
*)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data; 2443 Mat_TransMatMultDense *atb; 2444 MPI_Comm comm; 2445 PetscMPIInt size, *recvcounts; 2446 PetscScalar *carray, *sendbuf; 2447 const PetscScalar *atbarray; 2448 PetscInt i, cN = C->cmap->N, proc, k, j, lda; 2449 const PetscInt *ranges; 2450 2451 PetscFunctionBegin; 2452 MatCheckProduct(C, 3); 2453 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 2454 atb = (Mat_TransMatMultDense *)C->product->data; 2455 recvcounts = atb->recvcounts; 2456 sendbuf = atb->sendbuf; 2457 2458 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 2459 PetscCallMPI(MPI_Comm_size(comm, &size)); 2460 2461 /* compute atbarray = aseq^T * bseq */ 2462 PetscCall(MatTransposeMatMult(a->A, b->A, atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX, PETSC_DEFAULT, &atb->atb)); 2463 2464 PetscCall(MatGetOwnershipRanges(C, &ranges)); 2465 2466 /* arrange atbarray into sendbuf */ 2467 PetscCall(MatDenseGetArrayRead(atb->atb, &atbarray)); 2468 PetscCall(MatDenseGetLDA(atb->atb, &lda)); 2469 for (proc = 0, k = 0; proc < size; proc++) { 2470 for (j = 0; j < cN; j++) { 2471 for (i = ranges[proc]; i < ranges[proc + 1]; i++) sendbuf[k++] = atbarray[i + j * lda]; 2472 } 2473 } 2474 PetscCall(MatDenseRestoreArrayRead(atb->atb, &atbarray)); 2475 2476 /* sum all atbarray to local values of C */ 2477 PetscCall(MatDenseGetArrayWrite(c->A, &carray)); 2478 PetscCallMPI(MPI_Reduce_scatter(sendbuf, carray, recvcounts, MPIU_SCALAR, MPIU_SUM, comm)); 2479 PetscCall(MatDenseRestoreArrayWrite(c->A, &carray)); 2480 PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 2481 PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 2482 PetscFunctionReturn(0); 2483 } 2484 2485 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C) { 2486 MPI_Comm comm; 2487 PetscMPIInt size; 2488 PetscInt cm = A->cmap->n, cM, cN = B->cmap->N; 2489 Mat_TransMatMultDense *atb; 2490 
PetscBool cisdense; 2491 PetscInt i; 2492 const PetscInt *ranges; 2493 2494 PetscFunctionBegin; 2495 MatCheckProduct(C, 4); 2496 PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 2497 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 2498 if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) { 2499 SETERRQ(comm, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend); 2500 } 2501 2502 /* create matrix product C */ 2503 PetscCall(MatSetSizes(C, cm, B->cmap->n, A->cmap->N, B->cmap->N)); 2504 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, "")); 2505 if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 2506 PetscCall(MatSetUp(C)); 2507 2508 /* create data structure for reuse C */ 2509 PetscCallMPI(MPI_Comm_size(comm, &size)); 2510 PetscCall(PetscNew(&atb)); 2511 cM = C->rmap->N; 2512 PetscCall(PetscMalloc2(cM * cN, &atb->sendbuf, size, &atb->recvcounts)); 2513 PetscCall(MatGetOwnershipRanges(C, &ranges)); 2514 for (i = 0; i < size; i++) atb->recvcounts[i] = (ranges[i + 1] - ranges[i]) * cN; 2515 2516 C->product->data = atb; 2517 C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense; 2518 PetscFunctionReturn(0); 2519 } 2520 2521 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C) { 2522 MPI_Comm comm; 2523 PetscMPIInt i, size; 2524 PetscInt maxRows, bufsiz; 2525 PetscMPIInt tag; 2526 PetscInt alg; 2527 Mat_MatTransMultDense *abt; 2528 Mat_Product *product = C->product; 2529 PetscBool flg; 2530 2531 PetscFunctionBegin; 2532 MatCheckProduct(C, 4); 2533 PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 2534 /* check local size of A and B */ 2535 
PetscCheck(A->cmap->n == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local column dimensions are incompatible, A (%" PetscInt_FMT ") != B (%" PetscInt_FMT ")", A->cmap->n, B->cmap->n); 2536 2537 PetscCall(PetscStrcmp(product->alg, "allgatherv", &flg)); 2538 alg = flg ? 0 : 1; 2539 2540 /* setup matrix product C */ 2541 PetscCall(MatSetSizes(C, A->rmap->n, B->rmap->n, A->rmap->N, B->rmap->N)); 2542 PetscCall(MatSetType(C, MATMPIDENSE)); 2543 PetscCall(MatSetUp(C)); 2544 PetscCall(PetscObjectGetNewTag((PetscObject)C, &tag)); 2545 2546 /* create data structure for reuse C */ 2547 PetscCall(PetscObjectGetComm((PetscObject)C, &comm)); 2548 PetscCallMPI(MPI_Comm_size(comm, &size)); 2549 PetscCall(PetscNew(&abt)); 2550 abt->tag = tag; 2551 abt->alg = alg; 2552 switch (alg) { 2553 case 1: /* alg: "cyclic" */ 2554 for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i])); 2555 bufsiz = A->cmap->N * maxRows; 2556 PetscCall(PetscMalloc2(bufsiz, &(abt->buf[0]), bufsiz, &(abt->buf[1]))); 2557 break; 2558 default: /* alg: "allgatherv" */ 2559 PetscCall(PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1]))); 2560 PetscCall(PetscMalloc2(size, &(abt->recvcounts), size + 1, &(abt->recvdispls))); 2561 for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N; 2562 for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i]; 2563 break; 2564 } 2565 2566 C->product->data = abt; 2567 C->product->destroy = MatDestroy_MatMatTransMult_MPIDense_MPIDense; 2568 C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense; 2569 PetscFunctionReturn(0); 2570 } 2571 2572 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C) { 2573 Mat_MPIDense *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data; 2574 Mat_MatTransMultDense *abt; 2575 MPI_Comm 
comm; 2576 PetscMPIInt rank, size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom; 2577 PetscScalar *sendbuf, *recvbuf = NULL, *cv; 2578 PetscInt i, cK = A->cmap->N, k, j, bn; 2579 PetscScalar _DOne = 1.0, _DZero = 0.0; 2580 const PetscScalar *av, *bv; 2581 PetscBLASInt cm, cn, ck, alda, blda = 0, clda; 2582 MPI_Request reqs[2]; 2583 const PetscInt *ranges; 2584 2585 PetscFunctionBegin; 2586 MatCheckProduct(C, 3); 2587 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 2588 abt = (Mat_MatTransMultDense *)C->product->data; 2589 PetscCall(PetscObjectGetComm((PetscObject)C, &comm)); 2590 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 2591 PetscCallMPI(MPI_Comm_size(comm, &size)); 2592 PetscCall(MatDenseGetArrayRead(a->A, &av)); 2593 PetscCall(MatDenseGetArrayRead(b->A, &bv)); 2594 PetscCall(MatDenseGetArrayWrite(c->A, &cv)); 2595 PetscCall(MatDenseGetLDA(a->A, &i)); 2596 PetscCall(PetscBLASIntCast(i, &alda)); 2597 PetscCall(MatDenseGetLDA(b->A, &i)); 2598 PetscCall(PetscBLASIntCast(i, &blda)); 2599 PetscCall(MatDenseGetLDA(c->A, &i)); 2600 PetscCall(PetscBLASIntCast(i, &clda)); 2601 PetscCall(MatGetOwnershipRanges(B, &ranges)); 2602 bn = B->rmap->n; 2603 if (blda == bn) { 2604 sendbuf = (PetscScalar *)bv; 2605 } else { 2606 sendbuf = abt->buf[0]; 2607 for (k = 0, i = 0; i < cK; i++) { 2608 for (j = 0; j < bn; j++, k++) sendbuf[k] = bv[i * blda + j]; 2609 } 2610 } 2611 if (size > 1) { 2612 sendto = (rank + size - 1) % size; 2613 recvfrom = (rank + size + 1) % size; 2614 } else { 2615 sendto = recvfrom = 0; 2616 } 2617 PetscCall(PetscBLASIntCast(cK, &ck)); 2618 PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm)); 2619 recvisfrom = rank; 2620 for (i = 0; i < size; i++) { 2621 /* we have finished receiving in sending, bufs can be read/modified */ 2622 PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */ 2623 PetscInt nextbn = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom]; 
2624 2625 if (nextrecvisfrom != rank) { 2626 /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */ 2627 sendsiz = cK * bn; 2628 recvsiz = cK * nextbn; 2629 recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1]; 2630 PetscCallMPI(MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0])); 2631 PetscCallMPI(MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1])); 2632 } 2633 2634 /* local aseq * sendbuf^T */ 2635 PetscCall(PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn)); 2636 if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &cm, &cn, &ck, &_DOne, av, &alda, sendbuf, &cn, &_DZero, cv + clda * ranges[recvisfrom], &clda)); 2637 2638 if (nextrecvisfrom != rank) { 2639 /* wait for the sends and receives to complete, swap sendbuf and recvbuf */ 2640 PetscCallMPI(MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE)); 2641 } 2642 bn = nextbn; 2643 recvisfrom = nextrecvisfrom; 2644 sendbuf = recvbuf; 2645 } 2646 PetscCall(MatDenseRestoreArrayRead(a->A, &av)); 2647 PetscCall(MatDenseRestoreArrayRead(b->A, &bv)); 2648 PetscCall(MatDenseRestoreArrayWrite(c->A, &cv)); 2649 PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 2650 PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 2651 PetscFunctionReturn(0); 2652 } 2653 2654 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C) { 2655 Mat_MPIDense *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data; 2656 Mat_MatTransMultDense *abt; 2657 MPI_Comm comm; 2658 PetscMPIInt size; 2659 PetscScalar *cv, *sendbuf, *recvbuf; 2660 const PetscScalar *av, *bv; 2661 PetscInt blda, i, cK = A->cmap->N, k, j, bn; 2662 PetscScalar _DOne = 1.0, _DZero = 0.0; 2663 PetscBLASInt cm, cn, ck, alda, clda; 2664 2665 PetscFunctionBegin; 2666 MatCheckProduct(C, 3); 2667 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 2668 abt = 
(Mat_MatTransMultDense *)C->product->data; 2669 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 2670 PetscCallMPI(MPI_Comm_size(comm, &size)); 2671 PetscCall(MatDenseGetArrayRead(a->A, &av)); 2672 PetscCall(MatDenseGetArrayRead(b->A, &bv)); 2673 PetscCall(MatDenseGetArrayWrite(c->A, &cv)); 2674 PetscCall(MatDenseGetLDA(a->A, &i)); 2675 PetscCall(PetscBLASIntCast(i, &alda)); 2676 PetscCall(MatDenseGetLDA(b->A, &blda)); 2677 PetscCall(MatDenseGetLDA(c->A, &i)); 2678 PetscCall(PetscBLASIntCast(i, &clda)); 2679 /* copy transpose of B into buf[0] */ 2680 bn = B->rmap->n; 2681 sendbuf = abt->buf[0]; 2682 recvbuf = abt->buf[1]; 2683 for (k = 0, j = 0; j < bn; j++) { 2684 for (i = 0; i < cK; i++, k++) sendbuf[k] = bv[i * blda + j]; 2685 } 2686 PetscCall(MatDenseRestoreArrayRead(b->A, &bv)); 2687 PetscCallMPI(MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm)); 2688 PetscCall(PetscBLASIntCast(cK, &ck)); 2689 PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm)); 2690 PetscCall(PetscBLASIntCast(c->A->cmap->n, &cn)); 2691 if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &cm, &cn, &ck, &_DOne, av, &alda, recvbuf, &ck, &_DZero, cv, &clda)); 2692 PetscCall(MatDenseRestoreArrayRead(a->A, &av)); 2693 PetscCall(MatDenseRestoreArrayRead(b->A, &bv)); 2694 PetscCall(MatDenseRestoreArrayWrite(c->A, &cv)); 2695 PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 2696 PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 2697 PetscFunctionReturn(0); 2698 } 2699 2700 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) { 2701 Mat_MatTransMultDense *abt; 2702 2703 PetscFunctionBegin; 2704 MatCheckProduct(C, 3); 2705 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 2706 abt = (Mat_MatTransMultDense *)C->product->data; 2707 switch (abt->alg) { 2708 case 1: PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C)); break; 
2709 default: PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C)); break; 2710 } 2711 PetscFunctionReturn(0); 2712 } 2713 2714 PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data) { 2715 Mat_MatMultDense *ab = (Mat_MatMultDense *)data; 2716 2717 PetscFunctionBegin; 2718 PetscCall(MatDestroy(&ab->Ce)); 2719 PetscCall(MatDestroy(&ab->Ae)); 2720 PetscCall(MatDestroy(&ab->Be)); 2721 PetscCall(PetscFree(ab)); 2722 PetscFunctionReturn(0); 2723 } 2724 2725 #if defined(PETSC_HAVE_ELEMENTAL) 2726 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) { 2727 Mat_MatMultDense *ab; 2728 2729 PetscFunctionBegin; 2730 MatCheckProduct(C, 3); 2731 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Missing product data"); 2732 ab = (Mat_MatMultDense *)C->product->data; 2733 PetscCall(MatConvert_MPIDense_Elemental(A, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Ae)); 2734 PetscCall(MatConvert_MPIDense_Elemental(B, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Be)); 2735 PetscCall(MatMatMultNumeric_Elemental(ab->Ae, ab->Be, ab->Ce)); 2736 PetscCall(MatConvert(ab->Ce, MATMPIDENSE, MAT_REUSE_MATRIX, &C)); 2737 PetscFunctionReturn(0); 2738 } 2739 2740 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C) { 2741 Mat Ae, Be, Ce; 2742 Mat_MatMultDense *ab; 2743 2744 PetscFunctionBegin; 2745 MatCheckProduct(C, 4); 2746 PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 2747 /* check local size of A and B */ 2748 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 2749 SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend); 2750 } 2751 2752 /* create elemental matrices Ae and Be */ 2753 
PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &Ae)); 2754 PetscCall(MatSetSizes(Ae, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N)); 2755 PetscCall(MatSetType(Ae, MATELEMENTAL)); 2756 PetscCall(MatSetUp(Ae)); 2757 PetscCall(MatSetOption(Ae, MAT_ROW_ORIENTED, PETSC_FALSE)); 2758 2759 PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &Be)); 2760 PetscCall(MatSetSizes(Be, PETSC_DECIDE, PETSC_DECIDE, B->rmap->N, B->cmap->N)); 2761 PetscCall(MatSetType(Be, MATELEMENTAL)); 2762 PetscCall(MatSetUp(Be)); 2763 PetscCall(MatSetOption(Be, MAT_ROW_ORIENTED, PETSC_FALSE)); 2764 2765 /* compute symbolic Ce = Ae*Be */ 2766 PetscCall(MatCreate(PetscObjectComm((PetscObject)C), &Ce)); 2767 PetscCall(MatMatMultSymbolic_Elemental(Ae, Be, fill, Ce)); 2768 2769 /* setup C */ 2770 PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, PETSC_DECIDE, PETSC_DECIDE)); 2771 PetscCall(MatSetType(C, MATDENSE)); 2772 PetscCall(MatSetUp(C)); 2773 2774 /* create data structure for reuse Cdense */ 2775 PetscCall(PetscNew(&ab)); 2776 ab->Ae = Ae; 2777 ab->Be = Be; 2778 ab->Ce = Ce; 2779 2780 C->product->data = ab; 2781 C->product->destroy = MatDestroy_MatMatMult_MPIDense_MPIDense; 2782 C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense; 2783 PetscFunctionReturn(0); 2784 } 2785 #endif 2786 /* ----------------------------------------------- */ 2787 #if defined(PETSC_HAVE_ELEMENTAL) 2788 static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C) { 2789 PetscFunctionBegin; 2790 C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense; 2791 C->ops->productsymbolic = MatProductSymbolic_AB; 2792 PetscFunctionReturn(0); 2793 } 2794 #endif 2795 2796 static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C) { 2797 Mat_Product *product = C->product; 2798 Mat A = product->A, B = product->B; 2799 2800 PetscFunctionBegin; 2801 if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) 2802 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions 
are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend); 2803 C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense; 2804 C->ops->productsymbolic = MatProductSymbolic_AtB; 2805 PetscFunctionReturn(0); 2806 } 2807 2808 static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C) { 2809 Mat_Product *product = C->product; 2810 const char *algTypes[2] = {"allgatherv", "cyclic"}; 2811 PetscInt alg, nalg = 2; 2812 PetscBool flg = PETSC_FALSE; 2813 2814 PetscFunctionBegin; 2815 /* Set default algorithm */ 2816 alg = 0; /* default is allgatherv */ 2817 PetscCall(PetscStrcmp(product->alg, "default", &flg)); 2818 if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg])); 2819 2820 /* Get runtime option */ 2821 if (product->api_user) { 2822 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatTransposeMult", "Mat"); 2823 PetscCall(PetscOptionsEList("-matmattransmult_mpidense_mpidense_via", "Algorithmic approach", "MatMatTransposeMult", algTypes, nalg, algTypes[alg], &alg, &flg)); 2824 PetscOptionsEnd(); 2825 } else { 2826 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_ABt", "Mat"); 2827 PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_ABt", algTypes, nalg, algTypes[alg], &alg, &flg)); 2828 PetscOptionsEnd(); 2829 } 2830 if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg])); 2831 2832 C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense; 2833 C->ops->productsymbolic = MatProductSymbolic_ABt; 2834 PetscFunctionReturn(0); 2835 } 2836 2837 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C) { 2838 Mat_Product *product = C->product; 2839 2840 PetscFunctionBegin; 2841 switch (product->type) { 2842 #if defined(PETSC_HAVE_ELEMENTAL) 2843 
case MATPRODUCT_AB: PetscCall(MatProductSetFromOptions_MPIDense_AB(C)); break; 2844 #endif 2845 case MATPRODUCT_AtB: PetscCall(MatProductSetFromOptions_MPIDense_AtB(C)); break; 2846 case MATPRODUCT_ABt: PetscCall(MatProductSetFromOptions_MPIDense_ABt(C)); break; 2847 default: break; 2848 } 2849 PetscFunctionReturn(0); 2850 } 2851