1 2 /* 3 Basic functions for basic parallel dense matrices. 4 */ 5 6 #include <../src/mat/impls/dense/mpi/mpidense.h> /*I "petscmat.h" I*/ 7 #include <../src/mat/impls/aij/mpi/mpiaij.h> 8 #include <petscblaslapack.h> 9 10 /*@ 11 MatDenseGetLocalMatrix - For a `MATMPIDENSE` or `MATSEQDENSE` matrix returns the sequential 12 matrix that represents the operator. For sequential matrices it returns itself. 13 14 Input Parameter: 15 . A - the sequential or MPI dense matrix 16 17 Output Parameter: 18 . B - the inner matrix 19 20 Level: intermediate 21 22 .seealso: `MATDENSE`, `MATMPIDENSE`, `MATSEQDENSE` 23 @*/ 24 PetscErrorCode MatDenseGetLocalMatrix(Mat A, Mat *B) { 25 Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 26 PetscBool flg; 27 28 PetscFunctionBegin; 29 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 30 PetscValidPointer(B, 2); 31 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &flg)); 32 if (flg) *B = mat->A; 33 else { 34 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQDENSE, &flg)); 35 PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)A)->type_name); 36 *B = A; 37 } 38 PetscFunctionReturn(0); 39 } 40 41 PetscErrorCode MatCopy_MPIDense(Mat A, Mat B, MatStructure s) { 42 Mat_MPIDense *Amat = (Mat_MPIDense *)A->data; 43 Mat_MPIDense *Bmat = (Mat_MPIDense *)B->data; 44 45 PetscFunctionBegin; 46 PetscCall(MatCopy(Amat->A, Bmat->A, s)); 47 PetscFunctionReturn(0); 48 } 49 50 PetscErrorCode MatShift_MPIDense(Mat A, PetscScalar alpha) { 51 Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 52 PetscInt j, lda, rstart = A->rmap->rstart, rend = A->rmap->rend, rend2; 53 PetscScalar *v; 54 55 PetscFunctionBegin; 56 PetscCall(MatDenseGetArray(mat->A, &v)); 57 PetscCall(MatDenseGetLDA(mat->A, &lda)); 58 rend2 = PetscMin(rend, A->cmap->N); 59 if (rend2 > rstart) { 60 for (j = rstart; j < rend2; j++) v[j - rstart + j * lda] += alpha; 61 PetscCall(PetscLogFlops(rend2 - rstart)); 62 } 63 
PetscCall(MatDenseRestoreArray(mat->A, &v)); 64 PetscFunctionReturn(0); 65 } 66 67 PetscErrorCode MatGetRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) { 68 Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 69 PetscInt lrow, rstart = A->rmap->rstart, rend = A->rmap->rend; 70 71 PetscFunctionBegin; 72 PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows"); 73 lrow = row - rstart; 74 PetscCall(MatGetRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v)); 75 PetscFunctionReturn(0); 76 } 77 78 PetscErrorCode MatRestoreRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) { 79 Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 80 PetscInt lrow, rstart = A->rmap->rstart, rend = A->rmap->rend; 81 82 PetscFunctionBegin; 83 PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows"); 84 lrow = row - rstart; 85 PetscCall(MatRestoreRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v)); 86 PetscFunctionReturn(0); 87 } 88 89 PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A, Mat *a) { 90 Mat_MPIDense *mdn = (Mat_MPIDense *)A->data; 91 PetscInt m = A->rmap->n, rstart = A->rmap->rstart; 92 PetscScalar *array; 93 MPI_Comm comm; 94 PetscBool flg; 95 Mat B; 96 97 PetscFunctionBegin; 98 PetscCall(MatHasCongruentLayouts(A, &flg)); 99 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only square matrices supported."); 100 PetscCall(PetscObjectQuery((PetscObject)A, "DiagonalBlock", (PetscObject *)&B)); 101 if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */ 102 103 PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSECUDA, &flg)); 104 PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. 
Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSECUDA); 105 PetscCall(PetscObjectGetComm((PetscObject)(mdn->A), &comm)); 106 PetscCall(MatCreate(comm, &B)); 107 PetscCall(MatSetSizes(B, m, m, m, m)); 108 PetscCall(MatSetType(B, ((PetscObject)mdn->A)->type_name)); 109 PetscCall(MatDenseGetArrayRead(mdn->A, (const PetscScalar **)&array)); 110 PetscCall(MatSeqDenseSetPreallocation(B, array + m * rstart)); 111 PetscCall(MatDenseRestoreArrayRead(mdn->A, (const PetscScalar **)&array)); 112 PetscCall(PetscObjectCompose((PetscObject)A, "DiagonalBlock", (PetscObject)B)); 113 *a = B; 114 PetscCall(MatDestroy(&B)); 115 } else *a = B; 116 PetscFunctionReturn(0); 117 } 118 119 PetscErrorCode MatSetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], const PetscScalar v[], InsertMode addv) { 120 Mat_MPIDense *A = (Mat_MPIDense *)mat->data; 121 PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row; 122 PetscBool roworiented = A->roworiented; 123 124 PetscFunctionBegin; 125 for (i = 0; i < m; i++) { 126 if (idxm[i] < 0) continue; 127 PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large"); 128 if (idxm[i] >= rstart && idxm[i] < rend) { 129 row = idxm[i] - rstart; 130 if (roworiented) { 131 PetscCall(MatSetValues(A->A, 1, &row, n, idxn, v + i * n, addv)); 132 } else { 133 for (j = 0; j < n; j++) { 134 if (idxn[j] < 0) continue; 135 PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large"); 136 PetscCall(MatSetValues(A->A, 1, &row, 1, &idxn[j], v + i + j * m, addv)); 137 } 138 } 139 } else if (!A->donotstash) { 140 mat->assembled = PETSC_FALSE; 141 if (roworiented) { 142 PetscCall(MatStashValuesRow_Private(&mat->stash, idxm[i], n, idxn, v + i * n, PETSC_FALSE)); 143 } else { 144 PetscCall(MatStashValuesCol_Private(&mat->stash, idxm[i], n, idxn, v + i, m, PETSC_FALSE)); 145 } 146 } 147 } 148 PetscFunctionReturn(0); 149 } 
150 151 PetscErrorCode MatGetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[]) { 152 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 153 PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row; 154 155 PetscFunctionBegin; 156 for (i = 0; i < m; i++) { 157 if (idxm[i] < 0) continue; /* negative row */ 158 PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large"); 159 if (idxm[i] >= rstart && idxm[i] < rend) { 160 row = idxm[i] - rstart; 161 for (j = 0; j < n; j++) { 162 if (idxn[j] < 0) continue; /* negative column */ 163 PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large"); 164 PetscCall(MatGetValues(mdn->A, 1, &row, 1, &idxn[j], v + i * n + j)); 165 } 166 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported"); 167 } 168 PetscFunctionReturn(0); 169 } 170 171 static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A, PetscInt *lda) { 172 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 173 174 PetscFunctionBegin; 175 PetscCall(MatDenseGetLDA(a->A, lda)); 176 PetscFunctionReturn(0); 177 } 178 179 static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A, PetscInt lda) { 180 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 181 PetscBool iscuda; 182 183 PetscFunctionBegin; 184 if (!a->A) { 185 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 186 PetscCall(PetscLayoutSetUp(A->rmap)); 187 PetscCall(PetscLayoutSetUp(A->cmap)); 188 PetscCall(MatCreate(PETSC_COMM_SELF, &a->A)); 189 PetscCall(PetscLogObjectParent((PetscObject)A, (PetscObject)a->A)); 190 PetscCall(MatSetSizes(a->A, A->rmap->n, A->cmap->N, A->rmap->n, A->cmap->N)); 191 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSECUDA, &iscuda)); 192 PetscCall(MatSetType(a->A, iscuda ? 
MATSEQDENSECUDA : MATSEQDENSE)); 193 } 194 PetscCall(MatDenseSetLDA(a->A, lda)); 195 PetscFunctionReturn(0); 196 } 197 198 static PetscErrorCode MatDenseGetArray_MPIDense(Mat A, PetscScalar **array) { 199 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 200 201 PetscFunctionBegin; 202 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 203 PetscCall(MatDenseGetArray(a->A, array)); 204 PetscFunctionReturn(0); 205 } 206 207 static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A, const PetscScalar **array) { 208 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 209 210 PetscFunctionBegin; 211 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 212 PetscCall(MatDenseGetArrayRead(a->A, array)); 213 PetscFunctionReturn(0); 214 } 215 216 static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A, PetscScalar **array) { 217 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 218 219 PetscFunctionBegin; 220 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 221 PetscCall(MatDenseGetArrayWrite(a->A, array)); 222 PetscFunctionReturn(0); 223 } 224 225 static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A, const PetscScalar *array) { 226 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 227 228 PetscFunctionBegin; 229 PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 230 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 231 PetscCall(MatDensePlaceArray(a->A, array)); 232 PetscFunctionReturn(0); 233 } 234 235 static PetscErrorCode MatDenseResetArray_MPIDense(Mat A) { 236 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 237 238 PetscFunctionBegin; 239 PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call 
MatDenseRestoreColumnVec() first"); 240 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 241 PetscCall(MatDenseResetArray(a->A)); 242 PetscFunctionReturn(0); 243 } 244 245 static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A, const PetscScalar *array) { 246 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 247 248 PetscFunctionBegin; 249 PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 250 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 251 PetscCall(MatDenseReplaceArray(a->A, array)); 252 PetscFunctionReturn(0); 253 } 254 255 static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A, IS isrow, IS iscol, MatReuse scall, Mat *B) { 256 Mat_MPIDense *mat = (Mat_MPIDense *)A->data, *newmatd; 257 PetscInt lda, i, j, rstart, rend, nrows, ncols, Ncols, nlrows, nlcols; 258 const PetscInt *irow, *icol; 259 const PetscScalar *v; 260 PetscScalar *bv; 261 Mat newmat; 262 IS iscol_local; 263 MPI_Comm comm_is, comm_mat; 264 265 PetscFunctionBegin; 266 PetscCall(PetscObjectGetComm((PetscObject)A, &comm_mat)); 267 PetscCall(PetscObjectGetComm((PetscObject)iscol, &comm_is)); 268 PetscCheck(comm_mat == comm_is, PETSC_COMM_SELF, PETSC_ERR_ARG_NOTSAMECOMM, "IS communicator must match matrix communicator"); 269 270 PetscCall(ISAllGather(iscol, &iscol_local)); 271 PetscCall(ISGetIndices(isrow, &irow)); 272 PetscCall(ISGetIndices(iscol_local, &icol)); 273 PetscCall(ISGetLocalSize(isrow, &nrows)); 274 PetscCall(ISGetLocalSize(iscol, &ncols)); 275 PetscCall(ISGetSize(iscol, &Ncols)); /* global number of columns, size of iscol_local */ 276 277 /* No parallel redistribution currently supported! Should really check each index set 278 to comfirm that it is OK. ... Currently supports only submatrix same partitioning as 279 original matrix! 
*/ 280 281 PetscCall(MatGetLocalSize(A, &nlrows, &nlcols)); 282 PetscCall(MatGetOwnershipRange(A, &rstart, &rend)); 283 284 /* Check submatrix call */ 285 if (scall == MAT_REUSE_MATRIX) { 286 /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */ 287 /* Really need to test rows and column sizes! */ 288 newmat = *B; 289 } else { 290 /* Create and fill new matrix */ 291 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &newmat)); 292 PetscCall(MatSetSizes(newmat, nrows, ncols, PETSC_DECIDE, Ncols)); 293 PetscCall(MatSetType(newmat, ((PetscObject)A)->type_name)); 294 PetscCall(MatMPIDenseSetPreallocation(newmat, NULL)); 295 } 296 297 /* Now extract the data pointers and do the copy, column at a time */ 298 newmatd = (Mat_MPIDense *)newmat->data; 299 PetscCall(MatDenseGetArray(newmatd->A, &bv)); 300 PetscCall(MatDenseGetArrayRead(mat->A, &v)); 301 PetscCall(MatDenseGetLDA(mat->A, &lda)); 302 for (i = 0; i < Ncols; i++) { 303 const PetscScalar *av = v + lda * icol[i]; 304 for (j = 0; j < nrows; j++) *bv++ = av[irow[j] - rstart]; 305 } 306 PetscCall(MatDenseRestoreArrayRead(mat->A, &v)); 307 PetscCall(MatDenseRestoreArray(newmatd->A, &bv)); 308 309 /* Assemble the matrices so that the correct flags are set */ 310 PetscCall(MatAssemblyBegin(newmat, MAT_FINAL_ASSEMBLY)); 311 PetscCall(MatAssemblyEnd(newmat, MAT_FINAL_ASSEMBLY)); 312 313 /* Free work space */ 314 PetscCall(ISRestoreIndices(isrow, &irow)); 315 PetscCall(ISRestoreIndices(iscol_local, &icol)); 316 PetscCall(ISDestroy(&iscol_local)); 317 *B = newmat; 318 PetscFunctionReturn(0); 319 } 320 321 PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A, PetscScalar **array) { 322 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 323 324 PetscFunctionBegin; 325 PetscCall(MatDenseRestoreArray(a->A, array)); 326 PetscFunctionReturn(0); 327 } 328 329 PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A, const PetscScalar **array) { 330 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 331 332 
PetscFunctionBegin; 333 PetscCall(MatDenseRestoreArrayRead(a->A, array)); 334 PetscFunctionReturn(0); 335 } 336 337 PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A, PetscScalar **array) { 338 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 339 340 PetscFunctionBegin; 341 PetscCall(MatDenseRestoreArrayWrite(a->A, array)); 342 PetscFunctionReturn(0); 343 } 344 345 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat, MatAssemblyType mode) { 346 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 347 PetscInt nstash, reallocs; 348 349 PetscFunctionBegin; 350 if (mdn->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 351 352 PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range)); 353 PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs)); 354 PetscCall(PetscInfo(mdn->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs)); 355 PetscFunctionReturn(0); 356 } 357 358 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat, MatAssemblyType mode) { 359 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 360 PetscInt i, *row, *col, flg, j, rstart, ncols; 361 PetscMPIInt n; 362 PetscScalar *val; 363 364 PetscFunctionBegin; 365 if (!mdn->donotstash && !mat->nooffprocentries) { 366 /* wait on receives */ 367 while (1) { 368 PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg)); 369 if (!flg) break; 370 371 for (i = 0; i < n;) { 372 /* Now identify the consecutive vals belonging to the same row */ 373 for (j = i, rstart = row[j]; j < n; j++) { 374 if (row[j] != rstart) break; 375 } 376 if (j < n) ncols = j - i; 377 else ncols = n - i; 378 /* Now assemble all these values with a single function call */ 379 PetscCall(MatSetValues_MPIDense(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode)); 380 i = j; 381 } 382 } 383 PetscCall(MatStashScatterEnd_Private(&mat->stash)); 384 } 385 386 PetscCall(MatAssemblyBegin(mdn->A, mode)); 387 PetscCall(MatAssemblyEnd(mdn->A, mode)); 388 
PetscFunctionReturn(0); 389 } 390 391 PetscErrorCode MatZeroEntries_MPIDense(Mat A) { 392 Mat_MPIDense *l = (Mat_MPIDense *)A->data; 393 394 PetscFunctionBegin; 395 PetscCall(MatZeroEntries(l->A)); 396 PetscFunctionReturn(0); 397 } 398 399 PetscErrorCode MatZeroRows_MPIDense(Mat A, PetscInt n, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) { 400 Mat_MPIDense *l = (Mat_MPIDense *)A->data; 401 PetscInt i, len, *lrows; 402 403 PetscFunctionBegin; 404 /* get locally owned rows */ 405 PetscCall(PetscLayoutMapLocal(A->rmap, n, rows, &len, &lrows, NULL)); 406 /* fix right hand side if needed */ 407 if (x && b) { 408 const PetscScalar *xx; 409 PetscScalar *bb; 410 411 PetscCall(VecGetArrayRead(x, &xx)); 412 PetscCall(VecGetArrayWrite(b, &bb)); 413 for (i = 0; i < len; ++i) bb[lrows[i]] = diag * xx[lrows[i]]; 414 PetscCall(VecRestoreArrayRead(x, &xx)); 415 PetscCall(VecRestoreArrayWrite(b, &bb)); 416 } 417 PetscCall(MatZeroRows(l->A, len, lrows, 0.0, NULL, NULL)); 418 if (diag != 0.0) { 419 Vec d; 420 421 PetscCall(MatCreateVecs(A, NULL, &d)); 422 PetscCall(VecSet(d, diag)); 423 PetscCall(MatDiagonalSet(A, d, INSERT_VALUES)); 424 PetscCall(VecDestroy(&d)); 425 } 426 PetscCall(PetscFree(lrows)); 427 PetscFunctionReturn(0); 428 } 429 430 PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat, Vec, Vec); 431 PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat, Vec, Vec, Vec); 432 PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat, Vec, Vec); 433 PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat, Vec, Vec, Vec); 434 435 PetscErrorCode MatMult_MPIDense(Mat mat, Vec xx, Vec yy) { 436 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 437 const PetscScalar *ax; 438 PetscScalar *ay; 439 PetscMemType axmtype, aymtype; 440 441 PetscFunctionBegin; 442 if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat)); 443 PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype)); 444 PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype)); 445 
PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE)); 446 PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE)); 447 PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay)); 448 PetscCall(VecRestoreArrayReadAndMemType(xx, &ax)); 449 PetscCall((*mdn->A->ops->mult)(mdn->A, mdn->lvec, yy)); 450 PetscFunctionReturn(0); 451 } 452 453 PetscErrorCode MatMultAdd_MPIDense(Mat mat, Vec xx, Vec yy, Vec zz) { 454 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 455 const PetscScalar *ax; 456 PetscScalar *ay; 457 PetscMemType axmtype, aymtype; 458 459 PetscFunctionBegin; 460 if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat)); 461 PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype)); 462 PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype)); 463 PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE)); 464 PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE)); 465 PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay)); 466 PetscCall(VecRestoreArrayReadAndMemType(xx, &ax)); 467 PetscCall((*mdn->A->ops->multadd)(mdn->A, mdn->lvec, yy, zz)); 468 PetscFunctionReturn(0); 469 } 470 471 PetscErrorCode MatMultTranspose_MPIDense(Mat A, Vec xx, Vec yy) { 472 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 473 const PetscScalar *ax; 474 PetscScalar *ay; 475 PetscMemType axmtype, aymtype; 476 477 PetscFunctionBegin; 478 if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); 479 PetscCall(VecSet(yy, 0.0)); 480 PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec)); 481 PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype)); 482 PetscCall(VecGetArrayAndMemType(yy, &ay, &aymtype)); 483 PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM)); 484 PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM)); 485 PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax)); 486 
PetscCall(VecRestoreArrayAndMemType(yy, &ay)); 487 PetscFunctionReturn(0); 488 } 489 490 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A, Vec xx, Vec yy, Vec zz) { 491 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 492 const PetscScalar *ax; 493 PetscScalar *ay; 494 PetscMemType axmtype, aymtype; 495 496 PetscFunctionBegin; 497 if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); 498 PetscCall(VecCopy(yy, zz)); 499 PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec)); 500 PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype)); 501 PetscCall(VecGetArrayAndMemType(zz, &ay, &aymtype)); 502 PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM)); 503 PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM)); 504 PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax)); 505 PetscCall(VecRestoreArrayAndMemType(zz, &ay)); 506 PetscFunctionReturn(0); 507 } 508 509 PetscErrorCode MatGetDiagonal_MPIDense(Mat A, Vec v) { 510 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 511 PetscInt lda, len, i, n, m = A->rmap->n, radd; 512 PetscScalar *x, zero = 0.0; 513 const PetscScalar *av; 514 515 PetscFunctionBegin; 516 PetscCall(VecSet(v, zero)); 517 PetscCall(VecGetArray(v, &x)); 518 PetscCall(VecGetSize(v, &n)); 519 PetscCheck(n == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming mat and vec"); 520 len = PetscMin(a->A->rmap->n, a->A->cmap->n); 521 radd = A->rmap->rstart * m; 522 PetscCall(MatDenseGetArrayRead(a->A, &av)); 523 PetscCall(MatDenseGetLDA(a->A, &lda)); 524 for (i = 0; i < len; i++) x[i] = av[radd + i * lda + i]; 525 PetscCall(MatDenseRestoreArrayRead(a->A, &av)); 526 PetscCall(VecRestoreArray(v, &x)); 527 PetscFunctionReturn(0); 528 } 529 530 PetscErrorCode MatDestroy_MPIDense(Mat mat) { 531 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 532 533 PetscFunctionBegin; 534 #if defined(PETSC_USE_LOG) 535 PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, 
mat->cmap->N); 536 #endif 537 PetscCall(MatStashDestroy_Private(&mat->stash)); 538 PetscCheck(!mdn->vecinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 539 PetscCheck(!mdn->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 540 PetscCall(MatDestroy(&mdn->A)); 541 PetscCall(VecDestroy(&mdn->lvec)); 542 PetscCall(PetscSFDestroy(&mdn->Mvctx)); 543 PetscCall(VecDestroy(&mdn->cvec)); 544 PetscCall(MatDestroy(&mdn->cmat)); 545 546 PetscCall(PetscFree(mat->data)); 547 PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL)); 548 549 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", NULL)); 550 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", NULL)); 551 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", NULL)); 552 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", NULL)); 553 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", NULL)); 554 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", NULL)); 555 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", NULL)); 556 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", NULL)); 557 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", NULL)); 558 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", NULL)); 559 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", NULL)); 560 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL)); 561 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", NULL)); 562 #if defined(PETSC_HAVE_ELEMENTAL) 563 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", 
NULL)); 564 #endif 565 #if defined(PETSC_HAVE_SCALAPACK) 566 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", NULL)); 567 #endif 568 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", NULL)); 569 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", NULL)); 570 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", NULL)); 571 #if defined(PETSC_HAVE_CUDA) 572 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", NULL)); 573 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", NULL)); 574 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", NULL)); 575 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensecuda_mpidense_C", NULL)); 576 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", NULL)); 577 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", NULL)); 578 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", NULL)); 579 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", NULL)); 580 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArray_C", NULL)); 581 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayRead_C", NULL)); 582 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayWrite_C", NULL)); 583 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArray_C", NULL)); 584 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayRead_C", NULL)); 585 
PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayWrite_C", NULL)); 586 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAPlaceArray_C", NULL)); 587 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAResetArray_C", NULL)); 588 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAReplaceArray_C", NULL)); 589 #endif 590 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", NULL)); 591 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", NULL)); 592 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", NULL)); 593 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", NULL)); 594 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", NULL)); 595 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", NULL)); 596 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", NULL)); 597 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", NULL)); 598 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", NULL)); 599 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", NULL)); 600 601 PetscCall(PetscObjectCompose((PetscObject)mat, "DiagonalBlock", NULL)); 602 PetscFunctionReturn(0); 603 } 604 605 PETSC_INTERN PetscErrorCode MatView_SeqDense(Mat, PetscViewer); 606 607 #include <petscdraw.h> 608 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat, PetscViewer viewer) { 609 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 610 PetscMPIInt rank; 611 PetscViewerType vtype; 612 PetscBool iascii, isdraw; 613 PetscViewer sviewer; 614 PetscViewerFormat format; 615 616 PetscFunctionBegin; 617 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 618 
PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 619 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 620 if (iascii) { 621 PetscCall(PetscViewerGetType(viewer, &vtype)); 622 PetscCall(PetscViewerGetFormat(viewer, &format)); 623 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 624 MatInfo info; 625 PetscCall(MatGetInfo(mat, MAT_LOCAL, &info)); 626 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 627 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %" PetscInt_FMT " \n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, 628 (PetscInt)info.memory)); 629 PetscCall(PetscViewerFlush(viewer)); 630 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 631 if (mdn->Mvctx) PetscCall(PetscSFView(mdn->Mvctx, viewer)); 632 PetscFunctionReturn(0); 633 } else if (format == PETSC_VIEWER_ASCII_INFO) { 634 PetscFunctionReturn(0); 635 } 636 } else if (isdraw) { 637 PetscDraw draw; 638 PetscBool isnull; 639 640 PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw)); 641 PetscCall(PetscDrawIsNull(draw, &isnull)); 642 if (isnull) PetscFunctionReturn(0); 643 } 644 645 { 646 /* assemble the entire matrix onto first processor. */ 647 Mat A; 648 PetscInt M = mat->rmap->N, N = mat->cmap->N, m, row, i, nz; 649 PetscInt *cols; 650 PetscScalar *vals; 651 652 PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A)); 653 if (rank == 0) { 654 PetscCall(MatSetSizes(A, M, N, M, N)); 655 } else { 656 PetscCall(MatSetSizes(A, 0, 0, M, N)); 657 } 658 /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */ 659 PetscCall(MatSetType(A, MATMPIDENSE)); 660 PetscCall(MatMPIDenseSetPreallocation(A, NULL)); 661 PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)A)); 662 663 /* Copy the matrix ... 
This isn't the most efficient means, 664 but it's quick for now */ 665 A->insertmode = INSERT_VALUES; 666 667 row = mat->rmap->rstart; 668 m = mdn->A->rmap->n; 669 for (i = 0; i < m; i++) { 670 PetscCall(MatGetRow_MPIDense(mat, row, &nz, &cols, &vals)); 671 PetscCall(MatSetValues_MPIDense(A, 1, &row, nz, cols, vals, INSERT_VALUES)); 672 PetscCall(MatRestoreRow_MPIDense(mat, row, &nz, &cols, &vals)); 673 row++; 674 } 675 676 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 677 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 678 PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 679 if (rank == 0) { 680 PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIDense *)(A->data))->A, ((PetscObject)mat)->name)); 681 PetscCall(MatView_SeqDense(((Mat_MPIDense *)(A->data))->A, sviewer)); 682 } 683 PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 684 PetscCall(PetscViewerFlush(viewer)); 685 PetscCall(MatDestroy(&A)); 686 } 687 PetscFunctionReturn(0); 688 } 689 690 PetscErrorCode MatView_MPIDense(Mat mat, PetscViewer viewer) { 691 PetscBool iascii, isbinary, isdraw, issocket; 692 693 PetscFunctionBegin; 694 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 695 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 696 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket)); 697 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 698 699 if (iascii || issocket || isdraw) { 700 PetscCall(MatView_MPIDense_ASCIIorDraworSocket(mat, viewer)); 701 } else if (isbinary) PetscCall(MatView_Dense_Binary(mat, viewer)); 702 PetscFunctionReturn(0); 703 } 704 705 PetscErrorCode MatGetInfo_MPIDense(Mat A, MatInfoType flag, MatInfo *info) { 706 Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 707 Mat mdn = mat->A; 708 PetscLogDouble isend[5], irecv[5]; 709 710 PetscFunctionBegin; 711 info->block_size = 1.0; 712 713 
PetscCall(MatGetInfo(mdn, MAT_LOCAL, info)); 714 715 isend[0] = info->nz_used; 716 isend[1] = info->nz_allocated; 717 isend[2] = info->nz_unneeded; 718 isend[3] = info->memory; 719 isend[4] = info->mallocs; 720 if (flag == MAT_LOCAL) { 721 info->nz_used = isend[0]; 722 info->nz_allocated = isend[1]; 723 info->nz_unneeded = isend[2]; 724 info->memory = isend[3]; 725 info->mallocs = isend[4]; 726 } else if (flag == MAT_GLOBAL_MAX) { 727 PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)A))); 728 729 info->nz_used = irecv[0]; 730 info->nz_allocated = irecv[1]; 731 info->nz_unneeded = irecv[2]; 732 info->memory = irecv[3]; 733 info->mallocs = irecv[4]; 734 } else if (flag == MAT_GLOBAL_SUM) { 735 PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)A))); 736 737 info->nz_used = irecv[0]; 738 info->nz_allocated = irecv[1]; 739 info->nz_unneeded = irecv[2]; 740 info->memory = irecv[3]; 741 info->mallocs = irecv[4]; 742 } 743 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 744 info->fill_ratio_needed = 0; 745 info->factor_mallocs = 0; 746 PetscFunctionReturn(0); 747 } 748 749 PetscErrorCode MatSetOption_MPIDense(Mat A, MatOption op, PetscBool flg) { 750 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 751 752 PetscFunctionBegin; 753 switch (op) { 754 case MAT_NEW_NONZERO_LOCATIONS: 755 case MAT_NEW_NONZERO_LOCATION_ERR: 756 case MAT_NEW_NONZERO_ALLOCATION_ERR: 757 MatCheckPreallocated(A, 1); 758 PetscCall(MatSetOption(a->A, op, flg)); 759 break; 760 case MAT_ROW_ORIENTED: 761 MatCheckPreallocated(A, 1); 762 a->roworiented = flg; 763 PetscCall(MatSetOption(a->A, op, flg)); 764 break; 765 case MAT_FORCE_DIAGONAL_ENTRIES: 766 case MAT_KEEP_NONZERO_PATTERN: 767 case MAT_USE_HASH_TABLE: 768 case MAT_SORTED_FULL: PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); break; 769 case MAT_IGNORE_OFF_PROC_ENTRIES: a->donotstash = flg; break; 770 case MAT_SYMMETRIC: 
case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
  case MAT_SPD:
  case MAT_IGNORE_LOWER_TRIANGULAR:
  case MAT_IGNORE_ZERO_ENTRIES:
  case MAT_SPD_ETERNAL:
    /* if the diagonal matrix is square it inherits some of the properties above */
    PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
    break;
  default: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %s", MatOptions[op]);
  }
  PetscFunctionReturn(0);
}

/* A = diag(ll) * A * diag(rr). Row scaling (ll) is purely local; column
   scaling (rr) first broadcasts rr into the local work vector lvec via the
   star-forest Mvctx, since every rank stores all N columns of its rows. */
PetscErrorCode MatDiagonalScale_MPIDense(Mat A, Vec ll, Vec rr) {
  Mat_MPIDense      *mdn = (Mat_MPIDense *)A->data;
  const PetscScalar *l;
  PetscScalar        x, *v, *vv, *r;
  PetscInt           i, j, s2a, s3a, s2, s3, m = mdn->A->rmap->n, n = mdn->A->cmap->n, lda;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArray(mdn->A, &vv));
  PetscCall(MatDenseGetLDA(mdn->A, &lda));
  PetscCall(MatGetLocalSize(A, &s2, &s3));
  if (ll) {
    PetscCall(VecGetLocalSize(ll, &s2a));
    PetscCheck(s2a == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left scaling vector non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT, s2a, s2);
    PetscCall(VecGetArrayRead(ll, &l));
    for (i = 0; i < m; i++) {
      x = l[i];
      v = vv + i; /* walk row i across columns in lda strides (column-major storage) */
      for (j = 0; j < n; j++) {
        (*v) *= x;
        v += lda;
      }
    }
    PetscCall(VecRestoreArrayRead(ll, &l));
    PetscCall(PetscLogFlops(1.0 * n * m));
  }
  if (rr) {
    const PetscScalar *ar;

    PetscCall(VecGetLocalSize(rr, &s3a));
    PetscCheck(s3a == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Right scaling vec non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT ".", s3a, s3);
    PetscCall(VecGetArrayRead(rr, &ar));
    if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); /* lazily build the SF + lvec */
    PetscCall(VecGetArray(mdn->lvec, &r));
    /* gather the distributed rr into the sequential lvec on every rank */
    PetscCall(PetscSFBcastBegin(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE));
    PetscCall(VecRestoreArrayRead(rr, &ar));
    for (i = 0; i < n; i++) {
      x = r[i];
      v = vv + i * lda; /* column i is contiguous */
      for (j = 0; j < m; j++) (*v++) *= x;
    }
    PetscCall(VecRestoreArray(mdn->lvec, &r));
    PetscCall(PetscLogFlops(1.0 * n * m));
  }
  PetscCall(MatDenseRestoreArray(mdn->A, &vv));
  PetscFunctionReturn(0);
}

/* Matrix norms. Single-rank case delegates to the sequential matrix;
   otherwise Frobenius sums |a_ij|^2 and reduces, NORM_1 reduces per-column
   absolute sums then takes the max, NORM_INFINITY reduces the local max row
   sums. The two-norm is not supported in parallel. */
PetscErrorCode MatNorm_MPIDense(Mat A, NormType type, PetscReal *nrm) {
  Mat_MPIDense      *mdn = (Mat_MPIDense *)A->data;
  PetscInt           i, j;
  PetscMPIInt        size;
  PetscReal          sum = 0.0;
  const PetscScalar *av, *v;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArrayRead(mdn->A, &av));
  v = av;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    PetscCall(MatNorm(mdn->A, type, nrm));
  } else {
    if (type == NORM_FROBENIUS) {
      /* NOTE(review): walks cmap->n*rmap->n entries linearly — assumes the
         local block has no padding (lda == m); confirm against callers */
      for (i = 0; i < mdn->A->cmap->n * mdn->A->rmap->n; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
      *nrm = PetscSqrtReal(*nrm);
      PetscCall(PetscLogFlops(2.0 * mdn->A->cmap->n * mdn->A->rmap->n));
    } else if (type == NORM_1) {
      PetscReal *tmp, *tmp2;
      PetscCall(PetscCalloc2(A->cmap->N, &tmp, A->cmap->N, &tmp2));
      *nrm = 0.0;
      v    = av;
      for (j = 0; j < mdn->A->cmap->n; j++) { /* local absolute column sums */
        for (i = 0; i < mdn->A->rmap->n; i++) {
          tmp[j] += PetscAbsScalar(*v);
          v++;
        }
      }
      PetscCall(MPIU_Allreduce(tmp, tmp2, A->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
      for (j = 0; j < A->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      PetscCall(PetscFree2(tmp, tmp2));
      PetscCall(PetscLogFlops(A->cmap->n * A->rmap->n));
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp;
      PetscCall(MatNorm(mdn->A, type, &ntemp));
      PetscCall(MPIU_Allreduce(&ntemp, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
    } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for two norm");
  }
  PetscCall(MatDenseRestoreArrayRead(mdn->A, &av));
  PetscFunctionReturn(0);
}

/* Transpose by re-inserting local columns as rows of the new matrix B and
   assembling (communication happens inside MatSetValues/MatAssembly). For
   MAT_INPLACE_MATRIX the result header is merged back into A. */
PetscErrorCode MatTranspose_MPIDense(Mat A, MatReuse reuse, Mat *matout) {
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat           B;
  PetscInt      M = A->rmap->N, N = A->cmap->N, m, n, *rwork, rstart = A->rmap->rstart;
  PetscInt      j, i, lda;
  PetscScalar  *v;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M)); /* sizes swapped */
    PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
    PetscCall(MatMPIDenseSetPreallocation(B, NULL));
  } else B = *matout;

  m = a->A->rmap->n;
  n = a->A->cmap->n;
  PetscCall(MatDenseGetArrayRead(a->A, (const PetscScalar **)&v));
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(PetscMalloc1(m, &rwork));
  for (i = 0; i < m; i++) rwork[i] = rstart + i; /* global indices of local rows */
  for (j = 0; j < n; j++) {
    /* column j of A becomes row j of B */
    PetscCall(MatSetValues(B, 1, &j, m, rwork, v, INSERT_VALUES));
    v += lda;
  }
  PetscCall(MatDenseRestoreArrayRead(a->A, (const PetscScalar **)&v));
  PetscCall(PetscFree(rwork));
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    PetscCall(MatHeaderMerge(A, &B));
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDuplicate_MPIDense(Mat, MatDuplicateOption, Mat *);
PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat, PetscScalar);

/* Finalize layouts and preallocate the local dense block if not done yet */
PetscErrorCode MatSetUp_MPIDense(Mat A) {
  PetscFunctionBegin;
  PetscCall(PetscLayoutSetUp(A->rmap));
  PetscCall(PetscLayoutSetUp(A->cmap));
  if
(!A->preallocated) PetscCall(MatMPIDenseSetPreallocation(A, NULL)); 930 PetscFunctionReturn(0); 931 } 932 933 PetscErrorCode MatAXPY_MPIDense(Mat Y, PetscScalar alpha, Mat X, MatStructure str) { 934 Mat_MPIDense *A = (Mat_MPIDense *)Y->data, *B = (Mat_MPIDense *)X->data; 935 936 PetscFunctionBegin; 937 PetscCall(MatAXPY(A->A, alpha, B->A, str)); 938 PetscFunctionReturn(0); 939 } 940 941 PetscErrorCode MatConjugate_MPIDense(Mat mat) { 942 Mat_MPIDense *a = (Mat_MPIDense *)mat->data; 943 944 PetscFunctionBegin; 945 PetscCall(MatConjugate(a->A)); 946 PetscFunctionReturn(0); 947 } 948 949 PetscErrorCode MatRealPart_MPIDense(Mat A) { 950 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 951 952 PetscFunctionBegin; 953 PetscCall(MatRealPart(a->A)); 954 PetscFunctionReturn(0); 955 } 956 957 PetscErrorCode MatImaginaryPart_MPIDense(Mat A) { 958 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 959 960 PetscFunctionBegin; 961 PetscCall(MatImaginaryPart(a->A)); 962 PetscFunctionReturn(0); 963 } 964 965 static PetscErrorCode MatGetColumnVector_MPIDense(Mat A, Vec v, PetscInt col) { 966 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 967 968 PetscFunctionBegin; 969 PetscCheck(a->A, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing local matrix"); 970 PetscCheck(a->A->ops->getcolumnvector, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing get column operation"); 971 PetscCall((*a->A->ops->getcolumnvector)(a->A, v, col)); 972 PetscFunctionReturn(0); 973 } 974 975 PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat, PetscInt, PetscReal *); 976 977 PetscErrorCode MatGetColumnReductions_MPIDense(Mat A, PetscInt type, PetscReal *reductions) { 978 PetscInt i, m, n; 979 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 980 PetscReal *work; 981 982 PetscFunctionBegin; 983 PetscCall(MatGetSize(A, &m, &n)); 984 PetscCall(PetscMalloc1(n, &work)); 985 if (type == REDUCTION_MEAN_REALPART) { 986 PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_REALPART, work)); 987 } else if 
(type == REDUCTION_MEAN_IMAGINARYPART) { 988 PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_IMAGINARYPART, work)); 989 } else { 990 PetscCall(MatGetColumnReductions_SeqDense(a->A, type, work)); 991 } 992 if (type == NORM_2) { 993 for (i = 0; i < n; i++) work[i] *= work[i]; 994 } 995 if (type == NORM_INFINITY) { 996 PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, A->hdr.comm)); 997 } else { 998 PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, A->hdr.comm)); 999 } 1000 PetscCall(PetscFree(work)); 1001 if (type == NORM_2) { 1002 for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]); 1003 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 1004 for (i = 0; i < n; i++) reductions[i] /= m; 1005 } 1006 PetscFunctionReturn(0); 1007 } 1008 1009 #if defined(PETSC_HAVE_CUDA) 1010 PetscErrorCode MatShift_MPIDenseCUDA(Mat A, PetscScalar alpha) { 1011 PetscScalar *da; 1012 PetscInt lda; 1013 1014 PetscFunctionBegin; 1015 PetscCall(MatDenseCUDAGetArray(A, &da)); 1016 PetscCall(MatDenseGetLDA(A, &lda)); 1017 PetscCall(PetscInfo(A, "Performing Shift on backend\n")); 1018 PetscCall(MatShift_DenseCUDA_Private(da, alpha, lda, A->rmap->rstart, A->rmap->rend, A->cmap->N)); 1019 PetscCall(MatDenseCUDARestoreArray(A, &da)); 1020 PetscFunctionReturn(0); 1021 } 1022 1023 static PetscErrorCode MatDenseGetColumnVec_MPIDenseCUDA(Mat A, PetscInt col, Vec *v) { 1024 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1025 PetscInt lda; 1026 1027 PetscFunctionBegin; 1028 PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1029 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1030 if (!a->cvec) { 1031 PetscCall(VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec)); 1032 
PetscCall(PetscLogObjectParent((PetscObject)A, (PetscObject)a->cvec));
  }
  a->vecinuse = col + 1; /* remember checked-out column, offset by 1 so 0 means "none" */
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(MatDenseCUDAGetArray(a->A, (PetscScalar **)&a->ptrinuse));
  PetscCall(VecCUDAPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
  *v = a->cvec;
  PetscFunctionReturn(0);
}

/* Release the column obtained with MatDenseGetColumnVec_MPIDenseCUDA() */
static PetscErrorCode MatDenseRestoreColumnVec_MPIDenseCUDA(Mat A, PetscInt col, Vec *v) {
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector");
  a->vecinuse = 0;
  PetscCall(MatDenseCUDARestoreArray(a->A, (PetscScalar **)&a->ptrinuse));
  PetscCall(VecCUDAResetArray(a->cvec));
  if (v) *v = NULL;
  PetscFunctionReturn(0);
}

/* Read-only variant: same as GetColumnVec but uses the read-only device array
   and read-locks the wrapper vector */
static PetscErrorCode MatDenseGetColumnVecRead_MPIDenseCUDA(Mat A, PetscInt col, Vec *v) {
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscInt      lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    PetscCall(VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec));
    PetscCall(PetscLogObjectParent((PetscObject)A, (PetscObject)a->cvec));
  }
  a->vecinuse = col + 1;
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(MatDenseCUDAGetArrayRead(a->A, &a->ptrinuse));
  PetscCall(VecCUDAPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
  PetscCall(VecLockReadPush(a->cvec));
  *v = a->cvec;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDenseCUDA(Mat A, PetscInt col, Vec *v) {
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
  a->vecinuse = 0;
  PetscCall(MatDenseCUDARestoreArrayRead(a->A, &a->ptrinuse));
  PetscCall(VecLockReadPop(a->cvec));
  PetscCall(VecCUDAResetArray(a->cvec));
  if (v) *v = NULL;
  PetscFunctionReturn(0);
}

/* Write-only variant: obtains the device array with GetArrayWrite */
static PetscErrorCode MatDenseGetColumnVecWrite_MPIDenseCUDA(Mat A, PetscInt col, Vec *v) {
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscInt      lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    PetscCall(VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec));
    PetscCall(PetscLogObjectParent((PetscObject)A, (PetscObject)a->cvec));
  }
  a->vecinuse = col + 1;
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(MatDenseCUDAGetArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
  PetscCall(VecCUDAPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
  *v = a->cvec;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDenseCUDA(Mat A, PetscInt col, Vec *v) {
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
  a->vecinuse = 0;
  PetscCall(MatDenseCUDARestoreArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
  PetscCall(VecCUDAResetArray(a->cvec));
  if (v) *v = NULL;
  PetscFunctionReturn(0);
}

/* Forward device-array place/reset/replace to the local block; disallowed
   while a column vector or submatrix is checked out */
static PetscErrorCode MatDenseCUDAPlaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a) {
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!l->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!l->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseCUDAPlaceArray(l->A, a));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDAResetArray_MPIDenseCUDA(Mat A) {
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!l->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!l->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseCUDAResetArray(l->A));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDAReplaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a) {
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!l->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!l->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseCUDAReplaceArray(l->A, a));
  PetscFunctionReturn(0);
}

/* Thin forwarders: device array access on the MPI matrix is access to the
   local sequential block's device array */
static PetscErrorCode MatDenseCUDAGetArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a) {
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
PetscCall(MatDenseCUDAGetArrayWrite(l->A, a)); 1156 PetscFunctionReturn(0); 1157 } 1158 1159 static PetscErrorCode MatDenseCUDARestoreArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a) { 1160 Mat_MPIDense *l = (Mat_MPIDense *)A->data; 1161 1162 PetscFunctionBegin; 1163 PetscCall(MatDenseCUDARestoreArrayWrite(l->A, a)); 1164 PetscFunctionReturn(0); 1165 } 1166 1167 static PetscErrorCode MatDenseCUDAGetArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a) { 1168 Mat_MPIDense *l = (Mat_MPIDense *)A->data; 1169 1170 PetscFunctionBegin; 1171 PetscCall(MatDenseCUDAGetArrayRead(l->A, a)); 1172 PetscFunctionReturn(0); 1173 } 1174 1175 static PetscErrorCode MatDenseCUDARestoreArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a) { 1176 Mat_MPIDense *l = (Mat_MPIDense *)A->data; 1177 1178 PetscFunctionBegin; 1179 PetscCall(MatDenseCUDARestoreArrayRead(l->A, a)); 1180 PetscFunctionReturn(0); 1181 } 1182 1183 static PetscErrorCode MatDenseCUDAGetArray_MPIDenseCUDA(Mat A, PetscScalar **a) { 1184 Mat_MPIDense *l = (Mat_MPIDense *)A->data; 1185 1186 PetscFunctionBegin; 1187 PetscCall(MatDenseCUDAGetArray(l->A, a)); 1188 PetscFunctionReturn(0); 1189 } 1190 1191 static PetscErrorCode MatDenseCUDARestoreArray_MPIDenseCUDA(Mat A, PetscScalar **a) { 1192 Mat_MPIDense *l = (Mat_MPIDense *)A->data; 1193 1194 PetscFunctionBegin; 1195 PetscCall(MatDenseCUDARestoreArray(l->A, a)); 1196 PetscFunctionReturn(0); 1197 } 1198 1199 static PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat, PetscInt, Vec *); 1200 static PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat, PetscInt, Vec *); 1201 static PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat, PetscInt, Vec *); 1202 static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat, PetscInt, Vec *); 1203 static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat, PetscInt, Vec *); 1204 static PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat, PetscInt, Vec *); 1205 static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat, Mat *); 
/* Bind (or unbind) the matrix to the CPU: swaps the composed column-vector
   accessors and the shift op between the host and CUDA implementations, picks
   the matching default random type, and propagates the binding to the local
   block and cached convert matrix. Cached helper objects of the wrong backend
   are destroyed so they are recreated lazily with the right type. */
static PetscErrorCode MatBindToCPU_MPIDenseCUDA(Mat mat, PetscBool bind) {
  Mat_MPIDense *d = (Mat_MPIDense *)mat->data;

  PetscFunctionBegin;
  PetscCheck(!d->vecinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!d->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  if (d->A) PetscCall(MatBindToCPU(d->A, bind));
  mat->boundtocpu = bind;
  if (!bind) {
    PetscBool iscuda;

    PetscCall(PetscFree(mat->defaultrandtype));
    PetscCall(PetscStrallocpy(PETSCCURAND, &mat->defaultrandtype));
    PetscCall(PetscObjectTypeCompare((PetscObject)d->cvec, VECMPICUDA, &iscuda));
    if (!iscuda) PetscCall(VecDestroy(&d->cvec)); /* wrong backend: drop, recreate lazily */
    PetscCall(PetscObjectTypeCompare((PetscObject)d->cmat, MATMPIDENSECUDA, &iscuda));
    if (!iscuda) PetscCall(MatDestroy(&d->cmat));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDenseCUDA));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDenseCUDA));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDenseCUDA));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDenseCUDA));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDenseCUDA));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDenseCUDA));
    mat->ops->shift = MatShift_MPIDenseCUDA;
  } else {
    PetscCall(PetscFree(mat->defaultrandtype));
    PetscCall(PetscStrallocpy(PETSCRANDER48, &mat->defaultrandtype));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDense));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDense));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDense));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDense));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDense));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDense));
    mat->ops->shift = MatShift_MPIDense;
  }
  if (d->cmat) PetscCall(MatBindToCPU(d->cmat, bind));
  PetscFunctionReturn(0);
}

/* Preallocate the local MATSEQDENSECUDA block, optionally wrapping a
   user-supplied device array d_data. No-op for non-CUDA matrices. */
PetscErrorCode MatMPIDenseCUDASetPreallocation(Mat A, PetscScalar *d_data) {
  Mat_MPIDense *d = (Mat_MPIDense *)A->data;
  PetscBool     iscuda;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSECUDA, &iscuda));
  if (!iscuda) PetscFunctionReturn(0);
  PetscCall(PetscLayoutSetUp(A->rmap));
  PetscCall(PetscLayoutSetUp(A->cmap));
  if (!d->A) {
    PetscCall(MatCreate(PETSC_COMM_SELF, &d->A));
    PetscCall(PetscLogObjectParent((PetscObject)A, (PetscObject)d->A));
    /* local block is (local rows) x (ALL global columns) */
    PetscCall(MatSetSizes(d->A, A->rmap->n, A->cmap->N, A->rmap->n, A->cmap->N));
  }
  PetscCall(MatSetType(d->A, MATSEQDENSECUDA));
  PetscCall(MatSeqDenseCUDASetPreallocation(d->A, d_data));
  A->preallocated = PETSC_TRUE;
  A->assembled    = PETSC_TRUE;
  PetscFunctionReturn(0);
}
#endif

/* Fill with random values (delegated to the local block); mirror its offload
   state so host/device coherence tracking stays correct */
static PetscErrorCode MatSetRandom_MPIDense(Mat x, PetscRandom rctx) {
  Mat_MPIDense *d = (Mat_MPIDense *)x->data;

  PetscFunctionBegin;
  PetscCall(MatSetRandom(d->A, rctx));
#if defined(PETSC_HAVE_DEVICE)
  x->offloadmask = d->A->offloadmask;
#endif
  PetscFunctionReturn(0);
}

/* A dense matrix always stores its diagonal, so it is never "missing" */
static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A, PetscBool *missing, PetscInt *d) {
  PetscFunctionBegin;
  *missing = PETSC_FALSE;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat);
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat);
static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat);
static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat);
static PetscErrorCode MatEqual_MPIDense(Mat, Mat, PetscBool *);
static PetscErrorCode MatLoad_MPIDense(Mat, PetscViewer);

/* -------------------------------------------------------------------*/
/* Virtual function table for MATMPIDENSE; slot numbers follow _MatOps */
static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
                                       MatGetRow_MPIDense,
                                       MatRestoreRow_MPIDense,
                                       MatMult_MPIDense,
                                       /* 4*/ MatMultAdd_MPIDense,
                                       MatMultTranspose_MPIDense,
                                       MatMultTransposeAdd_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 10*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatTranspose_MPIDense,
                                       /* 15*/ MatGetInfo_MPIDense,
                                       MatEqual_MPIDense,
                                       MatGetDiagonal_MPIDense,
                                       MatDiagonalScale_MPIDense,
                                       MatNorm_MPIDense,
                                       /* 20*/ MatAssemblyBegin_MPIDense,
                                       MatAssemblyEnd_MPIDense,
                                       MatSetOption_MPIDense,
                                       MatZeroEntries_MPIDense,
                                       /* 24*/ MatZeroRows_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 29*/ MatSetUp_MPIDense,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPIDense,
                                       NULL,
                                       /* 34*/ MatDuplicate_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 39*/ MatAXPY_MPIDense,
                                       MatCreateSubMatrices_MPIDense,
                                       NULL,
                                       MatGetValues_MPIDense,
                                       MatCopy_MPIDense,
                                       /* 44*/ NULL,
                                       MatScale_MPIDense,
                                       MatShift_MPIDense,
                                       NULL,
                                       NULL,
                                       /* 49*/ MatSetRandom_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 54*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 59*/ MatCreateSubMatrix_MPIDense,
                                       MatDestroy_MPIDense,
                                       MatView_MPIDense,
                                       NULL,
                                       NULL,
                                       /* 64*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 69*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 74*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 79*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 83*/ MatLoad_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 89*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 94*/ NULL,
                                       NULL,
                                       MatMatTransposeMultSymbolic_MPIDense_MPIDense,
                                       MatMatTransposeMultNumeric_MPIDense_MPIDense,
                                       NULL,
                                       /* 99*/ MatProductSetFromOptions_MPIDense,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPIDense,
                                       NULL,
                                       /*104*/ NULL,
                                       MatRealPart_MPIDense,
                                       MatImaginaryPart_MPIDense,
                                       NULL,
                                       NULL,
                                       /*109*/ NULL,
                                       NULL,
                                       NULL,
                                       MatGetColumnVector_MPIDense,
                                       MatMissingDiagonal_MPIDense,
                                       /*114*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*119*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*124*/ NULL,
                                       MatGetColumnReductions_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*129*/ NULL,
                                       NULL,
                                       MatTransposeMatMultSymbolic_MPIDense_MPIDense,
                                       MatTransposeMatMultNumeric_MPIDense_MPIDense,
                                       NULL,
                                       /*134*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*139*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatCreateMPIMatConcatenateSeqMat_MPIDense,
                                       /*145*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*150*/ NULL};

/* Create the local (local rows) x (all global columns) sequential dense
   block, optionally wrapping a user-provided host array */
PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat, PetscScalar *data) {
  Mat_MPIDense *a = (Mat_MPIDense *)mat->data;
  PetscBool     iscuda;

  PetscFunctionBegin;
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)mat),
PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));
  if (!a->A) {
    PetscCall(MatCreate(PETSC_COMM_SELF, &a->A));
    PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)a->A));
    /* local block stores every global column of the locally owned rows */
    PetscCall(MatSetSizes(a->A, mat->rmap->n, mat->cmap->N, mat->rmap->n, mat->cmap->N));
  }
  PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSECUDA, &iscuda));
  PetscCall(MatSetType(a->A, iscuda ? MATSEQDENSECUDA : MATSEQDENSE));
  PetscCall(MatSeqDenseSetPreallocation(a->A, data));
#if defined(PETSC_HAVE_DEVICE)
  mat->offloadmask = a->A->offloadmask;
#endif
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  PetscFunctionReturn(0);
}

/* MPIAIJ -> MPIDense: merge each rank's AIJ parts into one sequential AIJ,
   densify it, then concatenate the sequential pieces into the parallel
   result (reusing *newmat when requested) */
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIDense(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) {
  Mat B, C;

  PetscFunctionBegin;
  PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &C));
  PetscCall(MatConvert_SeqAIJ_SeqDense(C, MATSEQDENSE, MAT_INITIAL_MATRIX, &B));
  PetscCall(MatDestroy(&C));
  if (reuse == MAT_REUSE_MATRIX) {
    C = *newmat;
  } else C = NULL;
  PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C));
  PetscCall(MatDestroy(&B));
  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A, &C));
  } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
  PetscFunctionReturn(0);
}

/* MPIDense -> MPIAIJ: sparsify the local dense block, then concatenate the
   sequential AIJ pieces into the parallel result */
PetscErrorCode MatConvert_MPIDense_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) {
  Mat B, C;

  PetscFunctionBegin;
  PetscCall(MatDenseGetLocalMatrix(A, &C)); /* borrowed reference, not destroyed here */
  PetscCall(MatConvert_SeqDense_SeqAIJ(C, MATSEQAIJ, MAT_INITIAL_MATRIX, &B));
  if (reuse == MAT_REUSE_MATRIX) {
    C = *newmat;
  } else C = NULL;
  PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C));
  PetscCall(MatDestroy(&B));
  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A, &C));
  } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_ELEMENTAL)
/* MPIDense -> Elemental: insert the locally owned rows (all columns) into a
   new MATELEMENTAL matrix and let its assembly redistribute the entries */
PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) {
  Mat          mat_elemental;
  PetscScalar *v;
  PetscInt     m = A->rmap->n, N = A->cmap->N, rstart = A->rmap->rstart, i, *rows, *cols;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    mat_elemental = *newmat;
    PetscCall(MatZeroEntries(*newmat)); /* required: values below are ADDed */
  } else {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental));
    PetscCall(MatSetSizes(mat_elemental, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N));
    PetscCall(MatSetType(mat_elemental, MATELEMENTAL));
    PetscCall(MatSetUp(mat_elemental));
    PetscCall(MatSetOption(mat_elemental, MAT_ROW_ORIENTED, PETSC_FALSE)); /* dense storage is column-major */
  }

  PetscCall(PetscMalloc2(m, &rows, N, &cols));
  for (i = 0; i < N; i++) cols[i] = i;
  for (i = 0; i < m; i++) rows[i] = rstart + i;

  /* PETSc-Elemental interface uses axpy for setting off-processor entries,
only ADD_VALUES is allowed */
  PetscCall(MatDenseGetArray(A, &v));
  PetscCall(MatSetValues(mat_elemental, m, rows, N, cols, v, ADD_VALUES));
  PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY));
  PetscCall(MatDenseRestoreArray(A, &v));
  PetscCall(PetscFree2(rows, cols));

  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A, &mat_elemental));
  } else {
    *newmat = mat_elemental;
  }
  PetscFunctionReturn(0);
}
#endif

/* Borrow a pointer to the storage of one global column of the local block */
static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A, PetscInt col, PetscScalar **vals) {
  Mat_MPIDense *mat = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseGetColumn(mat->A, col, vals));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A, PetscScalar **vals) {
  Mat_MPIDense *mat = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseRestoreColumn(mat->A, vals));
  PetscFunctionReturn(0);
}

/* Build (symbolic phase, MAT_INITIAL_MATRIX) or refill (numeric phase) a
   parallel dense matrix whose per-rank rows are the given sequential matrix;
   n is the local column count (PETSC_DECIDE to split N automatically). */
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) {
  Mat_MPIDense *mat;
  PetscInt      m, nloc, N;

  PetscFunctionBegin;
  PetscCall(MatGetSize(inmat, &m, &N));
  PetscCall(MatGetLocalSize(inmat, NULL, &nloc));
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    PetscInt sum;

    if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N));
    /* Check sum(n) = N */
    PetscCall(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm));
    PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N);

    PetscCall(MatCreateDense(comm, m, n, PETSC_DETERMINE, N, NULL, outmat));
    PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  }

  /* numeric phase: copy the sequential matrix into the local block */
  mat = (Mat_MPIDense *)(*outmat)->data;
  PetscCall(MatCopy(inmat, mat->A, SAME_NONZERO_PATTERN));
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_CUDA)
/* Downgrade MATMPIDENSECUDA -> MATMPIDENSE: rebind to CPU, restore the host
   default vector type, strip the CUDA-only composed methods, and convert the
   local block back to MATSEQDENSE */
PetscErrorCode MatConvert_MPIDenseCUDA_MPIDense(Mat M, MatType type, MatReuse reuse, Mat *newmat) {
  Mat           B;
  Mat_MPIDense *m;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    PetscCall(MatDuplicate(M, MAT_COPY_VALUES, newmat));
  } else if (reuse == MAT_REUSE_MATRIX) {
    PetscCall(MatCopy(M, *newmat, SAME_NONZERO_PATTERN));
  }

  B = *newmat;
  PetscCall(MatBindToCPU_MPIDenseCUDA(B, PETSC_TRUE));
  PetscCall(PetscFree(B->defaultvectype));
  PetscCall(PetscStrallocpy(VECSTANDARD, &B->defaultvectype));
  PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIDENSE));
  /* composing NULL removes the CUDA-specific method registrations */
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpidensecuda_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArrayWrite_C",
NULL)); 1613 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAPlaceArray_C", NULL)); 1614 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAResetArray_C", NULL)); 1615 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAReplaceArray_C", NULL)); 1616 m = (Mat_MPIDense *)(B)->data; 1617 if (m->A) PetscCall(MatConvert(m->A, MATSEQDENSE, MAT_INPLACE_MATRIX, &m->A)); 1618 B->ops->bindtocpu = NULL; 1619 B->offloadmask = PETSC_OFFLOAD_CPU; 1620 PetscFunctionReturn(0); 1621 } 1622 1623 PetscErrorCode MatConvert_MPIDense_MPIDenseCUDA(Mat M, MatType type, MatReuse reuse, Mat *newmat) { 1624 Mat B; 1625 Mat_MPIDense *m; 1626 1627 PetscFunctionBegin; 1628 if (reuse == MAT_INITIAL_MATRIX) { 1629 PetscCall(MatDuplicate(M, MAT_COPY_VALUES, newmat)); 1630 } else if (reuse == MAT_REUSE_MATRIX) { 1631 PetscCall(MatCopy(M, *newmat, SAME_NONZERO_PATTERN)); 1632 } 1633 1634 B = *newmat; 1635 PetscCall(PetscFree(B->defaultvectype)); 1636 PetscCall(PetscStrallocpy(VECCUDA, &B->defaultvectype)); 1637 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIDENSECUDA)); 1638 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpidensecuda_mpidense_C", MatConvert_MPIDenseCUDA_MPIDense)); 1639 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", MatProductSetFromOptions_MPIAIJ_MPIDense)); 1640 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", MatProductSetFromOptions_MPIAIJ_MPIDense)); 1641 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ)); 1642 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ)); 1643 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArray_C", MatDenseCUDAGetArray_MPIDenseCUDA)); 1644 
PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArrayRead_C", MatDenseCUDAGetArrayRead_MPIDenseCUDA)); 1645 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArrayWrite_C", MatDenseCUDAGetArrayWrite_MPIDenseCUDA)); 1646 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArray_C", MatDenseCUDARestoreArray_MPIDenseCUDA)); 1647 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArrayRead_C", MatDenseCUDARestoreArrayRead_MPIDenseCUDA)); 1648 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArrayWrite_C", MatDenseCUDARestoreArrayWrite_MPIDenseCUDA)); 1649 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAPlaceArray_C", MatDenseCUDAPlaceArray_MPIDenseCUDA)); 1650 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAResetArray_C", MatDenseCUDAResetArray_MPIDenseCUDA)); 1651 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAReplaceArray_C", MatDenseCUDAReplaceArray_MPIDenseCUDA)); 1652 m = (Mat_MPIDense *)(B->data); 1653 if (m->A) { 1654 PetscCall(MatConvert(m->A, MATSEQDENSECUDA, MAT_INPLACE_MATRIX, &m->A)); 1655 B->offloadmask = PETSC_OFFLOAD_BOTH; 1656 } else { 1657 B->offloadmask = PETSC_OFFLOAD_UNALLOCATED; 1658 } 1659 PetscCall(MatBindToCPU_MPIDenseCUDA(B, PETSC_FALSE)); 1660 1661 B->ops->bindtocpu = MatBindToCPU_MPIDenseCUDA; 1662 PetscFunctionReturn(0); 1663 } 1664 #endif 1665 1666 PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A, PetscInt col, Vec *v) { 1667 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1668 PetscInt lda; 1669 1670 PetscFunctionBegin; 1671 PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1672 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1673 if (!a->cvec) PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, 
A->rmap->N, NULL, &a->cvec));
  a->vecinuse = col + 1;
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(MatDenseGetArray(a->A, (PetscScalar **)&a->ptrinuse));
  /* the returned vector wraps column col of the local dense storage in place; no copy is made */
  PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
  *v = a->cvec;
  PetscFunctionReturn(0);
}

/* Returns the column vector obtained with MatDenseGetColumnVec_MPIDense() and
   releases the access to the underlying local array */
PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A, PetscInt col, Vec *v) {
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector");
  a->vecinuse = 0;
  PetscCall(MatDenseRestoreArray(a->A, (PetscScalar **)&a->ptrinuse));
  PetscCall(VecResetArray(a->cvec));
  if (v) *v = NULL;
  PetscFunctionReturn(0);
}

/* Read-only variant: same as MatDenseGetColumnVec_MPIDense() but takes read access
   to the local array and read-locks the returned vector */
PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v) {
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscInt lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  /* the work vector is created lazily and shared by all column-vector accessors */
  if (!a->cvec) PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec));
  a->vecinuse = col + 1;
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(MatDenseGetArrayRead(a->A, &a->ptrinuse));
  PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
  PetscCall(VecLockReadPush(a->cvec));
  *v = a->cvec;
  PetscFunctionReturn(0);
}

/* Returns the column vector obtained with MatDenseGetColumnVecRead_MPIDense(),
   popping the read lock before releasing the array */
PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v) {
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse,
PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
  a->vecinuse = 0;
  PetscCall(MatDenseRestoreArrayRead(a->A, &a->ptrinuse));
  PetscCall(VecLockReadPop(a->cvec));
  PetscCall(VecResetArray(a->cvec));
  if (v) *v = NULL;
  PetscFunctionReturn(0);
}

/* Write-only variant: same as MatDenseGetColumnVec_MPIDense() but takes write-only
   access to the local array */
PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v) {
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscInt lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  /* the work vector is created lazily and shared by all column-vector accessors */
  if (!a->cvec) PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec));
  a->vecinuse = col + 1;
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(MatDenseGetArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
  PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
  *v = a->cvec;
  PetscFunctionReturn(0);
}

/* Returns the column vector obtained with MatDenseGetColumnVecWrite_MPIDense() */
PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v) {
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
  a->vecinuse = 0;
  PetscCall(MatDenseRestoreArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
  PetscCall(VecResetArray(a->cvec));
  if (v) *v = NULL;
  PetscFunctionReturn(0);
}

/* Returns a matrix that views the submatrix with global rows [rbegin,rend) and
   columns [cbegin,cend); the view (a->cmat) is cached and reused across calls.
   Collective: layout reuse is decided with an allreduce when local sizes may differ. */
PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A, PetscInt rbegin, PetscInt rend, PetscInt cbegin, PetscInt cend, Mat *v) {
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat_MPIDense *c;
  MPI_Comm comm;
  PetscInt pbegin, pend;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCheck(!a->vecinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  /* clip the requested global row range against this rank's ownership range; the
     result [pbegin,pend) indexes into the local block */
  pbegin = PetscMax(0, PetscMin(A->rmap->rend, rbegin) - A->rmap->rstart);
  pend = PetscMin(A->rmap->n, PetscMax(0, rend - A->rmap->rstart));
  if (!a->cmat) {
    PetscCall(MatCreate(comm, &a->cmat));
    PetscCall(PetscLogObjectParent((PetscObject)A, (PetscObject)a->cmat));
    PetscCall(MatSetType(a->cmat, ((PetscObject)A)->type_name));
    /* full row range: share A's row layout instead of building a new one */
    if (rend - rbegin == A->rmap->N) PetscCall(PetscLayoutReference(A->rmap, &a->cmat->rmap));
    else {
      PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin));
      PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin));
      PetscCall(PetscLayoutSetUp(a->cmat->rmap));
    }
    PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin));
    PetscCall(PetscLayoutSetUp(a->cmat->cmap));
  } else {
    /* reuse the cached view; rebuild its layouts only when the requested sizes changed */
    PetscBool same = (PetscBool)(rend - rbegin == a->cmat->rmap->N);
    if (same && a->cmat->rmap->N != A->rmap->N) {
      /* global size matches but the local split may differ on some rank: agree collectively */
      same = (PetscBool)(pend - pbegin == a->cmat->rmap->n);
      PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &same, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
    }
    if (!same) {
      PetscCall(PetscLayoutDestroy(&a->cmat->rmap));
      PetscCall(PetscLayoutCreate(comm, &a->cmat->rmap));
      PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin));
      PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin));
      PetscCall(PetscLayoutSetUp(a->cmat->rmap));
    }
    if (cend - cbegin != a->cmat->cmap->N) {
      PetscCall(PetscLayoutDestroy(&a->cmat->cmap));
      PetscCall(PetscLayoutCreate(comm, &a->cmat->cmap));
      PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin));
      PetscCall(PetscLayoutSetUp(a->cmat->cmap));
    }
  }
  c = (Mat_MPIDense *)a->cmat->data;
  PetscCheck(!c->A, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  /* the local block of the view is a sequential-dense submatrix view of a->A */
  PetscCall(MatDenseGetSubMatrix(a->A, pbegin, pend, cbegin, cend, &c->A));

  a->cmat->preallocated = PETSC_TRUE;
  a->cmat->assembled = PETSC_TRUE;
#if defined(PETSC_HAVE_DEVICE)
  a->cmat->offloadmask = c->A->offloadmask;
#endif
  a->matinuse = cbegin + 1;
  *v = a->cmat;
  PetscFunctionReturn(0);
}

/* Returns the view obtained with MatDenseGetSubMatrix_MPIDense() and releases
   the underlying sequential submatrix view */
PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A, Mat *v) {
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat_MPIDense *c;

  PetscFunctionBegin;
  PetscCheck(a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetSubMatrix() first");
  PetscCheck(a->cmat, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal matrix");
  PetscCheck(*v == a->cmat, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Not the matrix obtained from MatDenseGetSubMatrix()");
  a->matinuse = 0;
  c = (Mat_MPIDense *)a->cmat->data;
  PetscCall(MatDenseRestoreSubMatrix(a->A, &c->A));
  if (v) *v = NULL;
#if defined(PETSC_HAVE_DEVICE)
  A->offloadmask = a->A->offloadmask;
#endif
  PetscFunctionReturn(0);
}

/*MC
   MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.

   Options Database Keys:
.
-mat_type mpidense - sets the matrix type to `MATMPIDENSE` during a call to `MatSetFromOptions()`

  Level: beginner

.seealso: `MatCreateDense()`, `MATSEQDENSE`, `MATDENSE`
M*/
/* Type constructor for MATMPIDENSE: installs the function table, creates the
   value stash, and composes all type-specific method implementations */
PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat) {
  Mat_MPIDense *a;

  PetscFunctionBegin;
  PetscCall(PetscNewLog(mat, &a));
  mat->data = (void *)a;
  PetscCall(PetscMemcpy(mat->ops, &MatOps_Values, sizeof(struct _MatOps)));

  mat->insertmode = NOT_SET_VALUES;

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;

  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)mat), 1, &mat->stash));

  /* stuff used for matrix vector multiply */
  a->lvec = NULL;
  a->Mvctx = NULL;
  a->roworiented = PETSC_TRUE;

  /* dense-array accessors */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", MatDenseGetLDA_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", MatDenseSetLDA_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", MatDenseGetArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", MatDenseRestoreArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", MatDenseGetArrayRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", MatDenseRestoreArrayRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", MatDenseGetArrayWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", MatDenseRestoreArrayWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", MatDensePlaceArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", MatDenseResetArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", MatDenseReplaceArray_MPIDense));
  /* column-vector and submatrix views */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", MatDenseGetSubMatrix_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", MatDenseRestoreSubMatrix_MPIDense));
  /* conversions to/from other types */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", MatConvert_MPIDense_MPIAIJ));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", MatConvert_MPIDense_Elemental));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", MatConvert_Dense_ScaLAPACK));
#endif
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", MatConvert_MPIDense_MPIDenseCUDA));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", MatMPIDenseSetPreallocation_MPIDense));
  /* matrix products */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
#endif

  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", MatDenseGetColumn_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", MatDenseRestoreColumn_MPIDense));
  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, MATMPIDENSE));
  PetscFunctionReturn(0);
}

/*MC
   MATMPIDENSECUDA - MATMPIDENSECUDA = "mpidensecuda" - A matrix type to be used for distributed dense matrices on GPUs.

   Options Database Keys:
. -mat_type mpidensecuda - sets the matrix type to `MATMPIDENSECUDA` during a call to `MatSetFromOptions()`

  Level: beginner

.seealso: `MATMPIDENSE`, `MATSEQDENSE`, `MATSEQDENSECUDA`
M*/
#if defined(PETSC_HAVE_CUDA)
#include <petsc/private/deviceimpl.h>
/* Type constructor for MATMPIDENSECUDA: builds a MATMPIDENSE and converts it in place */
PETSC_EXTERN PetscErrorCode MatCreate_MPIDenseCUDA(Mat B) {
  PetscFunctionBegin;
  PetscCall(PetscDeviceInitialize(PETSC_DEVICE_CUDA));
  PetscCall(MatCreate_MPIDense(B));
  PetscCall(MatConvert_MPIDense_MPIDenseCUDA(B, MATMPIDENSECUDA, MAT_INPLACE_MATRIX, &B));
  PetscFunctionReturn(0);
}
#endif

/*MC
   MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.

   This matrix type is identical to `MATSEQDENSE` when constructed with a single process communicator,
   and `MATMPIDENSE` otherwise.

   Options Database Keys:
. -mat_type dense - sets the matrix type to `MATDENSE` during a call to `MatSetFromOptions()`

  Level: beginner

.seealso: `MATSEQDENSE`, `MATMPIDENSE`, `MATDENSECUDA`
M*/

/*MC
   MATDENSECUDA - MATDENSECUDA = "densecuda" - A matrix type to be used for dense matrices on GPUs.

   This matrix type is identical to `MATSEQDENSECUDA` when constructed with a single process communicator,
   and `MATMPIDENSECUDA` otherwise.

   Options Database Keys:
. -mat_type densecuda - sets the matrix type to `MATDENSECUDA` during a call to `MatSetFromOptions()`

  Level: beginner

.seealso: `MATSEQDENSECUDA`, `MATMPIDENSECUDA`, `MATDENSE`
M*/

/*@C
   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

   Collective

   Input Parameters:
+  B - the matrix
-  data - optional location of matrix data.  Set data=NULL for PETSc
   to control all matrix memory allocation.

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=NULL.

   Level: intermediate

.seealso: `MATMPIDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
@*/
PetscErrorCode MatMPIDenseSetPreallocation(Mat B, PetscScalar *data) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  /* TryMethod: silently a no-op when B is not of a type composing this method */
  PetscTryMethod(B, "MatMPIDenseSetPreallocation_C", (Mat, PetscScalar *), (B, data));
  PetscFunctionReturn(0);
}

/*@
   MatDensePlaceArray - Allows one to replace the array in a `MATDENSE` matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Note:
   You can return to the original array with a call to `MatDenseResetArray()`. The user is responsible for freeing this array; it will not be
   freed when the matrix is destroyed.

   Level: developer

.seealso: `MATDENSE`, `MatDenseGetArray()`, `MatDenseResetArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`,
`MatDenseReplaceArray()`
@*/
PetscErrorCode MatDensePlaceArray(Mat mat, const PetscScalar *array) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  PetscUseMethod(mat, "MatDensePlaceArray_C", (Mat, const PetscScalar *), (mat, array));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
#if defined(PETSC_HAVE_DEVICE)
  /* the placed array is host memory, so the authoritative copy is now on the CPU */
  mat->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(0);
}

/*@
   MatDenseResetArray - Resets the matrix array to that it previously had before the call to `MatDensePlaceArray()`

   Not Collective

   Input Parameters:
.
mat - the matrix

   Note:
   You can only call this after a call to `MatDensePlaceArray()`

   Level: developer

.seealso: `MATDENSE`, `MatDenseGetArray()`, `MatDensePlaceArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`
@*/
PetscErrorCode MatDenseResetArray(Mat mat) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  PetscUseMethod(mat, "MatDenseResetArray_C", (Mat), (mat));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  PetscFunctionReturn(0);
}

/*@
   MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Note:
   The memory passed in MUST be obtained with `PetscMalloc()` and CANNOT be
   freed by the user. It will be freed when the matrix is destroyed.

   Level: developer

.seealso: `MatDensePlaceArray()`, `MatDenseGetArray()`, `VecReplaceArray()`
@*/
PetscErrorCode MatDenseReplaceArray(Mat mat, const PetscScalar *array) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  PetscUseMethod(mat, "MatDenseReplaceArray_C", (Mat, const PetscScalar *), (mat, array));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
#if defined(PETSC_HAVE_DEVICE)
  /* the replacement array is host memory, so the authoritative copy is now on the CPU */
  mat->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_CUDA)
/*@C
   MatDenseCUDAPlaceArray - Allows one to replace the GPU array in a `MATDENSECUDA` matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Note:
   You can return to the original array with a call to `MatDenseCUDAResetArray()`. The user is responsible for freeing this array; it will not be
   freed when the matrix is destroyed. The array must have been allocated with cudaMalloc().

   Level: developer

.seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDAResetArray()`, `MatDenseCUDAReplaceArray()`
@*/
PetscErrorCode MatDenseCUDAPlaceArray(Mat mat, const PetscScalar *array) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  PetscUseMethod(mat, "MatDenseCUDAPlaceArray_C", (Mat, const PetscScalar *), (mat, array));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  /* the placed array is device memory, so the authoritative copy is now on the GPU */
  mat->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDAResetArray - Resets the matrix array to that it previously had before the call to `MatDenseCUDAPlaceArray()`

   Not Collective

   Input Parameters:
.  mat - the matrix

   Note:
   You can only call this after a call to `MatDenseCUDAPlaceArray()`

   Level: developer

.seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDAPlaceArray()`
@*/
PetscErrorCode MatDenseCUDAResetArray(Mat mat) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  PetscUseMethod(mat, "MatDenseCUDAResetArray_C", (Mat), (mat));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDAReplaceArray - Allows one to replace the GPU array in a `MATDENSECUDA` matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Note:
   This permanently replaces the GPU array and frees the memory associated with the old GPU array.
   The memory passed in CANNOT be freed by the user. It will be freed
   when the matrix is destroyed. The array should respect the matrix leading dimension.

   Level: developer

.seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDAPlaceArray()`, `MatDenseCUDAResetArray()`
@*/
PetscErrorCode MatDenseCUDAReplaceArray(Mat mat, const PetscScalar *array) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  PetscUseMethod(mat, "MatDenseCUDAReplaceArray_C", (Mat, const PetscScalar *), (mat, array));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  /* the replacement array is device memory, so the authoritative copy is now on the GPU */
  mat->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDAGetArrayWrite - Provides write access to the CUDA buffer inside a `MATDENSECUDA` matrix.

   Not Collective

   Input Parameters:
.  A - the matrix

   Output Parameter:
.  array - the GPU array in column major order

   Notes:
   The data on the GPU may not be updated due to operations done on the CPU. If you need updated data, use `MatDenseCUDAGetArray()`.

   The array must be restored with `MatDenseCUDARestoreArrayWrite()` when no longer needed.
2163 2164 Level: developer 2165 2166 .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayRead()`, `MatDenseCUDARestoreArrayRead()` 2167 @*/ 2168 PetscErrorCode MatDenseCUDAGetArrayWrite(Mat A, PetscScalar **a) { 2169 PetscFunctionBegin; 2170 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2171 PetscUseMethod(A, "MatDenseCUDAGetArrayWrite_C", (Mat, PetscScalar **), (A, a)); 2172 PetscCall(PetscObjectStateIncrease((PetscObject)A)); 2173 PetscFunctionReturn(0); 2174 } 2175 2176 /*@C 2177 MatDenseCUDARestoreArrayWrite - Restore write access to the CUDA buffer inside a `MATDENSECUDA` matrix previously obtained with `MatDenseCUDAGetArrayWrite()`. 2178 2179 Not Collective 2180 2181 Input Parameters: 2182 + A - the matrix 2183 - array - the GPU array in column major order 2184 2185 Level: developer 2186 2187 .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`, `MatDenseCUDAGetArrayRead()` 2188 @*/ 2189 PetscErrorCode MatDenseCUDARestoreArrayWrite(Mat A, PetscScalar **a) { 2190 PetscFunctionBegin; 2191 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2192 PetscUseMethod(A, "MatDenseCUDARestoreArrayWrite_C", (Mat, PetscScalar **), (A, a)); 2193 PetscCall(PetscObjectStateIncrease((PetscObject)A)); 2194 A->offloadmask = PETSC_OFFLOAD_GPU; 2195 PetscFunctionReturn(0); 2196 } 2197 2198 /*@C 2199 MatDenseCUDAGetArrayRead - Provides read-only access to the CUDA buffer inside a `MATDENSECUDA` matrix. The array must be restored with `MatDenseCUDARestoreArrayRead()` when no longer needed. 2200 2201 Not Collective 2202 2203 Input Parameters: 2204 . A - the matrix 2205 2206 Output Parameters 2207 . array - the GPU array in column major order 2208 2209 Note: 2210 Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use `MatDenseCUDAGetArrayWrite()`. 
2211 2212 Level: developer 2213 2214 .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()` 2215 @*/ 2216 PetscErrorCode MatDenseCUDAGetArrayRead(Mat A, const PetscScalar **a) { 2217 PetscFunctionBegin; 2218 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2219 PetscUseMethod(A, "MatDenseCUDAGetArrayRead_C", (Mat, const PetscScalar **), (A, a)); 2220 PetscFunctionReturn(0); 2221 } 2222 2223 /*@C 2224 MatDenseCUDARestoreArrayRead - Restore read-only access to the CUDA buffer inside a `MATDENSECUDA` matrix previously obtained with a call to `MatDenseCUDAGetArrayRead()`. 2225 2226 Not Collective 2227 2228 Input Parameters: 2229 + A - the matrix 2230 - array - the GPU array in column major order 2231 2232 Note: 2233 Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use `MatDenseCUDAGetArrayWrite()`. 2234 2235 Level: developer 2236 2237 .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDAGetArrayRead()` 2238 @*/ 2239 PetscErrorCode MatDenseCUDARestoreArrayRead(Mat A, const PetscScalar **a) { 2240 PetscFunctionBegin; 2241 PetscUseMethod(A, "MatDenseCUDARestoreArrayRead_C", (Mat, const PetscScalar **), (A, a)); 2242 PetscFunctionReturn(0); 2243 } 2244 2245 /*@C 2246 MatDenseCUDAGetArray - Provides access to the CUDA buffer inside a `MATDENSECUDA` matrix. The array must be restored with `MatDenseCUDARestoreArray()` when no longer needed. 2247 2248 Not Collective 2249 2250 Input Parameters: 2251 . A - the matrix 2252 2253 Output Parameters 2254 . array - the GPU array in column major order 2255 2256 Note: 2257 Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use `MatDenseCUDAGetArrayWrite()`. For read-only access, use `MatDenseCUDAGetArrayRead()`. 
2258 2259 Level: developer 2260 2261 .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArrayRead()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()` 2262 @*/ 2263 PetscErrorCode MatDenseCUDAGetArray(Mat A, PetscScalar **a) { 2264 PetscFunctionBegin; 2265 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2266 PetscUseMethod(A, "MatDenseCUDAGetArray_C", (Mat, PetscScalar **), (A, a)); 2267 PetscCall(PetscObjectStateIncrease((PetscObject)A)); 2268 PetscFunctionReturn(0); 2269 } 2270 2271 /*@C 2272 MatDenseCUDARestoreArray - Restore access to the CUDA buffer inside a `MATDENSECUDA` matrix previously obtained with `MatDenseCUDAGetArray()`. 2273 2274 Not Collective 2275 2276 Input Parameters: 2277 + A - the matrix 2278 - array - the GPU array in column major order 2279 2280 Level: developer 2281 2282 .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`, `MatDenseCUDAGetArrayRead()` 2283 @*/ 2284 PetscErrorCode MatDenseCUDARestoreArray(Mat A, PetscScalar **a) { 2285 PetscFunctionBegin; 2286 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2287 PetscUseMethod(A, "MatDenseCUDARestoreArray_C", (Mat, PetscScalar **), (A, a)); 2288 PetscCall(PetscObjectStateIncrease((PetscObject)A)); 2289 A->offloadmask = PETSC_OFFLOAD_GPU; 2290 PetscFunctionReturn(0); 2291 } 2292 #endif 2293 2294 /*@C 2295 MatCreateDense - Creates a matrix in `MATDENSE` format. 2296 2297 Collective 2298 2299 Input Parameters: 2300 + comm - MPI communicator 2301 . m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given) 2302 . n - number of local columns (or `PETSC_DECIDE` to have calculated if N is given) 2303 . M - number of global rows (or `PETSC_DECIDE` to have calculated if m is given) 2304 . N - number of global columns (or `PETSC_DECIDE` to have calculated if n is given) 2305 - data - optional location of matrix data. 
Set data to NULL (`PETSC_NULL_SCALAR` for Fortran users) for PETSc 2306 to control all matrix memory allocation. 2307 2308 Output Parameter: 2309 . A - the matrix 2310 2311 Notes: 2312 The dense format is fully compatible with standard Fortran 77 2313 storage by columns. 2314 2315 Although local portions of the matrix are stored in column-major 2316 order, the matrix is partitioned across MPI ranks by row. 2317 2318 The data input variable is intended primarily for Fortran programmers 2319 who wish to allocate their own matrix memory space. Most users should 2320 set data=NULL (`PETSC_NULL_SCALAR` for Fortran users). 2321 2322 The user MUST specify either the local or global matrix dimensions 2323 (possibly both). 2324 2325 Level: intermediate 2326 2327 .seealso: `MATDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()` 2328 @*/ 2329 PetscErrorCode MatCreateDense(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar *data, Mat *A) { 2330 PetscFunctionBegin; 2331 PetscCall(MatCreate(comm, A)); 2332 PetscCall(MatSetSizes(*A, m, n, M, N)); 2333 PetscCall(MatSetType(*A, MATDENSE)); 2334 PetscCall(MatSeqDenseSetPreallocation(*A, data)); 2335 PetscCall(MatMPIDenseSetPreallocation(*A, data)); 2336 PetscFunctionReturn(0); 2337 } 2338 2339 #if defined(PETSC_HAVE_CUDA) 2340 /*@C 2341 MatCreateDenseCUDA - Creates a matrix in `MATDENSECUDA` format using CUDA. 2342 2343 Collective 2344 2345 Input Parameters: 2346 + comm - MPI communicator 2347 . m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given) 2348 . n - number of local columns (or `PETSC_DECIDE` to have calculated if N is given) 2349 . M - number of global rows (or `PETSC_DECIDE` to have calculated if m is given) 2350 . N - number of global columns (or `PETSC_DECIDE` to have calculated if n is given) 2351 - data - optional location of GPU matrix data. Set data=NULL for PETSc 2352 to control matrix memory allocation. 2353 2354 Output Parameter: 2355 . 
 A - the matrix

   Level: intermediate

.seealso: `MATDENSECUDA`, `MatCreate()`, `MatCreateDense()`
@*/
PetscErrorCode MatCreateDenseCUDA(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar *data, Mat *A) {
  PetscFunctionBegin;
  PetscCall(MatCreate(comm, A));
  /* user-provided GPU storage must be supplied (or omitted) collectively */
  PetscValidLogicalCollectiveBool(*A, !!data, 6);
  PetscCall(MatSetSizes(*A, m, n, M, N));
  PetscCall(MatSetType(*A, MATDENSECUDA));
  /* only the preallocation routine matching the resolved (seq/mpi) type takes effect */
  PetscCall(MatSeqDenseCUDASetPreallocation(*A, data));
  PetscCall(MatMPIDenseCUDASetPreallocation(*A, data));
  PetscFunctionReturn(0);
}
#endif

/* Duplicate a parallel dense matrix: create a new wrapper with the same layout
   and type, then duplicate the inner sequential block (values copied only when
   cpvalues requests it) */
static PetscErrorCode MatDuplicate_MPIDense(Mat A, MatDuplicateOption cpvalues, Mat *newmat) {
  Mat           mat;
  Mat_MPIDense *a, *oldmat = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  *newmat = NULL;
  PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat));
  PetscCall(MatSetSizes(mat, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
  PetscCall(MatSetType(mat, ((PetscObject)A)->type_name));
  a = (Mat_MPIDense *)mat->data;

  /* the duplicate is immediately usable: mark it assembled/preallocated */
  mat->factortype   = A->factortype;
  mat->assembled    = PETSC_TRUE;
  mat->preallocated = PETSC_TRUE;

  mat->insertmode = NOT_SET_VALUES;
  a->donotstash   = oldmat->donotstash;

  /* share the layout objects instead of rebuilding them */
  PetscCall(PetscLayoutReference(A->rmap, &mat->rmap));
  PetscCall(PetscLayoutReference(A->cmap, &mat->cmap));

  PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
  PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)a->A));

  *newmat = mat;
  PetscFunctionReturn(0);
}

/* Load a parallel dense matrix from a binary or (if available) HDF5 viewer */
PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer) {
  PetscBool isbinary;
#if defined(PETSC_HAVE_HDF5)
  PetscBool ishdf5;
#endif

  PetscFunctionBegin;
  PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
  PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
  /* force binary viewer to load .info file if it has not
     yet done so */
  PetscCall(PetscViewerSetUp(viewer));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
#if defined(PETSC_HAVE_HDF5)
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
#endif
  if (isbinary) {
    PetscCall(MatLoad_Dense_Binary(newMat, viewer));
#if defined(PETSC_HAVE_HDF5)
  } else if (ishdf5) {
    PetscCall(MatLoad_Dense_HDF5(newMat, viewer));
#endif
  } else SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
  PetscFunctionReturn(0);
}

/* Two parallel dense matrices are equal iff every rank's local block is equal;
   the local results are combined with a logical-AND reduction */
static PetscErrorCode MatEqual_MPIDense(Mat A, Mat B, PetscBool *flag) {
  Mat_MPIDense *matB = (Mat_MPIDense *)B->data, *matA = (Mat_MPIDense *)A->data;
  Mat           a, b;

  PetscFunctionBegin;
  a = matA->A;
  b = matB->A;
  PetscCall(MatEqual(a, b, flag));
  PetscCall(MPIU_Allreduce(MPI_IN_PLACE, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
  PetscFunctionReturn(0);
}

/* Destroy callback for the product data attached by
   MatTransposeMatMultSymbolic_MPIDense_MPIDense() */
PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data) {
  Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data;

  PetscFunctionBegin;
  PetscCall(PetscFree2(atb->sendbuf, atb->recvcounts));
  PetscCall(MatDestroy(&atb->atb));
  PetscCall(PetscFree(atb));
  PetscFunctionReturn(0);
}

/* Destroy callback for the product data attached by
   MatMatTransposeMultSymbolic_MPIDense_MPIDense() */
PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data) {
  Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data;

  PetscFunctionBegin;
  PetscCall(PetscFree2(abt->buf[0], abt->buf[1]));
  PetscCall(PetscFree2(abt->recvcounts, abt->recvdispls));
  PetscCall(PetscFree(abt));
  PetscFunctionReturn(0);
}

/* C = A^T * B: each rank computes its local contribution with a sequential
   transpose-matmult, then the contributions are summed and distributed by
   row ownership of C via MPI_Reduce_scatter */
static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) {
  Mat_MPIDense *a = (Mat_MPIDense
*)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
  Mat_TransMatMultDense *atb;
  MPI_Comm               comm;
  PetscMPIInt            size, *recvcounts;
  PetscScalar           *carray, *sendbuf;
  const PetscScalar     *atbarray;
  PetscInt               i, cN = C->cmap->N, proc, k, j, lda;
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C, 3);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
  atb        = (Mat_TransMatMultDense *)C->product->data;
  recvcounts = atb->recvcounts;
  sendbuf    = atb->sendbuf;

  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));

  /* compute atbarray = aseq^T * bseq (local contribution to the full C) */
  PetscCall(MatTransposeMatMult(a->A, b->A, atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX, PETSC_DEFAULT, &atb->atb));

  PetscCall(MatGetOwnershipRanges(C, &ranges));

  /* arrange atbarray into sendbuf: for each destination rank, pack that rank's
     row block of every column contiguously, matching recvcounts set up in the
     symbolic phase */
  PetscCall(MatDenseGetArrayRead(atb->atb, &atbarray));
  PetscCall(MatDenseGetLDA(atb->atb, &lda));
  for (proc = 0, k = 0; proc < size; proc++) {
    for (j = 0; j < cN; j++) {
      for (i = ranges[proc]; i < ranges[proc + 1]; i++) sendbuf[k++] = atbarray[i + j * lda];
    }
  }
  PetscCall(MatDenseRestoreArrayRead(atb->atb, &atbarray));

  /* sum all atbarray to local values of C */
  PetscCall(MatDenseGetArrayWrite(c->A, &carray));
  PetscCallMPI(MPI_Reduce_scatter(sendbuf, carray, recvcounts, MPIU_SCALAR, MPIU_SUM, comm));
  PetscCall(MatDenseRestoreArrayWrite(c->A, &carray));
  PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(0);
}

/* Symbolic phase for C = A^T * B: size/type C and allocate the reduce-scatter
   buffers reused on every numeric call */
static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C) {
  MPI_Comm               comm;
  PetscMPIInt            size;
  PetscInt               cm = A->cmap->n, cM, cN = B->cmap->N;
  Mat_TransMatMultDense *atb;
  PetscBool       cisdense;
  PetscInt        i;
  const PetscInt *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C, 4);
  PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  /* A^T*B requires A and B to share the row distribution */
  if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
    SETERRQ(comm, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
  }

  /* create matrix product C */
  PetscCall(MatSetSizes(C, cm, B->cmap->n, A->cmap->N, B->cmap->N));
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, ""));
  if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
  PetscCall(MatSetUp(C));

  /* create data structure for reuse C: sendbuf holds the full (global) product,
     recvcounts describes each rank's row block for MPI_Reduce_scatter */
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCall(PetscNew(&atb));
  cM = C->rmap->N;
  PetscCall(PetscMalloc2(cM * cN, &atb->sendbuf, size, &atb->recvcounts));
  PetscCall(MatGetOwnershipRanges(C, &ranges));
  for (i = 0; i < size; i++) atb->recvcounts[i] = (ranges[i + 1] - ranges[i]) * cN;

  C->product->data    = atb;
  C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}

/* Symbolic phase for C = A * B^T: choose the communication algorithm
   ("allgatherv" or "cyclic") and allocate the buffers it will reuse */
static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C) {
  MPI_Comm               comm;
  PetscMPIInt            i, size;
  PetscInt               maxRows, bufsiz;
  PetscMPIInt            tag;
  PetscInt               alg;
  Mat_MatTransMultDense *abt;
  Mat_Product           *product = C->product;
  PetscBool              flg;

  PetscFunctionBegin;
  MatCheckProduct(C, 4);
  PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
  /* check local size of A and B */
  PetscCheck(A->cmap->n == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local column dimensions are incompatible, A (%" PetscInt_FMT ") != B (%" PetscInt_FMT ")", A->cmap->n, B->cmap->n);

  /* alg 0 = "allgatherv", anything else ("cyclic") = 1 */
  PetscCall(PetscStrcmp(product->alg, "allgatherv", &flg));
  alg = flg ? 0 : 1;

  /* setup matrix product C */
  PetscCall(MatSetSizes(C, A->rmap->n, B->rmap->n, A->rmap->N, B->rmap->N));
  PetscCall(MatSetType(C, MATMPIDENSE));
  PetscCall(MatSetUp(C));
  PetscCall(PetscObjectGetNewTag((PetscObject)C, &tag));

  /* create data structure for reuse C */
  PetscCall(PetscObjectGetComm((PetscObject)C, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCall(PetscNew(&abt));
  abt->tag = tag;
  abt->alg = alg;
  switch (alg) {
  case 1: /* alg: "cyclic" — two ping-pong buffers sized for the largest row block of B */
    for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i]));
    bufsiz = A->cmap->N * maxRows;
    PetscCall(PetscMalloc2(bufsiz, &(abt->buf[0]), bufsiz, &(abt->buf[1])));
    break;
  default: /* alg: "allgatherv" — buf[0] holds local B^T, buf[1] the gathered global B^T */
    PetscCall(PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1])));
    PetscCall(PetscMalloc2(size, &(abt->recvcounts), size + 1, &(abt->recvdispls)));
    for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N;
    for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i];
    break;
  }

  C->product->data                = abt;
  C->product->destroy             = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
  C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}

/* C = A * B^T via a ring (cyclic) exchange: each rank's block of B visits every
   rank; overlapping nonblocking sends/recvs with the local GEMM */
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C) {
  Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
  Mat_MatTransMultDense *abt;
  MPI_Comm
comm;
  PetscMPIInt        rank, size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom;
  PetscScalar       *sendbuf, *recvbuf = NULL, *cv;
  PetscInt           i, cK = A->cmap->N, k, j, bn;
  PetscScalar        _DOne = 1.0, _DZero = 0.0;
  const PetscScalar *av, *bv;
  PetscBLASInt       cm, cn, ck, alda, blda = 0, clda;
  MPI_Request        reqs[2];
  const PetscInt    *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C, 3);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
  abt = (Mat_MatTransMultDense *)C->product->data;
  PetscCall(PetscObjectGetComm((PetscObject)C, &comm));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCall(MatDenseGetArrayRead(a->A, &av));
  PetscCall(MatDenseGetArrayRead(b->A, &bv));
  PetscCall(MatDenseGetArrayWrite(c->A, &cv));
  PetscCall(MatDenseGetLDA(a->A, &i));
  PetscCall(PetscBLASIntCast(i, &alda));
  PetscCall(MatDenseGetLDA(b->A, &i));
  PetscCall(PetscBLASIntCast(i, &blda));
  PetscCall(MatDenseGetLDA(c->A, &i));
  PetscCall(PetscBLASIntCast(i, &clda));
  PetscCall(MatGetOwnershipRanges(B, &ranges));
  bn = B->rmap->n;
  /* when the local B block is contiguous (lda == local rows) it can be sent
     as-is; otherwise compact it column-by-column into buf[0] */
  if (blda == bn) {
    sendbuf = (PetscScalar *)bv;
  } else {
    sendbuf = abt->buf[0];
    for (k = 0, i = 0; i < cK; i++) {
      for (j = 0; j < bn; j++, k++) sendbuf[k] = bv[i * blda + j];
    }
  }
  /* ring neighbors: send "down", receive "up" */
  if (size > 1) {
    sendto   = (rank + size - 1) % size;
    recvfrom = (rank + size + 1) % size;
  } else {
    sendto = recvfrom = 0;
  }
  PetscCall(PetscBLASIntCast(cK, &ck));
  PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm));
  recvisfrom = rank;
  for (i = 0; i < size; i++) {
    /* we have finished receiving in sending, bufs can be read/modified */
    PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */
    PetscInt nextbn         = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];

    if (nextrecvisfrom != rank) {
      /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */
      sendsiz = cK * bn;
      recvsiz = cK * nextbn;
      recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1];
      PetscCallMPI(MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]));
      PetscCallMPI(MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]));
    }

    /* local aseq * sendbuf^T, written into the column block of C owned by the
       rank the current chunk of B originated on */
    PetscCall(PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn));
    if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &cm, &cn, &ck, &_DOne, av, &alda, sendbuf, &cn, &_DZero, cv + clda * ranges[recvisfrom], &clda));

    if (nextrecvisfrom != rank) {
      /* wait for the sends and receives to complete, swap sendbuf and recvbuf */
      PetscCallMPI(MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE));
    }
    bn         = nextbn;
    recvisfrom = nextrecvisfrom;
    sendbuf    = recvbuf;
  }
  PetscCall(MatDenseRestoreArrayRead(a->A, &av));
  PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
  PetscCall(MatDenseRestoreArrayWrite(c->A, &cv));
  PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(0);
}

/* C = A * B^T by gathering all of B^T on every rank, then one local GEMM */
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C) {
  Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
  Mat_MatTransMultDense *abt;
  MPI_Comm               comm;
  PetscMPIInt            size;
  PetscScalar           *cv, *sendbuf, *recvbuf;
  const PetscScalar     *av, *bv;
  PetscInt               blda, i, cK = A->cmap->N, k, j, bn;
  PetscScalar            _DOne = 1.0, _DZero = 0.0;
  PetscBLASInt           cm, cn, ck, alda, clda;

  PetscFunctionBegin;
  MatCheckProduct(C, 3);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
  abt =
(Mat_MatTransMultDense *)C->product->data; 2686 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 2687 PetscCallMPI(MPI_Comm_size(comm, &size)); 2688 PetscCall(MatDenseGetArrayRead(a->A, &av)); 2689 PetscCall(MatDenseGetArrayRead(b->A, &bv)); 2690 PetscCall(MatDenseGetArrayWrite(c->A, &cv)); 2691 PetscCall(MatDenseGetLDA(a->A, &i)); 2692 PetscCall(PetscBLASIntCast(i, &alda)); 2693 PetscCall(MatDenseGetLDA(b->A, &blda)); 2694 PetscCall(MatDenseGetLDA(c->A, &i)); 2695 PetscCall(PetscBLASIntCast(i, &clda)); 2696 /* copy transpose of B into buf[0] */ 2697 bn = B->rmap->n; 2698 sendbuf = abt->buf[0]; 2699 recvbuf = abt->buf[1]; 2700 for (k = 0, j = 0; j < bn; j++) { 2701 for (i = 0; i < cK; i++, k++) sendbuf[k] = bv[i * blda + j]; 2702 } 2703 PetscCall(MatDenseRestoreArrayRead(b->A, &bv)); 2704 PetscCallMPI(MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm)); 2705 PetscCall(PetscBLASIntCast(cK, &ck)); 2706 PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm)); 2707 PetscCall(PetscBLASIntCast(c->A->cmap->n, &cn)); 2708 if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &cm, &cn, &ck, &_DOne, av, &alda, recvbuf, &ck, &_DZero, cv, &clda)); 2709 PetscCall(MatDenseRestoreArrayRead(a->A, &av)); 2710 PetscCall(MatDenseRestoreArrayRead(b->A, &bv)); 2711 PetscCall(MatDenseRestoreArrayWrite(c->A, &cv)); 2712 PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 2713 PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 2714 PetscFunctionReturn(0); 2715 } 2716 2717 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) { 2718 Mat_MatTransMultDense *abt; 2719 2720 PetscFunctionBegin; 2721 MatCheckProduct(C, 3); 2722 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 2723 abt = (Mat_MatTransMultDense *)C->product->data; 2724 switch (abt->alg) { 2725 case 1: PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C)); break; 
2726 default: PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C)); break; 2727 } 2728 PetscFunctionReturn(0); 2729 } 2730 2731 PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data) { 2732 Mat_MatMultDense *ab = (Mat_MatMultDense *)data; 2733 2734 PetscFunctionBegin; 2735 PetscCall(MatDestroy(&ab->Ce)); 2736 PetscCall(MatDestroy(&ab->Ae)); 2737 PetscCall(MatDestroy(&ab->Be)); 2738 PetscCall(PetscFree(ab)); 2739 PetscFunctionReturn(0); 2740 } 2741 2742 #if defined(PETSC_HAVE_ELEMENTAL) 2743 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) { 2744 Mat_MatMultDense *ab; 2745 2746 PetscFunctionBegin; 2747 MatCheckProduct(C, 3); 2748 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Missing product data"); 2749 ab = (Mat_MatMultDense *)C->product->data; 2750 PetscCall(MatConvert_MPIDense_Elemental(A, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Ae)); 2751 PetscCall(MatConvert_MPIDense_Elemental(B, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Be)); 2752 PetscCall(MatMatMultNumeric_Elemental(ab->Ae, ab->Be, ab->Ce)); 2753 PetscCall(MatConvert(ab->Ce, MATMPIDENSE, MAT_REUSE_MATRIX, &C)); 2754 PetscFunctionReturn(0); 2755 } 2756 2757 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C) { 2758 Mat Ae, Be, Ce; 2759 Mat_MatMultDense *ab; 2760 2761 PetscFunctionBegin; 2762 MatCheckProduct(C, 4); 2763 PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 2764 /* check local size of A and B */ 2765 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 2766 SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend); 2767 } 2768 2769 /* create elemental matrices Ae and Be */ 2770 
  PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &Ae));
  PetscCall(MatSetSizes(Ae, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N));
  PetscCall(MatSetType(Ae, MATELEMENTAL));
  PetscCall(MatSetUp(Ae));
  /* dense data is column-major; tell the converter so values land correctly */
  PetscCall(MatSetOption(Ae, MAT_ROW_ORIENTED, PETSC_FALSE));

  PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &Be));
  PetscCall(MatSetSizes(Be, PETSC_DECIDE, PETSC_DECIDE, B->rmap->N, B->cmap->N));
  PetscCall(MatSetType(Be, MATELEMENTAL));
  PetscCall(MatSetUp(Be));
  PetscCall(MatSetOption(Be, MAT_ROW_ORIENTED, PETSC_FALSE));

  /* compute symbolic Ce = Ae*Be */
  PetscCall(MatCreate(PetscObjectComm((PetscObject)C), &Ce));
  PetscCall(MatMatMultSymbolic_Elemental(Ae, Be, fill, Ce));

  /* setup C */
  PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, PETSC_DECIDE, PETSC_DECIDE));
  PetscCall(MatSetType(C, MATDENSE));
  PetscCall(MatSetUp(C));

  /* create data structure for reuse Cdense */
  PetscCall(PetscNew(&ab));
  ab->Ae = Ae;
  ab->Be = Be;
  ab->Ce = Ce;

  C->product->data       = ab;
  C->product->destroy    = MatDestroy_MatMatMult_MPIDense_MPIDense;
  C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}
#endif
/* ----------------------------------------------- */
#if defined(PETSC_HAVE_ELEMENTAL)
/* Product-table hook for C = A*B (available only with Elemental) */
static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C) {
  PetscFunctionBegin;
  C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense;
  C->ops->productsymbolic = MatProductSymbolic_AB;
  PetscFunctionReturn(0);
}
#endif

/* Product-table hook for C = A^T*B */
static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C) {
  Mat_Product *product = C->product;
  Mat          A = product->A, B = product->B;

  PetscFunctionBegin;
  /* A^T*B requires matching row distributions */
  if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend)
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
  C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense;
  C->ops->productsymbolic          = MatProductSymbolic_AtB;
  PetscFunctionReturn(0);
}

/* Product-table hook for C = A*B^t: selects the communication algorithm,
   honoring -matmattransmult_mpidense_mpidense_via / -mat_product_algorithm */
static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C) {
  Mat_Product *product     = C->product;
  const char  *algTypes[2] = {"allgatherv", "cyclic"};
  PetscInt     alg, nalg = 2;
  PetscBool    flg = PETSC_FALSE;

  PetscFunctionBegin;
  /* Set default algorithm */
  alg = 0; /* default is allgatherv */
  PetscCall(PetscStrcmp(product->alg, "default", &flg));
  if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));

  /* Get runtime option */
  if (product->api_user) {
    PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatTransposeMult", "Mat");
    PetscCall(PetscOptionsEList("-matmattransmult_mpidense_mpidense_via", "Algorithmic approach", "MatMatTransposeMult", algTypes, nalg, algTypes[alg], &alg, &flg));
    PetscOptionsEnd();
  } else {
    PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_ABt", "Mat");
    PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_ABt", algTypes, nalg, algTypes[alg], &alg, &flg));
    PetscOptionsEnd();
  }
  if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));

  C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense;
  C->ops->productsymbolic          = MatProductSymbolic_ABt;
  PetscFunctionReturn(0);
}

/* Entry point wiring MPIDENSE product types to their symbolic routines;
   unsupported product types fall through untouched */
PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C) {
  Mat_Product *product = C->product;

  PetscFunctionBegin;
  switch (product->type) {
#if defined(PETSC_HAVE_ELEMENTAL)
  case MATPRODUCT_AB: PetscCall(MatProductSetFromOptions_MPIDense_AB(C)); break;
#endif
  case MATPRODUCT_AtB: PetscCall(MatProductSetFromOptions_MPIDense_AtB(C)); break;
  case MATPRODUCT_ABt: PetscCall(MatProductSetFromOptions_MPIDense_ABt(C)); break;
  default: break; /* other product types are handled by generic/basic implementations */
  }
  PetscFunctionReturn(0);
}