1 2 /* 3 Basic functions for basic parallel dense matrices. 4 */ 5 6 #include <../src/mat/impls/dense/mpi/mpidense.h> /*I "petscmat.h" I*/ 7 #include <../src/mat/impls/aij/mpi/mpiaij.h> 8 #include <petscblaslapack.h> 9 10 /*@ 11 MatDenseGetLocalMatrix - For a `MATMPIDENSE` or `MATSEQDENSE` matrix returns the sequential 12 matrix that represents the operator. For sequential matrices it returns itself. 13 14 Input Parameter: 15 . A - the sequential or MPI dense matrix 16 17 Output Parameter: 18 . B - the inner matrix 19 20 Level: intermediate 21 22 .seealso: `MATDENSE`, `MATMPIDENSE`, `MATSEQDENSE` 23 @*/ 24 PetscErrorCode MatDenseGetLocalMatrix(Mat A, Mat *B) 25 { 26 Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 27 PetscBool flg; 28 29 PetscFunctionBegin; 30 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 31 PetscValidPointer(B, 2); 32 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &flg)); 33 if (flg) *B = mat->A; 34 else { 35 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQDENSE, &flg)); 36 PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)A)->type_name); 37 *B = A; 38 } 39 PetscFunctionReturn(0); 40 } 41 42 PetscErrorCode MatCopy_MPIDense(Mat A, Mat B, MatStructure s) 43 { 44 Mat_MPIDense *Amat = (Mat_MPIDense *)A->data; 45 Mat_MPIDense *Bmat = (Mat_MPIDense *)B->data; 46 47 PetscFunctionBegin; 48 PetscCall(MatCopy(Amat->A, Bmat->A, s)); 49 PetscFunctionReturn(0); 50 } 51 52 PetscErrorCode MatShift_MPIDense(Mat A, PetscScalar alpha) 53 { 54 Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 55 PetscInt j, lda, rstart = A->rmap->rstart, rend = A->rmap->rend, rend2; 56 PetscScalar *v; 57 58 PetscFunctionBegin; 59 PetscCall(MatDenseGetArray(mat->A, &v)); 60 PetscCall(MatDenseGetLDA(mat->A, &lda)); 61 rend2 = PetscMin(rend, A->cmap->N); 62 if (rend2 > rstart) { 63 for (j = rstart; j < rend2; j++) v[j - rstart + j * lda] += alpha; 64 PetscCall(PetscLogFlops(rend2 - rstart)); 65 } 66 
PetscCall(MatDenseRestoreArray(mat->A, &v)); 67 PetscFunctionReturn(0); 68 } 69 70 PetscErrorCode MatGetRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 71 { 72 Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 73 PetscInt lrow, rstart = A->rmap->rstart, rend = A->rmap->rend; 74 75 PetscFunctionBegin; 76 PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows"); 77 lrow = row - rstart; 78 PetscCall(MatGetRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v)); 79 PetscFunctionReturn(0); 80 } 81 82 PetscErrorCode MatRestoreRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 83 { 84 Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 85 PetscInt lrow, rstart = A->rmap->rstart, rend = A->rmap->rend; 86 87 PetscFunctionBegin; 88 PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows"); 89 lrow = row - rstart; 90 PetscCall(MatRestoreRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v)); 91 PetscFunctionReturn(0); 92 } 93 94 PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A, Mat *a) 95 { 96 Mat_MPIDense *mdn = (Mat_MPIDense *)A->data; 97 PetscInt m = A->rmap->n, rstart = A->rmap->rstart; 98 PetscScalar *array; 99 MPI_Comm comm; 100 PetscBool flg; 101 Mat B; 102 103 PetscFunctionBegin; 104 PetscCall(MatHasCongruentLayouts(A, &flg)); 105 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only square matrices supported."); 106 PetscCall(PetscObjectQuery((PetscObject)A, "DiagonalBlock", (PetscObject *)&B)); 107 if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */ 108 109 PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSECUDA, &flg)); 110 PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. 
Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSECUDA); 111 PetscCall(PetscObjectGetComm((PetscObject)(mdn->A), &comm)); 112 PetscCall(MatCreate(comm, &B)); 113 PetscCall(MatSetSizes(B, m, m, m, m)); 114 PetscCall(MatSetType(B, ((PetscObject)mdn->A)->type_name)); 115 PetscCall(MatDenseGetArrayRead(mdn->A, (const PetscScalar **)&array)); 116 PetscCall(MatSeqDenseSetPreallocation(B, array + m * rstart)); 117 PetscCall(MatDenseRestoreArrayRead(mdn->A, (const PetscScalar **)&array)); 118 PetscCall(PetscObjectCompose((PetscObject)A, "DiagonalBlock", (PetscObject)B)); 119 *a = B; 120 PetscCall(MatDestroy(&B)); 121 } else *a = B; 122 PetscFunctionReturn(0); 123 } 124 125 PetscErrorCode MatSetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], const PetscScalar v[], InsertMode addv) 126 { 127 Mat_MPIDense *A = (Mat_MPIDense *)mat->data; 128 PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row; 129 PetscBool roworiented = A->roworiented; 130 131 PetscFunctionBegin; 132 for (i = 0; i < m; i++) { 133 if (idxm[i] < 0) continue; 134 PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large"); 135 if (idxm[i] >= rstart && idxm[i] < rend) { 136 row = idxm[i] - rstart; 137 if (roworiented) { 138 PetscCall(MatSetValues(A->A, 1, &row, n, idxn, v + i * n, addv)); 139 } else { 140 for (j = 0; j < n; j++) { 141 if (idxn[j] < 0) continue; 142 PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large"); 143 PetscCall(MatSetValues(A->A, 1, &row, 1, &idxn[j], v + i + j * m, addv)); 144 } 145 } 146 } else if (!A->donotstash) { 147 mat->assembled = PETSC_FALSE; 148 if (roworiented) { 149 PetscCall(MatStashValuesRow_Private(&mat->stash, idxm[i], n, idxn, v + i * n, PETSC_FALSE)); 150 } else { 151 PetscCall(MatStashValuesCol_Private(&mat->stash, idxm[i], n, idxn, v + i, m, PETSC_FALSE)); 152 } 153 } 154 } 155 PetscFunctionReturn(0); 
156 } 157 158 PetscErrorCode MatGetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[]) 159 { 160 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 161 PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row; 162 163 PetscFunctionBegin; 164 for (i = 0; i < m; i++) { 165 if (idxm[i] < 0) continue; /* negative row */ 166 PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large"); 167 if (idxm[i] >= rstart && idxm[i] < rend) { 168 row = idxm[i] - rstart; 169 for (j = 0; j < n; j++) { 170 if (idxn[j] < 0) continue; /* negative column */ 171 PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large"); 172 PetscCall(MatGetValues(mdn->A, 1, &row, 1, &idxn[j], v + i * n + j)); 173 } 174 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported"); 175 } 176 PetscFunctionReturn(0); 177 } 178 179 static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A, PetscInt *lda) 180 { 181 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 182 183 PetscFunctionBegin; 184 PetscCall(MatDenseGetLDA(a->A, lda)); 185 PetscFunctionReturn(0); 186 } 187 188 static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A, PetscInt lda) 189 { 190 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 191 PetscBool iscuda; 192 193 PetscFunctionBegin; 194 if (!a->A) { 195 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 196 PetscCall(PetscLayoutSetUp(A->rmap)); 197 PetscCall(PetscLayoutSetUp(A->cmap)); 198 PetscCall(MatCreate(PETSC_COMM_SELF, &a->A)); 199 PetscCall(MatSetSizes(a->A, A->rmap->n, A->cmap->N, A->rmap->n, A->cmap->N)); 200 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSECUDA, &iscuda)); 201 PetscCall(MatSetType(a->A, iscuda ? 
MATSEQDENSECUDA : MATSEQDENSE)); 202 } 203 PetscCall(MatDenseSetLDA(a->A, lda)); 204 PetscFunctionReturn(0); 205 } 206 207 static PetscErrorCode MatDenseGetArray_MPIDense(Mat A, PetscScalar **array) 208 { 209 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 210 211 PetscFunctionBegin; 212 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 213 PetscCall(MatDenseGetArray(a->A, array)); 214 PetscFunctionReturn(0); 215 } 216 217 static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A, const PetscScalar **array) 218 { 219 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 220 221 PetscFunctionBegin; 222 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 223 PetscCall(MatDenseGetArrayRead(a->A, array)); 224 PetscFunctionReturn(0); 225 } 226 227 static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A, PetscScalar **array) 228 { 229 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 230 231 PetscFunctionBegin; 232 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 233 PetscCall(MatDenseGetArrayWrite(a->A, array)); 234 PetscFunctionReturn(0); 235 } 236 237 static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A, const PetscScalar *array) 238 { 239 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 240 241 PetscFunctionBegin; 242 PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 243 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 244 PetscCall(MatDensePlaceArray(a->A, array)); 245 PetscFunctionReturn(0); 246 } 247 248 static PetscErrorCode MatDenseResetArray_MPIDense(Mat A) 249 { 250 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 251 252 PetscFunctionBegin; 253 PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, 
"Need to call MatDenseRestoreColumnVec() first"); 254 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 255 PetscCall(MatDenseResetArray(a->A)); 256 PetscFunctionReturn(0); 257 } 258 259 static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A, const PetscScalar *array) 260 { 261 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 262 263 PetscFunctionBegin; 264 PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 265 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 266 PetscCall(MatDenseReplaceArray(a->A, array)); 267 PetscFunctionReturn(0); 268 } 269 270 static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A, IS isrow, IS iscol, MatReuse scall, Mat *B) 271 { 272 Mat_MPIDense *mat = (Mat_MPIDense *)A->data, *newmatd; 273 PetscInt lda, i, j, rstart, rend, nrows, ncols, Ncols, nlrows, nlcols; 274 const PetscInt *irow, *icol; 275 const PetscScalar *v; 276 PetscScalar *bv; 277 Mat newmat; 278 IS iscol_local; 279 MPI_Comm comm_is, comm_mat; 280 281 PetscFunctionBegin; 282 PetscCall(PetscObjectGetComm((PetscObject)A, &comm_mat)); 283 PetscCall(PetscObjectGetComm((PetscObject)iscol, &comm_is)); 284 PetscCheck(comm_mat == comm_is, PETSC_COMM_SELF, PETSC_ERR_ARG_NOTSAMECOMM, "IS communicator must match matrix communicator"); 285 286 PetscCall(ISAllGather(iscol, &iscol_local)); 287 PetscCall(ISGetIndices(isrow, &irow)); 288 PetscCall(ISGetIndices(iscol_local, &icol)); 289 PetscCall(ISGetLocalSize(isrow, &nrows)); 290 PetscCall(ISGetLocalSize(iscol, &ncols)); 291 PetscCall(ISGetSize(iscol, &Ncols)); /* global number of columns, size of iscol_local */ 292 293 /* No parallel redistribution currently supported! Should really check each index set 294 to comfirm that it is OK. ... Currently supports only submatrix same partitioning as 295 original matrix! 
*/ 296 297 PetscCall(MatGetLocalSize(A, &nlrows, &nlcols)); 298 PetscCall(MatGetOwnershipRange(A, &rstart, &rend)); 299 300 /* Check submatrix call */ 301 if (scall == MAT_REUSE_MATRIX) { 302 /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */ 303 /* Really need to test rows and column sizes! */ 304 newmat = *B; 305 } else { 306 /* Create and fill new matrix */ 307 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &newmat)); 308 PetscCall(MatSetSizes(newmat, nrows, ncols, PETSC_DECIDE, Ncols)); 309 PetscCall(MatSetType(newmat, ((PetscObject)A)->type_name)); 310 PetscCall(MatMPIDenseSetPreallocation(newmat, NULL)); 311 } 312 313 /* Now extract the data pointers and do the copy, column at a time */ 314 newmatd = (Mat_MPIDense *)newmat->data; 315 PetscCall(MatDenseGetArray(newmatd->A, &bv)); 316 PetscCall(MatDenseGetArrayRead(mat->A, &v)); 317 PetscCall(MatDenseGetLDA(mat->A, &lda)); 318 for (i = 0; i < Ncols; i++) { 319 const PetscScalar *av = v + lda * icol[i]; 320 for (j = 0; j < nrows; j++) *bv++ = av[irow[j] - rstart]; 321 } 322 PetscCall(MatDenseRestoreArrayRead(mat->A, &v)); 323 PetscCall(MatDenseRestoreArray(newmatd->A, &bv)); 324 325 /* Assemble the matrices so that the correct flags are set */ 326 PetscCall(MatAssemblyBegin(newmat, MAT_FINAL_ASSEMBLY)); 327 PetscCall(MatAssemblyEnd(newmat, MAT_FINAL_ASSEMBLY)); 328 329 /* Free work space */ 330 PetscCall(ISRestoreIndices(isrow, &irow)); 331 PetscCall(ISRestoreIndices(iscol_local, &icol)); 332 PetscCall(ISDestroy(&iscol_local)); 333 *B = newmat; 334 PetscFunctionReturn(0); 335 } 336 337 PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A, PetscScalar **array) 338 { 339 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 340 341 PetscFunctionBegin; 342 PetscCall(MatDenseRestoreArray(a->A, array)); 343 PetscFunctionReturn(0); 344 } 345 346 PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A, const PetscScalar **array) 347 { 348 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 349 350 
PetscFunctionBegin; 351 PetscCall(MatDenseRestoreArrayRead(a->A, array)); 352 PetscFunctionReturn(0); 353 } 354 355 PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A, PetscScalar **array) 356 { 357 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 358 359 PetscFunctionBegin; 360 PetscCall(MatDenseRestoreArrayWrite(a->A, array)); 361 PetscFunctionReturn(0); 362 } 363 364 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat, MatAssemblyType mode) 365 { 366 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 367 PetscInt nstash, reallocs; 368 369 PetscFunctionBegin; 370 if (mdn->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 371 372 PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range)); 373 PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs)); 374 PetscCall(PetscInfo(mdn->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs)); 375 PetscFunctionReturn(0); 376 } 377 378 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat, MatAssemblyType mode) 379 { 380 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 381 PetscInt i, *row, *col, flg, j, rstart, ncols; 382 PetscMPIInt n; 383 PetscScalar *val; 384 385 PetscFunctionBegin; 386 if (!mdn->donotstash && !mat->nooffprocentries) { 387 /* wait on receives */ 388 while (1) { 389 PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg)); 390 if (!flg) break; 391 392 for (i = 0; i < n;) { 393 /* Now identify the consecutive vals belonging to the same row */ 394 for (j = i, rstart = row[j]; j < n; j++) { 395 if (row[j] != rstart) break; 396 } 397 if (j < n) ncols = j - i; 398 else ncols = n - i; 399 /* Now assemble all these values with a single function call */ 400 PetscCall(MatSetValues_MPIDense(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode)); 401 i = j; 402 } 403 } 404 PetscCall(MatStashScatterEnd_Private(&mat->stash)); 405 } 406 407 PetscCall(MatAssemblyBegin(mdn->A, mode)); 408 PetscCall(MatAssemblyEnd(mdn->A, mode)); 
409 PetscFunctionReturn(0); 410 } 411 412 PetscErrorCode MatZeroEntries_MPIDense(Mat A) 413 { 414 Mat_MPIDense *l = (Mat_MPIDense *)A->data; 415 416 PetscFunctionBegin; 417 PetscCall(MatZeroEntries(l->A)); 418 PetscFunctionReturn(0); 419 } 420 421 PetscErrorCode MatZeroRows_MPIDense(Mat A, PetscInt n, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) 422 { 423 Mat_MPIDense *l = (Mat_MPIDense *)A->data; 424 PetscInt i, len, *lrows; 425 426 PetscFunctionBegin; 427 /* get locally owned rows */ 428 PetscCall(PetscLayoutMapLocal(A->rmap, n, rows, &len, &lrows, NULL)); 429 /* fix right hand side if needed */ 430 if (x && b) { 431 const PetscScalar *xx; 432 PetscScalar *bb; 433 434 PetscCall(VecGetArrayRead(x, &xx)); 435 PetscCall(VecGetArrayWrite(b, &bb)); 436 for (i = 0; i < len; ++i) bb[lrows[i]] = diag * xx[lrows[i]]; 437 PetscCall(VecRestoreArrayRead(x, &xx)); 438 PetscCall(VecRestoreArrayWrite(b, &bb)); 439 } 440 PetscCall(MatZeroRows(l->A, len, lrows, 0.0, NULL, NULL)); 441 if (diag != 0.0) { 442 Vec d; 443 444 PetscCall(MatCreateVecs(A, NULL, &d)); 445 PetscCall(VecSet(d, diag)); 446 PetscCall(MatDiagonalSet(A, d, INSERT_VALUES)); 447 PetscCall(VecDestroy(&d)); 448 } 449 PetscCall(PetscFree(lrows)); 450 PetscFunctionReturn(0); 451 } 452 453 PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat, Vec, Vec); 454 PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat, Vec, Vec, Vec); 455 PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat, Vec, Vec); 456 PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat, Vec, Vec, Vec); 457 458 PetscErrorCode MatMult_MPIDense(Mat mat, Vec xx, Vec yy) 459 { 460 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 461 const PetscScalar *ax; 462 PetscScalar *ay; 463 PetscMemType axmtype, aymtype; 464 465 PetscFunctionBegin; 466 if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat)); 467 PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype)); 468 PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype)); 469 
PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE)); 470 PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE)); 471 PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay)); 472 PetscCall(VecRestoreArrayReadAndMemType(xx, &ax)); 473 PetscCall((*mdn->A->ops->mult)(mdn->A, mdn->lvec, yy)); 474 PetscFunctionReturn(0); 475 } 476 477 PetscErrorCode MatMultAdd_MPIDense(Mat mat, Vec xx, Vec yy, Vec zz) 478 { 479 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 480 const PetscScalar *ax; 481 PetscScalar *ay; 482 PetscMemType axmtype, aymtype; 483 484 PetscFunctionBegin; 485 if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat)); 486 PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype)); 487 PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype)); 488 PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE)); 489 PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE)); 490 PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay)); 491 PetscCall(VecRestoreArrayReadAndMemType(xx, &ax)); 492 PetscCall((*mdn->A->ops->multadd)(mdn->A, mdn->lvec, yy, zz)); 493 PetscFunctionReturn(0); 494 } 495 496 PetscErrorCode MatMultTranspose_MPIDense(Mat A, Vec xx, Vec yy) 497 { 498 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 499 const PetscScalar *ax; 500 PetscScalar *ay; 501 PetscMemType axmtype, aymtype; 502 503 PetscFunctionBegin; 504 if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); 505 PetscCall(VecSet(yy, 0.0)); 506 PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec)); 507 PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype)); 508 PetscCall(VecGetArrayAndMemType(yy, &ay, &aymtype)); 509 PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM)); 510 PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM)); 511 PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax)); 512 
PetscCall(VecRestoreArrayAndMemType(yy, &ay)); 513 PetscFunctionReturn(0); 514 } 515 516 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A, Vec xx, Vec yy, Vec zz) 517 { 518 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 519 const PetscScalar *ax; 520 PetscScalar *ay; 521 PetscMemType axmtype, aymtype; 522 523 PetscFunctionBegin; 524 if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); 525 PetscCall(VecCopy(yy, zz)); 526 PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec)); 527 PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype)); 528 PetscCall(VecGetArrayAndMemType(zz, &ay, &aymtype)); 529 PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM)); 530 PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM)); 531 PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax)); 532 PetscCall(VecRestoreArrayAndMemType(zz, &ay)); 533 PetscFunctionReturn(0); 534 } 535 536 PetscErrorCode MatGetDiagonal_MPIDense(Mat A, Vec v) 537 { 538 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 539 PetscInt lda, len, i, n, m = A->rmap->n, radd; 540 PetscScalar *x, zero = 0.0; 541 const PetscScalar *av; 542 543 PetscFunctionBegin; 544 PetscCall(VecSet(v, zero)); 545 PetscCall(VecGetArray(v, &x)); 546 PetscCall(VecGetSize(v, &n)); 547 PetscCheck(n == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming mat and vec"); 548 len = PetscMin(a->A->rmap->n, a->A->cmap->n); 549 radd = A->rmap->rstart * m; 550 PetscCall(MatDenseGetArrayRead(a->A, &av)); 551 PetscCall(MatDenseGetLDA(a->A, &lda)); 552 for (i = 0; i < len; i++) x[i] = av[radd + i * lda + i]; 553 PetscCall(MatDenseRestoreArrayRead(a->A, &av)); 554 PetscCall(VecRestoreArray(v, &x)); 555 PetscFunctionReturn(0); 556 } 557 558 PetscErrorCode MatDestroy_MPIDense(Mat mat) 559 { 560 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 561 562 PetscFunctionBegin; 563 #if defined(PETSC_USE_LOG) 564 PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, 
mat->rmap->N, mat->cmap->N); 565 #endif 566 PetscCall(MatStashDestroy_Private(&mat->stash)); 567 PetscCheck(!mdn->vecinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 568 PetscCheck(!mdn->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 569 PetscCall(MatDestroy(&mdn->A)); 570 PetscCall(VecDestroy(&mdn->lvec)); 571 PetscCall(PetscSFDestroy(&mdn->Mvctx)); 572 PetscCall(VecDestroy(&mdn->cvec)); 573 PetscCall(MatDestroy(&mdn->cmat)); 574 575 PetscCall(PetscFree(mat->data)); 576 PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL)); 577 578 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", NULL)); 579 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", NULL)); 580 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", NULL)); 581 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", NULL)); 582 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", NULL)); 583 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", NULL)); 584 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", NULL)); 585 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", NULL)); 586 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", NULL)); 587 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", NULL)); 588 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", NULL)); 589 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL)); 590 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", NULL)); 591 #if defined(PETSC_HAVE_ELEMENTAL) 592 PetscCall(PetscObjectComposeFunction((PetscObject)mat, 
"MatConvert_mpidense_elemental_C", NULL)); 593 #endif 594 #if defined(PETSC_HAVE_SCALAPACK) 595 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", NULL)); 596 #endif 597 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", NULL)); 598 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", NULL)); 599 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", NULL)); 600 #if defined(PETSC_HAVE_CUDA) 601 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", NULL)); 602 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", NULL)); 603 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", NULL)); 604 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensecuda_mpidense_C", NULL)); 605 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", NULL)); 606 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", NULL)); 607 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", NULL)); 608 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", NULL)); 609 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArray_C", NULL)); 610 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayRead_C", NULL)); 611 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayWrite_C", NULL)); 612 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArray_C", NULL)); 613 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayRead_C", NULL)); 614 
PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayWrite_C", NULL)); 615 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAPlaceArray_C", NULL)); 616 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAResetArray_C", NULL)); 617 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAReplaceArray_C", NULL)); 618 #endif 619 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", NULL)); 620 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", NULL)); 621 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", NULL)); 622 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", NULL)); 623 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", NULL)); 624 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", NULL)); 625 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", NULL)); 626 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", NULL)); 627 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", NULL)); 628 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", NULL)); 629 630 PetscCall(PetscObjectCompose((PetscObject)mat, "DiagonalBlock", NULL)); 631 PetscFunctionReturn(0); 632 } 633 634 PETSC_INTERN PetscErrorCode MatView_SeqDense(Mat, PetscViewer); 635 636 #include <petscdraw.h> 637 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat, PetscViewer viewer) 638 { 639 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 640 PetscMPIInt rank; 641 PetscViewerType vtype; 642 PetscBool iascii, isdraw; 643 PetscViewer sviewer; 644 PetscViewerFormat format; 645 646 PetscFunctionBegin; 647 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 648 
PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 649 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 650 if (iascii) { 651 PetscCall(PetscViewerGetType(viewer, &vtype)); 652 PetscCall(PetscViewerGetFormat(viewer, &format)); 653 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 654 MatInfo info; 655 PetscCall(MatGetInfo(mat, MAT_LOCAL, &info)); 656 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 657 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %" PetscInt_FMT " \n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, 658 (PetscInt)info.memory)); 659 PetscCall(PetscViewerFlush(viewer)); 660 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 661 if (mdn->Mvctx) PetscCall(PetscSFView(mdn->Mvctx, viewer)); 662 PetscFunctionReturn(0); 663 } else if (format == PETSC_VIEWER_ASCII_INFO) { 664 PetscFunctionReturn(0); 665 } 666 } else if (isdraw) { 667 PetscDraw draw; 668 PetscBool isnull; 669 670 PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw)); 671 PetscCall(PetscDrawIsNull(draw, &isnull)); 672 if (isnull) PetscFunctionReturn(0); 673 } 674 675 { 676 /* assemble the entire matrix onto first processor. */ 677 Mat A; 678 PetscInt M = mat->rmap->N, N = mat->cmap->N, m, row, i, nz; 679 PetscInt *cols; 680 PetscScalar *vals; 681 682 PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A)); 683 if (rank == 0) { 684 PetscCall(MatSetSizes(A, M, N, M, N)); 685 } else { 686 PetscCall(MatSetSizes(A, 0, 0, M, N)); 687 } 688 /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */ 689 PetscCall(MatSetType(A, MATMPIDENSE)); 690 PetscCall(MatMPIDenseSetPreallocation(A, NULL)); 691 692 /* Copy the matrix ... 
This isn't the most efficient means, 693 but it's quick for now */ 694 A->insertmode = INSERT_VALUES; 695 696 row = mat->rmap->rstart; 697 m = mdn->A->rmap->n; 698 for (i = 0; i < m; i++) { 699 PetscCall(MatGetRow_MPIDense(mat, row, &nz, &cols, &vals)); 700 PetscCall(MatSetValues_MPIDense(A, 1, &row, nz, cols, vals, INSERT_VALUES)); 701 PetscCall(MatRestoreRow_MPIDense(mat, row, &nz, &cols, &vals)); 702 row++; 703 } 704 705 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 706 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 707 PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 708 if (rank == 0) { 709 PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIDense *)(A->data))->A, ((PetscObject)mat)->name)); 710 PetscCall(MatView_SeqDense(((Mat_MPIDense *)(A->data))->A, sviewer)); 711 } 712 PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 713 PetscCall(PetscViewerFlush(viewer)); 714 PetscCall(MatDestroy(&A)); 715 } 716 PetscFunctionReturn(0); 717 } 718 719 PetscErrorCode MatView_MPIDense(Mat mat, PetscViewer viewer) 720 { 721 PetscBool iascii, isbinary, isdraw, issocket; 722 723 PetscFunctionBegin; 724 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 725 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 726 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket)); 727 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 728 729 if (iascii || issocket || isdraw) { 730 PetscCall(MatView_MPIDense_ASCIIorDraworSocket(mat, viewer)); 731 } else if (isbinary) PetscCall(MatView_Dense_Binary(mat, viewer)); 732 PetscFunctionReturn(0); 733 } 734 735 PetscErrorCode MatGetInfo_MPIDense(Mat A, MatInfoType flag, MatInfo *info) 736 { 737 Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 738 Mat mdn = mat->A; 739 PetscLogDouble isend[5], irecv[5]; 740 741 PetscFunctionBegin; 742 info->block_size = 1.0; 743 744 
PetscCall(MatGetInfo(mdn, MAT_LOCAL, info)); 745 746 isend[0] = info->nz_used; 747 isend[1] = info->nz_allocated; 748 isend[2] = info->nz_unneeded; 749 isend[3] = info->memory; 750 isend[4] = info->mallocs; 751 if (flag == MAT_LOCAL) { 752 info->nz_used = isend[0]; 753 info->nz_allocated = isend[1]; 754 info->nz_unneeded = isend[2]; 755 info->memory = isend[3]; 756 info->mallocs = isend[4]; 757 } else if (flag == MAT_GLOBAL_MAX) { 758 PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)A))); 759 760 info->nz_used = irecv[0]; 761 info->nz_allocated = irecv[1]; 762 info->nz_unneeded = irecv[2]; 763 info->memory = irecv[3]; 764 info->mallocs = irecv[4]; 765 } else if (flag == MAT_GLOBAL_SUM) { 766 PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)A))); 767 768 info->nz_used = irecv[0]; 769 info->nz_allocated = irecv[1]; 770 info->nz_unneeded = irecv[2]; 771 info->memory = irecv[3]; 772 info->mallocs = irecv[4]; 773 } 774 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 775 info->fill_ratio_needed = 0; 776 info->factor_mallocs = 0; 777 PetscFunctionReturn(0); 778 } 779 780 PetscErrorCode MatSetOption_MPIDense(Mat A, MatOption op, PetscBool flg) 781 { 782 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 783 784 PetscFunctionBegin; 785 switch (op) { 786 case MAT_NEW_NONZERO_LOCATIONS: 787 case MAT_NEW_NONZERO_LOCATION_ERR: 788 case MAT_NEW_NONZERO_ALLOCATION_ERR: 789 MatCheckPreallocated(A, 1); 790 PetscCall(MatSetOption(a->A, op, flg)); 791 break; 792 case MAT_ROW_ORIENTED: 793 MatCheckPreallocated(A, 1); 794 a->roworiented = flg; 795 PetscCall(MatSetOption(a->A, op, flg)); 796 break; 797 case MAT_FORCE_DIAGONAL_ENTRIES: 798 case MAT_KEEP_NONZERO_PATTERN: 799 case MAT_USE_HASH_TABLE: 800 case MAT_SORTED_FULL: 801 PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); 802 break; 803 case MAT_IGNORE_OFF_PROC_ENTRIES: 804 a->donotstash = flg; 805 break; 806 
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
  case MAT_SPD:
  case MAT_IGNORE_LOWER_TRIANGULAR:
  case MAT_IGNORE_ZERO_ENTRIES:
  case MAT_SPD_ETERNAL:
    /* if the diagonal matrix is square it inherits some of the properties above */
    PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
    break;
  default:
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %s", MatOptions[op]);
  }
  PetscFunctionReturn(0);
}

/* A <- diag(ll) * A * diag(rr): scale rows by ll (locally owned) and columns by rr
   (gathered onto each process through the Mvctx star forest) */
PetscErrorCode MatDiagonalScale_MPIDense(Mat A, Vec ll, Vec rr)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)A->data;
  const PetscScalar *l;
  PetscScalar        x, *v, *vv, *r;
  PetscInt           i, j, s2a, s3a, s2, s3, m = mdn->A->rmap->n, n = mdn->A->cmap->n, lda;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArray(mdn->A, &vv));
  PetscCall(MatDenseGetLDA(mdn->A, &lda));
  PetscCall(MatGetLocalSize(A, &s2, &s3));
  if (ll) {
    PetscCall(VecGetLocalSize(ll, &s2a));
    PetscCheck(s2a == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left scaling vector non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT, s2a, s2);
    PetscCall(VecGetArrayRead(ll, &l));
    /* row scaling: walk row i across all columns with stride lda */
    for (i = 0; i < m; i++) {
      x = l[i];
      v = vv + i;
      for (j = 0; j < n; j++) {
        (*v) *= x;
        v += lda;
      }
    }
    PetscCall(VecRestoreArrayRead(ll, &l));
    PetscCall(PetscLogFlops(1.0 * n * m));
  }
  if (rr) {
    const PetscScalar *ar;

    PetscCall(VecGetLocalSize(rr, &s3a));
    PetscCheck(s3a == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Right scaling vec non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT ".", s3a, s3);
    PetscCall(VecGetArrayRead(rr, &ar));
    if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
    PetscCall(VecGetArray(mdn->lvec, &r));
    /* broadcast the distributed rr into the local work vector lvec */
    PetscCall(PetscSFBcastBegin(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(mdn->Mvctx,
                              MPIU_SCALAR, ar, r, MPI_REPLACE));
    PetscCall(VecRestoreArrayRead(rr, &ar));
    /* column scaling: column j is contiguous with length m at offset j*lda */
    for (i = 0; i < n; i++) {
      x = r[i];
      v = vv + i * lda;
      for (j = 0; j < m; j++) (*v++) *= x;
    }
    PetscCall(VecRestoreArray(mdn->lvec, &r));
    PetscCall(PetscLogFlops(1.0 * n * m));
  }
  PetscCall(MatDenseRestoreArray(mdn->A, &vv));
  PetscFunctionReturn(0);
}

/* Compute the Frobenius, 1- or infinity-norm of the parallel dense matrix;
   the 2-norm is not supported in parallel */
PetscErrorCode MatNorm_MPIDense(Mat A, NormType type, PetscReal *nrm)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)A->data;
  PetscInt           i, j;
  PetscMPIInt        size;
  PetscReal          sum = 0.0;
  const PetscScalar *av, *v;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArrayRead(mdn->A, &av));
  v = av;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    /* uniprocessor: delegate entirely to the sequential implementation */
    PetscCall(MatNorm(mdn->A, type, nrm));
  } else {
    if (type == NORM_FROBENIUS) {
      /* sum of |a_ij|^2 over the local block, reduced with MPI_SUM, then sqrt */
      for (i = 0; i < mdn->A->cmap->n * mdn->A->rmap->n; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
      *nrm = PetscSqrtReal(*nrm);
      PetscCall(PetscLogFlops(2.0 * mdn->A->cmap->n * mdn->A->rmap->n));
    } else if (type == NORM_1) {
      /* max over columns of the (globally reduced) column absolute sums */
      PetscReal *tmp, *tmp2;
      PetscCall(PetscCalloc2(A->cmap->N, &tmp, A->cmap->N, &tmp2));
      *nrm = 0.0;
      v    = av;
      for (j = 0; j < mdn->A->cmap->n; j++) {
        for (i = 0; i < mdn->A->rmap->n; i++) {
          tmp[j] += PetscAbsScalar(*v);
          v++;
        }
      }
      PetscCall(MPIU_Allreduce(tmp, tmp2, A->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
      for (j = 0; j < A->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      PetscCall(PetscFree2(tmp, tmp2));
      PetscCall(PetscLogFlops(A->cmap->n * A->rmap->n));
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp;
      PetscCall(MatNorm(mdn->A, type, &ntemp));
      PetscCall(MPIU_Allreduce(&ntemp, nrm, 1, MPIU_REAL, MPIU_MAX,
                               PetscObjectComm((PetscObject)A)));
    } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for two norm");
  }
  PetscCall(MatDenseRestoreArrayRead(mdn->A, &av));
  PetscFunctionReturn(0);
}

/* Form the transpose of A, creating, reusing, or replacing in place per 'reuse';
   each column of the local block becomes a row of B set via MatSetValues */
PetscErrorCode MatTranspose_MPIDense(Mat A, MatReuse reuse, Mat *matout)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat           B;
  PetscInt      M = A->rmap->N, N = A->cmap->N, m, n, *rwork, rstart = A->rmap->rstart;
  PetscInt      j, i, lda;
  PetscScalar  *v;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    /* transpose swaps local and global row/column sizes */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
    PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
    PetscCall(MatMPIDenseSetPreallocation(B, NULL));
  } else B = *matout;

  m = a->A->rmap->n;
  n = a->A->cmap->n;
  PetscCall(MatDenseGetArrayRead(a->A, (const PetscScalar **)&v));
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(PetscMalloc1(m, &rwork));
  /* rwork holds the global row indices of the local rows of A, which become
     global column indices of B */
  for (i = 0; i < m; i++) rwork[i] = rstart + i;
  for (j = 0; j < n; j++) {
    PetscCall(MatSetValues(B, 1, &j, m, rwork, v, INSERT_VALUES));
    v += lda;
  }
  PetscCall(MatDenseRestoreArrayRead(a->A, (const PetscScalar **)&v));
  PetscCall(PetscFree(rwork));
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    /* MAT_INPLACE_MATRIX: fold B back into A's header */
    PetscCall(MatHeaderMerge(A, &B));
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDuplicate_MPIDense(Mat, MatDuplicateOption, Mat *);
PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat, PetscScalar);

/* Finalize layouts and apply default preallocation if none was given */
PetscErrorCode MatSetUp_MPIDense(Mat A)
{
  PetscFunctionBegin;
  PetscCall(PetscLayoutSetUp(A->rmap));
  PetscCall(PetscLayoutSetUp(A->cmap));
  if (!A->preallocated) PetscCall(MatMPIDenseSetPreallocation(A, NULL));
  PetscFunctionReturn(0);
}

/* Y <- Y + alpha*X, forwarded to the local sequential blocks */
PetscErrorCode MatAXPY_MPIDense(Mat Y, PetscScalar alpha, Mat X, MatStructure str)
{
  Mat_MPIDense *A = (Mat_MPIDense *)Y->data, *B = (Mat_MPIDense *)X->data;

  PetscFunctionBegin;
  PetscCall(MatAXPY(A->A, alpha, B->A, str));
  PetscFunctionReturn(0);
}

/* Complex-conjugate every entry, forwarded to the local block */
PetscErrorCode MatConjugate_MPIDense(Mat mat)
{
  Mat_MPIDense *a = (Mat_MPIDense *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatConjugate(a->A));
  PetscFunctionReturn(0);
}

/* Keep only the real part of every entry, forwarded to the local block */
PetscErrorCode MatRealPart_MPIDense(Mat A)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatRealPart(a->A));
  PetscFunctionReturn(0);
}

/* Keep only the imaginary part of every entry, forwarded to the local block */
PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatImaginaryPart(a->A));
  PetscFunctionReturn(0);
}

/* Extract column 'col' into v by dispatching to the local block's getcolumnvector op */
static PetscErrorCode MatGetColumnVector_MPIDense(Mat A, Vec v, PetscInt col)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(a->A, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing local matrix");
  PetscCheck(a->A->ops->getcolumnvector, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing get column operation");
  PetscCall((*a->A->ops->getcolumnvector)(a->A, v, col));
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat, PetscInt, PetscReal *);

/* Column-wise reductions (norms, sums, means): reduce locally on the sequential
   block, combine across ranks, then post-process (sqrt for 2-norm, /m for means) */
PetscErrorCode MatGetColumnReductions_MPIDense(Mat A, PetscInt type, PetscReal *reductions)
{
  PetscInt      i, m, n;
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscReal    *work;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A, &m, &n));
  PetscCall(PetscMalloc1(n, &work));
  if (type == REDUCTION_MEAN_REALPART) {
    /* means are computed as global sums divided by the global row count at the end */
    PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_REALPART, work));
  } else if (type == REDUCTION_MEAN_IMAGINARYPART) {
    PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_IMAGINARYPART, work));
  } else {
    PetscCall(MatGetColumnReductions_SeqDense(a->A, type, work));
  }
  if (type == NORM_2) {
    /* square local 2-norms so they can be summed across ranks */
    for (i = 0; i < n; i++) work[i] *= work[i];
  }
  if (type == NORM_INFINITY) {
    PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, A->hdr.comm));
  } else {
    PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, A->hdr.comm));
  }
  PetscCall(PetscFree(work));
  if (type == NORM_2) {
    for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
  } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < n; i++) reductions[i] /= m;
  }
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_CUDA)
/* Shift the (global) diagonal by alpha directly on the device array */
PetscErrorCode MatShift_MPIDenseCUDA(Mat A, PetscScalar alpha)
{
  PetscScalar *da;
  PetscInt     lda;

  PetscFunctionBegin;
  PetscCall(MatDenseCUDAGetArray(A, &da));
  PetscCall(MatDenseGetLDA(A, &lda));
  PetscCall(PetscInfo(A, "Performing Shift on backend\n"));
  PetscCall(MatShift_DenseCUDA_Private(da, alpha, lda, A->rmap->rstart, A->rmap->rend, A->cmap->N));
  PetscCall(MatDenseCUDARestoreArray(A, &da));
  PetscFunctionReturn(0);
}

/* Expose column 'col' as a CUDA vector that aliases the local device storage;
   must be matched by MatDenseRestoreColumnVec */
static PetscErrorCode MatDenseGetColumnVec_MPIDenseCUDA(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscInt      lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    /* lazily create the reusable column-view vector (no array of its own) */
    PetscCall(VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec));
  }
  a->vecinuse = col + 1; /* store col+1 so 0 means "not in use" */
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(MatDenseCUDAGetArray(a->A, (PetscScalar **)&a->ptrinuse));
  PetscCall(VecCUDAPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
  *v = a->cvec;
  PetscFunctionReturn(0);
}

/* Release the column view obtained with MatDenseGetColumnVec (CUDA) */
static PetscErrorCode MatDenseRestoreColumnVec_MPIDenseCUDA(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector");
  a->vecinuse = 0;
  PetscCall(MatDenseCUDARestoreArray(a->A, (PetscScalar **)&a->ptrinuse));
  PetscCall(VecCUDAResetArray(a->cvec));
  if (v) *v = NULL;
  PetscFunctionReturn(0);
}

/* Read-only variant of the CUDA column view; locks the vector for reading */
static PetscErrorCode MatDenseGetColumnVecRead_MPIDenseCUDA(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscInt      lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) { PetscCall(VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec)); }
  a->vecinuse = col + 1;
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(MatDenseCUDAGetArrayRead(a->A, &a->ptrinuse));
  PetscCall(VecCUDAPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
  PetscCall(VecLockReadPush(a->cvec));
  *v = a->cvec;
  PetscFunctionReturn(0);
}

static PetscErrorCode
MatDenseRestoreColumnVecRead_MPIDenseCUDA(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
  a->vecinuse = 0;
  PetscCall(MatDenseCUDARestoreArrayRead(a->A, &a->ptrinuse));
  PetscCall(VecLockReadPop(a->cvec)); /* undo the read lock taken in the Get call */
  PetscCall(VecCUDAResetArray(a->cvec));
  if (v) *v = NULL;
  PetscFunctionReturn(0);
}

/* Write-only variant of the CUDA column view */
static PetscErrorCode MatDenseGetColumnVecWrite_MPIDenseCUDA(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscInt      lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) { PetscCall(VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec)); }
  a->vecinuse = col + 1;
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(MatDenseCUDAGetArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
  PetscCall(VecCUDAPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
  *v = a->cvec;
  PetscFunctionReturn(0);
}

/* Release the write-only CUDA column view */
static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDenseCUDA(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
  a->vecinuse = 0;
  PetscCall(MatDenseCUDARestoreArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
  PetscCall(VecCUDAResetArray(a->cvec));
  if (v) *v = NULL;
  PetscFunctionReturn(0);
}

/* Substitute a user-provided device array for the local block's storage */
static PetscErrorCode MatDenseCUDAPlaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a)
{
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!l->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!l->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseCUDAPlaceArray(l->A, a));
  PetscFunctionReturn(0);
}

/* Restore the local block's original device array after a place */
static PetscErrorCode MatDenseCUDAResetArray_MPIDenseCUDA(Mat A)
{
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!l->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!l->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseCUDAResetArray(l->A));
  PetscFunctionReturn(0);
}

/* Permanently replace the local block's device array */
static PetscErrorCode MatDenseCUDAReplaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a)
{
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!l->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!l->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseCUDAReplaceArray(l->A, a));
  PetscFunctionReturn(0);
}

/* Thin forwarding wrapper: write access to the local block's device array */
static PetscErrorCode MatDenseCUDAGetArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
{
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseCUDAGetArrayWrite(l->A, a));
  PetscFunctionReturn(0);
}

/* Thin forwarding wrappers: each delegates the CUDA array access to the local block */
static PetscErrorCode MatDenseCUDARestoreArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
{
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseCUDARestoreArrayWrite(l->A, a));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDAGetArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
{
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseCUDAGetArrayRead(l->A, a));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDARestoreArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
{
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseCUDARestoreArrayRead(l->A, a));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDAGetArray_MPIDenseCUDA(Mat A, PetscScalar **a)
{
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseCUDAGetArray(l->A, a));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDARestoreArray_MPIDenseCUDA(Mat A, PetscScalar **a)
{
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseCUDARestoreArray(l->A, a));
  PetscFunctionReturn(0);
}

/* Host (non-CUDA) column-vec implementations, defined later in this file */
static PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat, PetscInt, Vec *);
static PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat, PetscInt, Vec *);
static PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat, PetscInt, Vec *);
static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat, PetscInt, Vec *);
static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat, PetscInt, Vec *);
static PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat, PetscInt, Vec *);
static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat, Mat *);

static
/* Switch the matrix between CPU and GPU execution: rebind the local block,
   swap the composed column-vec methods and the shift op, and pick the
   matching default random type */
PetscErrorCode MatBindToCPU_MPIDenseCUDA(Mat mat, PetscBool bind)
{
  Mat_MPIDense *d = (Mat_MPIDense *)mat->data;

  PetscFunctionBegin;
  PetscCheck(!d->vecinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!d->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  if (d->A) PetscCall(MatBindToCPU(d->A, bind));
  mat->boundtocpu = bind;
  if (!bind) {
    PetscBool iscuda;

    PetscCall(PetscFree(mat->defaultrandtype));
    PetscCall(PetscStrallocpy(PETSCCURAND, &mat->defaultrandtype));
    /* discard cached helper vec/mat if they are not CUDA types */
    PetscCall(PetscObjectTypeCompare((PetscObject)d->cvec, VECMPICUDA, &iscuda));
    if (!iscuda) PetscCall(VecDestroy(&d->cvec));
    PetscCall(PetscObjectTypeCompare((PetscObject)d->cmat, MATMPIDENSECUDA, &iscuda));
    if (!iscuda) PetscCall(MatDestroy(&d->cmat));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDenseCUDA));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDenseCUDA));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDenseCUDA));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDenseCUDA));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDenseCUDA));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDenseCUDA));
    mat->ops->shift = MatShift_MPIDenseCUDA;
  } else {
    PetscCall(PetscFree(mat->defaultrandtype));
    PetscCall(PetscStrallocpy(PETSCRANDER48, &mat->defaultrandtype));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C",
                                         MatDenseGetColumnVec_MPIDense));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDense));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDense));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDense));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDense));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDense));
    mat->ops->shift = MatShift_MPIDense;
  }
  if (d->cmat) PetscCall(MatBindToCPU(d->cmat, bind));
  PetscFunctionReturn(0);
}

/* Preallocate the CUDA local block, optionally adopting a caller-supplied
   device array d_data; a no-op for non-CUDA matrices */
PetscErrorCode MatMPIDenseCUDASetPreallocation(Mat A, PetscScalar *d_data)
{
  Mat_MPIDense *d = (Mat_MPIDense *)A->data;
  PetscBool     iscuda;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSECUDA, &iscuda));
  if (!iscuda) PetscFunctionReturn(0);
  PetscCall(PetscLayoutSetUp(A->rmap));
  PetscCall(PetscLayoutSetUp(A->cmap));
  if (!d->A) {
    /* the local block stores all N global columns for the local rows */
    PetscCall(MatCreate(PETSC_COMM_SELF, &d->A));
    PetscCall(MatSetSizes(d->A, A->rmap->n, A->cmap->N, A->rmap->n, A->cmap->N));
  }
  PetscCall(MatSetType(d->A, MATSEQDENSECUDA));
  PetscCall(MatSeqDenseCUDASetPreallocation(d->A, d_data));
  A->preallocated = PETSC_TRUE;
  A->assembled    = PETSC_TRUE;
  PetscFunctionReturn(0);
}
#endif

/* Fill the matrix with random values via the local block; mirror its offload state */
static PetscErrorCode MatSetRandom_MPIDense(Mat x, PetscRandom rctx)
{
  Mat_MPIDense *d = (Mat_MPIDense *)x->data;

  PetscFunctionBegin;
  PetscCall(MatSetRandom(d->A, rctx));
#if defined(PETSC_HAVE_DEVICE)
  x->offloadmask = d->A->offloadmask;
#endif
  PetscFunctionReturn(0);
}

/* A dense matrix never has missing diagonal entries */
static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A, PetscBool *missing, PetscInt *d)
{
  PetscFunctionBegin;
  *missing = PETSC_FALSE;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat);
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat);
static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat);
static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat);
static PetscErrorCode MatEqual_MPIDense(Mat, Mat, PetscBool *);
static PetscErrorCode MatLoad_MPIDense(Mat, PetscViewer);

/* -------------------------------------------------------------------*/
/* Function table for MATMPIDENSE; slot indices are given in the comments */
static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
                                       MatGetRow_MPIDense,
                                       MatRestoreRow_MPIDense,
                                       MatMult_MPIDense,
                                       /* 4*/ MatMultAdd_MPIDense,
                                       MatMultTranspose_MPIDense,
                                       MatMultTransposeAdd_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 10*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatTranspose_MPIDense,
                                       /* 15*/ MatGetInfo_MPIDense,
                                       MatEqual_MPIDense,
                                       MatGetDiagonal_MPIDense,
                                       MatDiagonalScale_MPIDense,
                                       MatNorm_MPIDense,
                                       /* 20*/ MatAssemblyBegin_MPIDense,
                                       MatAssemblyEnd_MPIDense,
                                       MatSetOption_MPIDense,
                                       MatZeroEntries_MPIDense,
                                       /* 24*/ MatZeroRows_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 29*/ MatSetUp_MPIDense,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPIDense,
                                       NULL,
                                       /* 34*/ MatDuplicate_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 39*/ MatAXPY_MPIDense,
                                       MatCreateSubMatrices_MPIDense,
                                       NULL,
                                       MatGetValues_MPIDense,
                                       MatCopy_MPIDense,
                                       /* 44*/ NULL,
                                       MatScale_MPIDense,
                                       MatShift_MPIDense,
                                       NULL,
                                       NULL,
                                       /* 49*/ MatSetRandom_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 54*/
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 59*/ MatCreateSubMatrix_MPIDense,
                                       MatDestroy_MPIDense,
                                       MatView_MPIDense,
                                       NULL,
                                       NULL,
                                       /* 64*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 69*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 74*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 79*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 83*/ MatLoad_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 89*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 94*/ NULL,
                                       NULL,
                                       MatMatTransposeMultSymbolic_MPIDense_MPIDense,
                                       MatMatTransposeMultNumeric_MPIDense_MPIDense,
                                       NULL,
                                       /* 99*/ MatProductSetFromOptions_MPIDense,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPIDense,
                                       NULL,
                                       /*104*/ NULL,
                                       MatRealPart_MPIDense,
                                       MatImaginaryPart_MPIDense,
                                       NULL,
                                       NULL,
                                       /*109*/ NULL,
                                       NULL,
                                       NULL,
                                       MatGetColumnVector_MPIDense,
                                       MatMissingDiagonal_MPIDense,
                                       /*114*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*119*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*124*/ NULL,
                                       MatGetColumnReductions_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*129*/ NULL,
                                       NULL,
                                       MatTransposeMatMultSymbolic_MPIDense_MPIDense,
                                       MatTransposeMatMultNumeric_MPIDense_MPIDense,
                                       NULL,
                                       /*134*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*139*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatCreateMPIMatConcatenateSeqMat_MPIDense,
                                       /*145*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*150*/ NULL};

/* Create (if needed) and preallocate the local sequential block; 'data' may be
   a caller-provided array adopted by the block */
PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat, PetscScalar *data)
{
  Mat_MPIDense *a = (Mat_MPIDense *)mat->data;
  PetscBool     iscuda;

  PetscFunctionBegin;
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));
  if (!a->A) {
    /* the local block holds all N global columns for the locally owned rows */
    PetscCall(MatCreate(PETSC_COMM_SELF, &a->A));
    PetscCall(MatSetSizes(a->A, mat->rmap->n, mat->cmap->N, mat->rmap->n, mat->cmap->N));
  }
  PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSECUDA, &iscuda));
  PetscCall(MatSetType(a->A, iscuda ? MATSEQDENSECUDA : MATSEQDENSE));
  PetscCall(MatSeqDenseSetPreallocation(a->A, data));
#if defined(PETSC_HAVE_DEVICE)
  mat->offloadmask = a->A->offloadmask;
#endif
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  PetscFunctionReturn(0);
}

/* Convert a MATMPIAIJ matrix to MATMPIDENSE: densify the local part, then
   concatenate the sequential pieces into a parallel dense matrix */
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIDense(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat B, C;

  PetscFunctionBegin;
  PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &C));
  PetscCall(MatConvert_SeqAIJ_SeqDense(C, MATSEQDENSE, MAT_INITIAL_MATRIX, &B));
  PetscCall(MatDestroy(&C));
  if (reuse == MAT_REUSE_MATRIX) {
    C = *newmat;
  } else C = NULL;
  PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C));
  PetscCall(MatDestroy(&B));
  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A, &C));
  } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
  PetscFunctionReturn(0);
}

/* Convert a MATMPIDENSE matrix to MATMPIAIJ by sparsifying the local block
   and concatenating the sequential pieces */
PetscErrorCode MatConvert_MPIDense_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat B, C;

  PetscFunctionBegin;
  PetscCall(MatDenseGetLocalMatrix(A, &C));
  PetscCall(MatConvert_SeqDense_SeqAIJ(C, MATSEQAIJ, MAT_INITIAL_MATRIX, &B));
  if (reuse == MAT_REUSE_MATRIX) {
    C = *newmat;
  } else C = NULL;
  PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ?
                                             MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C));
  PetscCall(MatDestroy(&B));
  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A, &C));
  } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_ELEMENTAL)
/* Convert a MATMPIDENSE matrix to MATELEMENTAL by inserting the locally owned
   rows (all N global columns) into a new Elemental matrix */
PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat          mat_elemental;
  PetscScalar *v;
  PetscInt     m = A->rmap->n, N = A->cmap->N, rstart = A->rmap->rstart, i, *rows, *cols;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    mat_elemental = *newmat;
    PetscCall(MatZeroEntries(*newmat));
  } else {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental));
    PetscCall(MatSetSizes(mat_elemental, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N));
    PetscCall(MatSetType(mat_elemental, MATELEMENTAL));
    PetscCall(MatSetUp(mat_elemental));
    /* the dense local storage is column oriented */
    PetscCall(MatSetOption(mat_elemental, MAT_ROW_ORIENTED, PETSC_FALSE));
  }

  PetscCall(PetscMalloc2(m, &rows, N, &cols));
  for (i = 0; i < N; i++) cols[i] = i;
  for (i = 0; i < m; i++) rows[i] = rstart + i;

  /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
  PetscCall(MatDenseGetArray(A, &v));
  PetscCall(MatSetValues(mat_elemental, m, rows, N, cols, v, ADD_VALUES));
  PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY));
  PetscCall(MatDenseRestoreArray(A, &v));
  PetscCall(PetscFree2(rows, cols));

  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A, &mat_elemental));
  } else {
    *newmat = mat_elemental;
  }
  PetscFunctionReturn(0);
}
#endif

/* Expose a pointer to one column of the local dense storage */
static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A, PetscInt col, PetscScalar **vals)
{
  Mat_MPIDense *mat =
    (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseGetColumn(mat->A, col, vals));
  PetscFunctionReturn(0);
}

/* Release the column pointer obtained with MatDenseGetColumn */
static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A, PetscScalar **vals)
{
  Mat_MPIDense *mat = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseRestoreColumn(mat->A, vals));
  PetscFunctionReturn(0);
}

/* Stack per-process sequential dense matrices into one parallel dense matrix;
   symbolic (creation) phase runs only for MAT_INITIAL_MATRIX */
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
{
  Mat_MPIDense *mat;
  PetscInt      m, nloc, N;

  PetscFunctionBegin;
  PetscCall(MatGetSize(inmat, &m, &N));
  PetscCall(MatGetLocalSize(inmat, NULL, &nloc));
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    PetscInt sum;

    if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N));
    /* Check sum(n) = N */
    PetscCall(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm));
    PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N);

    PetscCall(MatCreateDense(comm, m, n, PETSC_DETERMINE, N, NULL, outmat));
    PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  }

  /* numeric phase */
  mat = (Mat_MPIDense *)(*outmat)->data;
  PetscCall(MatCopy(inmat, mat->A, SAME_NONZERO_PATTERN));
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_CUDA)
/* Convert MATMPIDENSECUDA back to MATMPIDENSE: bind to CPU, strip the CUDA
   composed methods, and convert the local block to host storage */
PetscErrorCode MatConvert_MPIDenseCUDA_MPIDense(Mat M, MatType type, MatReuse reuse, Mat *newmat)
{
  Mat           B;
  Mat_MPIDense *m;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    PetscCall(MatDuplicate(M, MAT_COPY_VALUES, newmat));
  } else if (reuse == MAT_REUSE_MATRIX) {
    PetscCall(MatCopy(M, *newmat, SAME_NONZERO_PATTERN));
  }

  B = *newmat;
  PetscCall(MatBindToCPU_MPIDenseCUDA(B, PETSC_TRUE));
  PetscCall(PetscFree(B->defaultvectype));
  PetscCall(PetscStrallocpy(VECSTANDARD, &B->defaultvectype));
  PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIDENSE));
  /* remove all CUDA-specific composed methods */
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpidensecuda_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAPlaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAResetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAReplaceArray_C", NULL));
  m = (Mat_MPIDense *)(B)->data;
  if (m->A) PetscCall(MatConvert(m->A, MATSEQDENSE, MAT_INPLACE_MATRIX, &m->A));
  B->ops->bindtocpu = NULL;
  B->offloadmask    = PETSC_OFFLOAD_CPU;
  PetscFunctionReturn(0);
}

/* Convert MATMPIDENSE to MATMPIDENSECUDA: install CUDA composed methods,
   convert the local block to device storage, and unbind from the CPU */
PetscErrorCode MatConvert_MPIDense_MPIDenseCUDA(Mat M, MatType type, MatReuse reuse, Mat *newmat)
{
  Mat           B;
Mat_MPIDense *m; 1691 1692 PetscFunctionBegin; 1693 if (reuse == MAT_INITIAL_MATRIX) { 1694 PetscCall(MatDuplicate(M, MAT_COPY_VALUES, newmat)); 1695 } else if (reuse == MAT_REUSE_MATRIX) { 1696 PetscCall(MatCopy(M, *newmat, SAME_NONZERO_PATTERN)); 1697 } 1698 1699 B = *newmat; 1700 PetscCall(PetscFree(B->defaultvectype)); 1701 PetscCall(PetscStrallocpy(VECCUDA, &B->defaultvectype)); 1702 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIDENSECUDA)); 1703 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpidensecuda_mpidense_C", MatConvert_MPIDenseCUDA_MPIDense)); 1704 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", MatProductSetFromOptions_MPIAIJ_MPIDense)); 1705 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", MatProductSetFromOptions_MPIAIJ_MPIDense)); 1706 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ)); 1707 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ)); 1708 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArray_C", MatDenseCUDAGetArray_MPIDenseCUDA)); 1709 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArrayRead_C", MatDenseCUDAGetArrayRead_MPIDenseCUDA)); 1710 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArrayWrite_C", MatDenseCUDAGetArrayWrite_MPIDenseCUDA)); 1711 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArray_C", MatDenseCUDARestoreArray_MPIDenseCUDA)); 1712 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArrayRead_C", MatDenseCUDARestoreArrayRead_MPIDenseCUDA)); 1713 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArrayWrite_C", 
MatDenseCUDARestoreArrayWrite_MPIDenseCUDA)); 1714 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAPlaceArray_C", MatDenseCUDAPlaceArray_MPIDenseCUDA)); 1715 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAResetArray_C", MatDenseCUDAResetArray_MPIDenseCUDA)); 1716 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAReplaceArray_C", MatDenseCUDAReplaceArray_MPIDenseCUDA)); 1717 m = (Mat_MPIDense *)(B->data); 1718 if (m->A) { 1719 PetscCall(MatConvert(m->A, MATSEQDENSECUDA, MAT_INPLACE_MATRIX, &m->A)); 1720 B->offloadmask = PETSC_OFFLOAD_BOTH; 1721 } else { 1722 B->offloadmask = PETSC_OFFLOAD_UNALLOCATED; 1723 } 1724 PetscCall(MatBindToCPU_MPIDenseCUDA(B, PETSC_FALSE)); 1725 1726 B->ops->bindtocpu = MatBindToCPU_MPIDenseCUDA; 1727 PetscFunctionReturn(0); 1728 } 1729 #endif 1730 1731 PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A, PetscInt col, Vec *v) 1732 { 1733 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1734 PetscInt lda; 1735 1736 PetscFunctionBegin; 1737 PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1738 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1739 if (!a->cvec) PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec)); 1740 a->vecinuse = col + 1; 1741 PetscCall(MatDenseGetLDA(a->A, &lda)); 1742 PetscCall(MatDenseGetArray(a->A, (PetscScalar **)&a->ptrinuse)); 1743 PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda)); 1744 *v = a->cvec; 1745 PetscFunctionReturn(0); 1746 } 1747 1748 PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A, PetscInt col, Vec *v) 1749 { 1750 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1751 1752 PetscFunctionBegin; 1753 PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first"); 1754 
PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector"); 1755 a->vecinuse = 0; 1756 PetscCall(MatDenseRestoreArray(a->A, (PetscScalar **)&a->ptrinuse)); 1757 PetscCall(VecResetArray(a->cvec)); 1758 if (v) *v = NULL; 1759 PetscFunctionReturn(0); 1760 } 1761 1762 PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v) 1763 { 1764 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1765 PetscInt lda; 1766 1767 PetscFunctionBegin; 1768 PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1769 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1770 if (!a->cvec) PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec)); 1771 a->vecinuse = col + 1; 1772 PetscCall(MatDenseGetLDA(a->A, &lda)); 1773 PetscCall(MatDenseGetArrayRead(a->A, &a->ptrinuse)); 1774 PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda)); 1775 PetscCall(VecLockReadPush(a->cvec)); 1776 *v = a->cvec; 1777 PetscFunctionReturn(0); 1778 } 1779 1780 PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v) 1781 { 1782 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1783 1784 PetscFunctionBegin; 1785 PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first"); 1786 PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector"); 1787 a->vecinuse = 0; 1788 PetscCall(MatDenseRestoreArrayRead(a->A, &a->ptrinuse)); 1789 PetscCall(VecLockReadPop(a->cvec)); 1790 PetscCall(VecResetArray(a->cvec)); 1791 if (v) *v = NULL; 1792 PetscFunctionReturn(0); 1793 } 1794 1795 PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v) 1796 { 1797 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1798 PetscInt lda; 1799 1800 
PetscFunctionBegin; 1801 PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1802 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1803 if (!a->cvec) PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec)); 1804 a->vecinuse = col + 1; 1805 PetscCall(MatDenseGetLDA(a->A, &lda)); 1806 PetscCall(MatDenseGetArrayWrite(a->A, (PetscScalar **)&a->ptrinuse)); 1807 PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda)); 1808 *v = a->cvec; 1809 PetscFunctionReturn(0); 1810 } 1811 1812 PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v) 1813 { 1814 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1815 1816 PetscFunctionBegin; 1817 PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first"); 1818 PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector"); 1819 a->vecinuse = 0; 1820 PetscCall(MatDenseRestoreArrayWrite(a->A, (PetscScalar **)&a->ptrinuse)); 1821 PetscCall(VecResetArray(a->cvec)); 1822 if (v) *v = NULL; 1823 PetscFunctionReturn(0); 1824 } 1825 1826 PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A, PetscInt rbegin, PetscInt rend, PetscInt cbegin, PetscInt cend, Mat *v) 1827 { 1828 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1829 Mat_MPIDense *c; 1830 MPI_Comm comm; 1831 PetscInt pbegin, pend; 1832 1833 PetscFunctionBegin; 1834 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 1835 PetscCheck(!a->vecinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1836 PetscCheck(!a->matinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1837 pbegin = PetscMax(0, PetscMin(A->rmap->rend, rbegin) - A->rmap->rstart); 1838 pend = PetscMin(A->rmap->n, PetscMax(0, rend - 
A->rmap->rstart)); 1839 if (!a->cmat) { 1840 PetscCall(MatCreate(comm, &a->cmat)); 1841 PetscCall(MatSetType(a->cmat, ((PetscObject)A)->type_name)); 1842 if (rend - rbegin == A->rmap->N) PetscCall(PetscLayoutReference(A->rmap, &a->cmat->rmap)); 1843 else { 1844 PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin)); 1845 PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin)); 1846 PetscCall(PetscLayoutSetUp(a->cmat->rmap)); 1847 } 1848 PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin)); 1849 PetscCall(PetscLayoutSetUp(a->cmat->cmap)); 1850 } else { 1851 PetscBool same = (PetscBool)(rend - rbegin == a->cmat->rmap->N); 1852 if (same && a->cmat->rmap->N != A->rmap->N) { 1853 same = (PetscBool)(pend - pbegin == a->cmat->rmap->n); 1854 PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &same, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A))); 1855 } 1856 if (!same) { 1857 PetscCall(PetscLayoutDestroy(&a->cmat->rmap)); 1858 PetscCall(PetscLayoutCreate(comm, &a->cmat->rmap)); 1859 PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin)); 1860 PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin)); 1861 PetscCall(PetscLayoutSetUp(a->cmat->rmap)); 1862 } 1863 if (cend - cbegin != a->cmat->cmap->N) { 1864 PetscCall(PetscLayoutDestroy(&a->cmat->cmap)); 1865 PetscCall(PetscLayoutCreate(comm, &a->cmat->cmap)); 1866 PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin)); 1867 PetscCall(PetscLayoutSetUp(a->cmat->cmap)); 1868 } 1869 } 1870 c = (Mat_MPIDense *)a->cmat->data; 1871 PetscCheck(!c->A, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1872 PetscCall(MatDenseGetSubMatrix(a->A, pbegin, pend, cbegin, cend, &c->A)); 1873 1874 a->cmat->preallocated = PETSC_TRUE; 1875 a->cmat->assembled = PETSC_TRUE; 1876 #if defined(PETSC_HAVE_DEVICE) 1877 a->cmat->offloadmask = c->A->offloadmask; 1878 #endif 1879 a->matinuse = cbegin + 1; 1880 *v = a->cmat; 1881 PetscFunctionReturn(0); 1882 } 1883 1884 PetscErrorCode 
MatDenseRestoreSubMatrix_MPIDense(Mat A, Mat *v) 1885 { 1886 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1887 Mat_MPIDense *c; 1888 1889 PetscFunctionBegin; 1890 PetscCheck(a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetSubMatrix() first"); 1891 PetscCheck(a->cmat, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal matrix"); 1892 PetscCheck(*v == a->cmat, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Not the matrix obtained from MatDenseGetSubMatrix()"); 1893 a->matinuse = 0; 1894 c = (Mat_MPIDense *)a->cmat->data; 1895 PetscCall(MatDenseRestoreSubMatrix(a->A, &c->A)); 1896 if (v) *v = NULL; 1897 #if defined(PETSC_HAVE_DEVICE) 1898 A->offloadmask = a->A->offloadmask; 1899 #endif 1900 PetscFunctionReturn(0); 1901 } 1902 1903 /*MC 1904 MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices. 1905 1906 Options Database Keys: 1907 . -mat_type mpidense - sets the matrix type to `MATMPIDENSE` during a call to `MatSetFromOptions()` 1908 1909 Level: beginner 1910 1911 .seealso: `MatCreateDense()`, `MATSEQDENSE`, `MATDENSE` 1912 M*/ 1913 PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat) 1914 { 1915 Mat_MPIDense *a; 1916 1917 PetscFunctionBegin; 1918 PetscCall(PetscNew(&a)); 1919 mat->data = (void *)a; 1920 PetscCall(PetscMemcpy(mat->ops, &MatOps_Values, sizeof(struct _MatOps))); 1921 1922 mat->insertmode = NOT_SET_VALUES; 1923 1924 /* build cache for off array entries formed */ 1925 a->donotstash = PETSC_FALSE; 1926 1927 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)mat), 1, &mat->stash)); 1928 1929 /* stuff used for matrix vector multiply */ 1930 a->lvec = NULL; 1931 a->Mvctx = NULL; 1932 a->roworiented = PETSC_TRUE; 1933 1934 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", MatDenseGetLDA_MPIDense)); 1935 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", MatDenseSetLDA_MPIDense)); 1936 
PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", MatDenseGetArray_MPIDense)); 1937 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", MatDenseRestoreArray_MPIDense)); 1938 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", MatDenseGetArrayRead_MPIDense)); 1939 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", MatDenseRestoreArrayRead_MPIDense)); 1940 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", MatDenseGetArrayWrite_MPIDense)); 1941 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", MatDenseRestoreArrayWrite_MPIDense)); 1942 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", MatDensePlaceArray_MPIDense)); 1943 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", MatDenseResetArray_MPIDense)); 1944 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", MatDenseReplaceArray_MPIDense)); 1945 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDense)); 1946 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDense)); 1947 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDense)); 1948 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDense)); 1949 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDense)); 1950 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDense)); 1951 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", MatDenseGetSubMatrix_MPIDense)); 1952 
PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", MatDenseRestoreSubMatrix_MPIDense)); 1953 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense)); 1954 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", MatConvert_MPIDense_MPIAIJ)); 1955 #if defined(PETSC_HAVE_ELEMENTAL) 1956 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", MatConvert_MPIDense_Elemental)); 1957 #endif 1958 #if defined(PETSC_HAVE_SCALAPACK) 1959 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", MatConvert_Dense_ScaLAPACK)); 1960 #endif 1961 #if defined(PETSC_HAVE_CUDA) 1962 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", MatConvert_MPIDense_MPIDenseCUDA)); 1963 #endif 1964 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", MatMPIDenseSetPreallocation_MPIDense)); 1965 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense)); 1966 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ)); 1967 #if defined(PETSC_HAVE_CUDA) 1968 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense)); 1969 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ)); 1970 #endif 1971 1972 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", MatDenseGetColumn_MPIDense)); 1973 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", MatDenseRestoreColumn_MPIDense)); 1974 PetscCall(PetscObjectChangeTypeName((PetscObject)mat, 
MATMPIDENSE)); 1975 PetscFunctionReturn(0); 1976 } 1977 1978 /*MC 1979 MATMPIDENSECUDA - MATMPIDENSECUDA = "mpidensecuda" - A matrix type to be used for distributed dense matrices on GPUs. 1980 1981 Options Database Keys: 1982 . -mat_type mpidensecuda - sets the matrix type to `MATMPIDENSECUDA` during a call to `MatSetFromOptions()` 1983 1984 Level: beginner 1985 1986 .seealso: `MATMPIDENSE`, `MATSEQDENSE`, `MATSEQDENSECUDA` 1987 M*/ 1988 #if defined(PETSC_HAVE_CUDA) 1989 #include <petsc/private/deviceimpl.h> 1990 PETSC_EXTERN PetscErrorCode MatCreate_MPIDenseCUDA(Mat B) 1991 { 1992 PetscFunctionBegin; 1993 PetscCall(PetscDeviceInitialize(PETSC_DEVICE_CUDA)); 1994 PetscCall(MatCreate_MPIDense(B)); 1995 PetscCall(MatConvert_MPIDense_MPIDenseCUDA(B, MATMPIDENSECUDA, MAT_INPLACE_MATRIX, &B)); 1996 PetscFunctionReturn(0); 1997 } 1998 #endif 1999 2000 /*MC 2001 MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices. 2002 2003 This matrix type is identical to `MATSEQDENSE` when constructed with a single process communicator, 2004 and `MATMPIDENSE` otherwise. 2005 2006 Options Database Keys: 2007 . -mat_type dense - sets the matrix type to `MATDENSE` during a call to `MatSetFromOptions()` 2008 2009 Level: beginner 2010 2011 .seealso: `MATSEQDENSE`, `MATMPIDENSE`, `MATDENSECUDA` 2012 M*/ 2013 2014 /*MC 2015 MATDENSECUDA - MATDENSECUDA = "densecuda" - A matrix type to be used for dense matrices on GPUs. 2016 2017 This matrix type is identical to `MATSEQDENSECUDA` when constructed with a single process communicator, 2018 and `MATMPIDENSECUDA` otherwise. 2019 2020 Options Database Keys: 2021 . -mat_type densecuda - sets the matrix type to `MATDENSECUDA` during a call to `MatSetFromOptions()` 2022 2023 Level: beginner 2024 2025 .seealso: `MATSEQDENSECUDA`, `MATMPIDENSECUDA`, `MATDENSE` 2026 M*/ 2027 2028 /*@C 2029 MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries 2030 2031 Collective 2032 2033 Input Parameters: 2034 . 
B - the matrix
-  data - optional location of matrix data.  Set data=NULL for PETSc
   to control all matrix memory allocation.

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=NULL.

   Level: intermediate

.seealso: `MATMPIDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
@*/
PetscErrorCode MatMPIDenseSetPreallocation(Mat B, PetscScalar *data)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  /* dispatches to MatMPIDenseSetPreallocation_MPIDense when B is MATMPIDENSE; no-op otherwise */
  PetscTryMethod(B, "MatMPIDenseSetPreallocation_C", (Mat, PetscScalar *), (B, data));
  PetscFunctionReturn(0);
}

/*@
   MatDensePlaceArray - Allows one to replace the array in a `MATDENSE` matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Note:
   You can return to the original array with a call to `MatDenseResetArray()`. The user is responsible for freeing this array; it will not be
   freed when the matrix is destroyed.

   Level: developer

.seealso: `MATDENSE`, `MatDenseGetArray()`, `MatDenseResetArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`,
          `MatDenseReplaceArray()`
@*/
PetscErrorCode MatDensePlaceArray(Mat mat, const PetscScalar *array)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  PetscUseMethod(mat, "MatDensePlaceArray_C", (Mat, const PetscScalar *), (mat, array));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
#if defined(PETSC_HAVE_DEVICE)
  /* the placed array lives on the host, so the CPU copy is now authoritative */
  mat->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(0);
}

/*@
   MatDenseResetArray - Resets the matrix array to that it previously had before the call to `MatDensePlaceArray()`

   Not Collective

   Input Parameter:
.  mat - the matrix

   Note:
   You can only call this after a call to `MatDensePlaceArray()`

   Level: developer

.seealso: `MATDENSE`, `MatDenseGetArray()`, `MatDensePlaceArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`
@*/
PetscErrorCode MatDenseResetArray(Mat mat)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  PetscUseMethod(mat, "MatDenseResetArray_C", (Mat), (mat));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  PetscFunctionReturn(0);
}

/*@
   MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Note:
   The memory passed in MUST be obtained with `PetscMalloc()` and CANNOT be
   freed by the user. It will be freed when the matrix is destroyed.

   Level: developer

.seealso: `MatDensePlaceArray()`, `MatDenseGetArray()`, `VecReplaceArray()`
@*/
PetscErrorCode MatDenseReplaceArray(Mat mat, const PetscScalar *array)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  PetscUseMethod(mat, "MatDenseReplaceArray_C", (Mat, const PetscScalar *), (mat, array));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
#if defined(PETSC_HAVE_DEVICE)
  /* the replacement array lives on the host, so the CPU copy is now authoritative */
  mat->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_CUDA)
/*@C
   MatDenseCUDAPlaceArray - Allows one to replace the GPU array in a `MATDENSECUDA` matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Note:
   You can return to the original array with a call to `MatDenseCUDAResetArray()`. The user is responsible for freeing this array; it will not be
   freed when the matrix is destroyed. The array must have been allocated with cudaMalloc().

   Level: developer

.seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDAResetArray()`, `MatDenseCUDAReplaceArray()`
@*/
PetscErrorCode MatDenseCUDAPlaceArray(Mat mat, const PetscScalar *array)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  PetscUseMethod(mat, "MatDenseCUDAPlaceArray_C", (Mat, const PetscScalar *), (mat, array));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  /* the placed array lives on the device, so the GPU copy is now authoritative */
  mat->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDAResetArray - Resets the matrix array to that it previously had before the call to `MatDenseCUDAPlaceArray()`

   Not Collective

   Input Parameter:
.  mat - the matrix

   Note:
   You can only call this after a call to `MatDenseCUDAPlaceArray()`

   Level: developer

.seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDAPlaceArray()`
@*/
PetscErrorCode MatDenseCUDAResetArray(Mat mat)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  PetscUseMethod(mat, "MatDenseCUDAResetArray_C", (Mat), (mat));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDAReplaceArray - Allows one to replace the GPU array in a `MATDENSECUDA` matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Note:
   This permanently replaces the GPU array and frees the memory associated with the old GPU array.
   The memory passed in CANNOT be freed by the user. It will be freed
   when the matrix is destroyed. The array should respect the matrix leading dimension.

   Level: developer

.seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDAPlaceArray()`, `MatDenseCUDAResetArray()`
@*/
PetscErrorCode MatDenseCUDAReplaceArray(Mat mat, const PetscScalar *array)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  PetscUseMethod(mat, "MatDenseCUDAReplaceArray_C", (Mat, const PetscScalar *), (mat, array));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  /* the replacement array lives on the device, so the GPU copy is now authoritative */
  mat->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDAGetArrayWrite - Provides write access to the CUDA buffer inside a `MATDENSECUDA` matrix.

   Not Collective

   Input Parameter:
.  A - the matrix

   Output Parameter:
.
array - the GPU array in column major order 2239 2240 Notes: 2241 The data on the GPU may not be updated due to operations done on the CPU. If you need updated data, use `MatDenseCUDAGetArray()`. 2242 2243 The array must be restored with `MatDenseCUDARestoreArrayWrite()` when no longer needed. 2244 2245 Level: developer 2246 2247 .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayRead()`, `MatDenseCUDARestoreArrayRead()` 2248 @*/ 2249 PetscErrorCode MatDenseCUDAGetArrayWrite(Mat A, PetscScalar **a) 2250 { 2251 PetscFunctionBegin; 2252 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2253 PetscUseMethod(A, "MatDenseCUDAGetArrayWrite_C", (Mat, PetscScalar **), (A, a)); 2254 PetscCall(PetscObjectStateIncrease((PetscObject)A)); 2255 PetscFunctionReturn(0); 2256 } 2257 2258 /*@C 2259 MatDenseCUDARestoreArrayWrite - Restore write access to the CUDA buffer inside a `MATDENSECUDA` matrix previously obtained with `MatDenseCUDAGetArrayWrite()`. 2260 2261 Not Collective 2262 2263 Input Parameters: 2264 + A - the matrix 2265 - array - the GPU array in column major order 2266 2267 Level: developer 2268 2269 .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`, `MatDenseCUDAGetArrayRead()` 2270 @*/ 2271 PetscErrorCode MatDenseCUDARestoreArrayWrite(Mat A, PetscScalar **a) 2272 { 2273 PetscFunctionBegin; 2274 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2275 PetscUseMethod(A, "MatDenseCUDARestoreArrayWrite_C", (Mat, PetscScalar **), (A, a)); 2276 PetscCall(PetscObjectStateIncrease((PetscObject)A)); 2277 A->offloadmask = PETSC_OFFLOAD_GPU; 2278 PetscFunctionReturn(0); 2279 } 2280 2281 /*@C 2282 MatDenseCUDAGetArrayRead - Provides read-only access to the CUDA buffer inside a `MATDENSECUDA` matrix. The array must be restored with `MatDenseCUDARestoreArrayRead()` when no longer needed. 
2283 2284 Not Collective 2285 2286 Input Parameters: 2287 . A - the matrix 2288 2289 Output Parameters 2290 . array - the GPU array in column major order 2291 2292 Note: 2293 Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use `MatDenseCUDAGetArrayWrite()`. 2294 2295 Level: developer 2296 2297 .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()` 2298 @*/ 2299 PetscErrorCode MatDenseCUDAGetArrayRead(Mat A, const PetscScalar **a) 2300 { 2301 PetscFunctionBegin; 2302 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2303 PetscUseMethod(A, "MatDenseCUDAGetArrayRead_C", (Mat, const PetscScalar **), (A, a)); 2304 PetscFunctionReturn(0); 2305 } 2306 2307 /*@C 2308 MatDenseCUDARestoreArrayRead - Restore read-only access to the CUDA buffer inside a `MATDENSECUDA` matrix previously obtained with a call to `MatDenseCUDAGetArrayRead()`. 2309 2310 Not Collective 2311 2312 Input Parameters: 2313 + A - the matrix 2314 - array - the GPU array in column major order 2315 2316 Note: 2317 Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use `MatDenseCUDAGetArrayWrite()`. 2318 2319 Level: developer 2320 2321 .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDAGetArrayRead()` 2322 @*/ 2323 PetscErrorCode MatDenseCUDARestoreArrayRead(Mat A, const PetscScalar **a) 2324 { 2325 PetscFunctionBegin; 2326 PetscUseMethod(A, "MatDenseCUDARestoreArrayRead_C", (Mat, const PetscScalar **), (A, a)); 2327 PetscFunctionReturn(0); 2328 } 2329 2330 /*@C 2331 MatDenseCUDAGetArray - Provides access to the CUDA buffer inside a `MATDENSECUDA` matrix. The array must be restored with `MatDenseCUDARestoreArray()` when no longer needed. 
   Not Collective

   Input Parameter:
.  A - the matrix

   Output Parameter:
.  array - the GPU array in column major order

   Note:
   Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use `MatDenseCUDAGetArrayWrite()`. For read-only access, use `MatDenseCUDAGetArrayRead()`.

   Level: developer

.seealso: `MATDENSECUDA`, `MatDenseCUDAGetArrayRead()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`
@*/
PetscErrorCode MatDenseCUDAGetArray(Mat A, PetscScalar **a)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscUseMethod(A, "MatDenseCUDAGetArray_C", (Mat, PetscScalar **), (A, a));
  /* read/write access: assume the caller modifies the data, so bump the object state */
  PetscCall(PetscObjectStateIncrease((PetscObject)A));
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDARestoreArray - Restore access to the CUDA buffer inside a `MATDENSECUDA` matrix previously obtained with `MatDenseCUDAGetArray()`.

   Not Collective

   Input Parameters:
+  A - the matrix
-  array - the GPU array in column major order

   Level: developer

.seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`, `MatDenseCUDAGetArrayRead()`
@*/
PetscErrorCode MatDenseCUDARestoreArray(Mat A, PetscScalar **a)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscUseMethod(A, "MatDenseCUDARestoreArray_C", (Mat, PetscScalar **), (A, a));
  PetscCall(PetscObjectStateIncrease((PetscObject)A));
  /* the authoritative copy of the values now lives on the GPU */
  A->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}
#endif

/*@C
   MatCreateDense - Creates a matrix in `MATDENSE` format.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
.  n - number of local columns (or `PETSC_DECIDE` to have calculated if N is given)
.  M - number of global rows (or `PETSC_DECIDE` to have calculated if m is given)
.  N - number of global columns (or `PETSC_DECIDE` to have calculated if n is given)
-  data - optional location of matrix data. Set data to NULL (`PETSC_NULL_SCALAR` for Fortran users) for PETSc
          to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   Although local portions of the matrix are stored in column-major
   order, the matrix is partitioned across MPI ranks by row.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space. Most users should
   set data=NULL (`PETSC_NULL_SCALAR` for Fortran users).

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Level: intermediate

.seealso: `MATDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
@*/
PetscErrorCode MatCreateDense(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar *data, Mat *A)
{
  PetscFunctionBegin;
  PetscCall(MatCreate(comm, A));
  PetscCall(MatSetSizes(*A, m, n, M, N));
  PetscCall(MatSetType(*A, MATDENSE));
  /* NOTE(review): presumably only the preallocation matching the actual type
     (seq on one rank, mpi otherwise) takes effect; the other is a no-op - confirm */
  PetscCall(MatSeqDenseSetPreallocation(*A, data));
  PetscCall(MatMPIDenseSetPreallocation(*A, data));
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_CUDA)
/*@C
   MatCreateDenseCUDA - Creates a matrix in `MATDENSECUDA` format using CUDA.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
.  n - number of local columns (or `PETSC_DECIDE` to have calculated if N is given)
.  M - number of global rows (or `PETSC_DECIDE` to have calculated if m is given)
.  N - number of global columns (or `PETSC_DECIDE` to have calculated if n is given)
-  data - optional location of GPU matrix data. Set data=NULL for PETSc
          to control matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Level: intermediate

.seealso: `MATDENSECUDA`, `MatCreate()`, `MatCreateDense()`
@*/
PetscErrorCode MatCreateDenseCUDA(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar *data, Mat *A)
{
  PetscFunctionBegin;
  PetscCall(MatCreate(comm, A));
  /* data must be NULL on every rank or non-NULL on every rank */
  PetscValidLogicalCollectiveBool(*A, !!data, 6);
  PetscCall(MatSetSizes(*A, m, n, M, N));
  PetscCall(MatSetType(*A, MATDENSECUDA));
  PetscCall(MatSeqDenseCUDASetPreallocation(*A, data));
  PetscCall(MatMPIDenseCUDASetPreallocation(*A, data));
  PetscFunctionReturn(0);
}
#endif

/* Duplicate an MPI dense matrix: reference the row/column layouts of A and
   duplicate its local sequential block (copying values iff cpvalues says so) */
static PetscErrorCode MatDuplicate_MPIDense(Mat A, MatDuplicateOption cpvalues, Mat *newmat)
{
  Mat           mat;
  Mat_MPIDense *a, *oldmat = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  *newmat = NULL;
  PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat));
  PetscCall(MatSetSizes(mat, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
  PetscCall(MatSetType(mat, ((PetscObject)A)->type_name));
  a = (Mat_MPIDense *)mat->data;

  mat->factortype   = A->factortype;
  mat->assembled    = PETSC_TRUE;
  mat->preallocated = PETSC_TRUE;

  mat->insertmode = NOT_SET_VALUES;
  a->donotstash   = oldmat->donotstash;

  /* layouts are shared by reference, not copied */
  PetscCall(PetscLayoutReference(A->rmap, &mat->rmap));
  PetscCall(PetscLayoutReference(A->cmap, &mat->cmap));

  PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));

  *newmat = mat;
  PetscFunctionReturn(0);
}

PetscErrorCode
MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
{
  /* Load a MATMPIDENSE matrix from a binary or (if available) HDF5 viewer */
  PetscBool isbinary;
#if defined(PETSC_HAVE_HDF5)
  PetscBool ishdf5;
#endif

  PetscFunctionBegin;
  PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
  PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
  /* force binary viewer to load .info file if it has not yet done so */
  PetscCall(PetscViewerSetUp(viewer));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
#if defined(PETSC_HAVE_HDF5)
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
#endif
  if (isbinary) {
    PetscCall(MatLoad_Dense_Binary(newMat, viewer));
#if defined(PETSC_HAVE_HDF5)
  } else if (ishdf5) {
    PetscCall(MatLoad_Dense_HDF5(newMat, viewer));
#endif
  } else SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
  PetscFunctionReturn(0);
}

/* Two dense matrices are equal iff every rank's local block is equal;
   the local results are combined with a logical-AND reduction */
static PetscErrorCode MatEqual_MPIDense(Mat A, Mat B, PetscBool *flag)
{
  Mat_MPIDense *matB = (Mat_MPIDense *)B->data, *matA = (Mat_MPIDense *)A->data;
  Mat           a, b;

  PetscFunctionBegin;
  a = matA->A;
  b = matB->A;
  PetscCall(MatEqual(a, b, flag));
  PetscCall(MPIU_Allreduce(MPI_IN_PLACE, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
  PetscFunctionReturn(0);
}

/* Destroy callback for the reusable data attached to a C = A^T*B product */
PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data)
{
  Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data;

  PetscFunctionBegin;
  PetscCall(PetscFree2(atb->sendbuf, atb->recvcounts));
  PetscCall(MatDestroy(&atb->atb));
  PetscCall(PetscFree(atb));
  PetscFunctionReturn(0);
}

/* Destroy callback for the reusable data attached to a C = A*B^T product */
PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data)
{
  Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data;

  PetscFunctionBegin;
  PetscCall(PetscFree2(abt->buf[0], abt->buf[1]));
  PetscCall(PetscFree2(abt->recvcounts, abt->recvdispls));
  PetscCall(PetscFree(abt));
  PetscFunctionReturn(0);
}

/* C = A^T * B: each rank forms its local product aseq^T * bseq, packs it by
   destination rank, and the row slices are summed onto their owners with
   MPI_Reduce_scatter() */
static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
{
  Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
  Mat_TransMatMultDense *atb;
  MPI_Comm               comm;
  PetscMPIInt            size, *recvcounts;
  PetscScalar           *carray, *sendbuf;
  const PetscScalar     *atbarray;
  PetscInt               i, cN = C->cmap->N, proc, k, j, lda;
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C, 3);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
  atb        = (Mat_TransMatMultDense *)C->product->data;
  recvcounts = atb->recvcounts;
  sendbuf    = atb->sendbuf;

  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));

  /* compute atbarray = aseq^T * bseq */
  PetscCall(MatTransposeMatMult(a->A, b->A, atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX, PETSC_DEFAULT, &atb->atb));

  PetscCall(MatGetOwnershipRanges(C, &ranges));

  /* arrange atbarray into sendbuf: contiguous per destination rank, column major within each slice */
  PetscCall(MatDenseGetArrayRead(atb->atb, &atbarray));
  PetscCall(MatDenseGetLDA(atb->atb, &lda));
  for (proc = 0, k = 0; proc < size; proc++) {
    for (j = 0; j < cN; j++) {
      for (i = ranges[proc]; i < ranges[proc + 1]; i++) sendbuf[k++] = atbarray[i + j * lda];
    }
  }
  PetscCall(MatDenseRestoreArrayRead(atb->atb, &atbarray));

  /* sum all atbarray to local values of C */
  PetscCall(MatDenseGetArrayWrite(c->A, &carray));
  PetscCallMPI(MPI_Reduce_scatter(sendbuf, carray, recvcounts, MPIU_SCALAR, MPIU_SUM, comm));
  PetscCall(MatDenseRestoreArrayWrite(c->A, &carray));
  PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(0);
}

/* Symbolic phase for C = A^T * B: sizes C and allocates the send buffer and
   per-rank receive counts reused by every numeric call */
static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
{
  MPI_Comm               comm;
  PetscMPIInt            size;
  PetscInt               cm = A->cmap->n, cM, cN = B->cmap->N;
  Mat_TransMatMultDense *atb;
  PetscBool              cisdense;
  PetscInt               i;
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C, 4);
  PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  /* A and B must share the same row distribution since aseq^T * bseq contracts over local rows */
  if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
    SETERRQ(comm, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
  }

  /* create matrix product C */
  PetscCall(MatSetSizes(C, cm, B->cmap->n, A->cmap->N, B->cmap->N));
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense,
MATMPIDENSE, MATMPIDENSECUDA, ""));
  if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
  PetscCall(MatSetUp(C));

  /* create data structure for reuse C */
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCall(PetscNew(&atb));
  cM = C->rmap->N;
  PetscCall(PetscMalloc2(cM * cN, &atb->sendbuf, size, &atb->recvcounts));
  PetscCall(MatGetOwnershipRanges(C, &ranges));
  /* rank i receives its block of rows of C, each cN wide */
  for (i = 0; i < size; i++) atb->recvcounts[i] = (ranges[i + 1] - ranges[i]) * cN;

  C->product->data    = atb;
  C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}

/* Symbolic phase for C = A * B^T: chooses between the "allgatherv" and
   "cyclic" algorithms and allocates the communication buffers each needs */
static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
{
  MPI_Comm               comm;
  PetscMPIInt            i, size;
  PetscInt               maxRows, bufsiz;
  PetscMPIInt            tag;
  PetscInt               alg;
  Mat_MatTransMultDense *abt;
  Mat_Product           *product = C->product;
  PetscBool              flg;

  PetscFunctionBegin;
  MatCheckProduct(C, 4);
  PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
  /* check local size of A and B: the contraction is over (local) columns */
  PetscCheck(A->cmap->n == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local column dimensions are incompatible, A (%" PetscInt_FMT ") != B (%" PetscInt_FMT ")", A->cmap->n, B->cmap->n);

  PetscCall(PetscStrcmp(product->alg, "allgatherv", &flg));
  alg = flg ? 0 : 1; /* 0 = allgatherv (default), 1 = cyclic */

  /* setup matrix product C */
  PetscCall(MatSetSizes(C, A->rmap->n, B->rmap->n, A->rmap->N, B->rmap->N));
  PetscCall(MatSetType(C, MATMPIDENSE));
  PetscCall(MatSetUp(C));
  PetscCall(PetscObjectGetNewTag((PetscObject)C, &tag));

  /* create data structure for reuse C */
  PetscCall(PetscObjectGetComm((PetscObject)C, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCall(PetscNew(&abt));
  abt->tag = tag;
  abt->alg = alg;
  switch (alg) {
  case 1: /* alg: "cyclic" - two ping-pong buffers sized for the largest row block of B */
    for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i]));
    bufsiz = A->cmap->N * maxRows;
    PetscCall(PetscMalloc2(bufsiz, &(abt->buf[0]), bufsiz, &(abt->buf[1])));
    break;
  default: /* alg: "allgatherv" - buf[0] holds local B^T, buf[1] the gathered global B^T */
    PetscCall(PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1])));
    PetscCall(PetscMalloc2(size, &(abt->recvcounts), size + 1, &(abt->recvdispls)));
    for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N;
    for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i];
    break;
  }

  C->product->data                 = abt;
  C->product->destroy              = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
  C->ops->mattransposemultnumeric  = MatMatTransposeMultNumeric_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}

/* C = A * B^T, cyclic algorithm: the packed transpose-friendly copy of each
   rank's block of B circulates around the ring of ranks; every step computes
   the column block of C owned by the block's originating rank */
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
{
  Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
  Mat_MatTransMultDense *abt;
  MPI_Comm               comm;
  PetscMPIInt            rank, size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom;
  PetscScalar           *sendbuf, *recvbuf = NULL, *cv;
  PetscInt               i, cK = A->cmap->N, k, j, bn;
  PetscScalar            _DOne = 1.0, _DZero = 0.0;
  const PetscScalar     *av, *bv;
  PetscBLASInt           cm, cn, ck, alda, blda = 0, clda;
  MPI_Request            reqs[2];
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C, 3);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
  abt = (Mat_MatTransMultDense *)C->product->data;
  PetscCall(PetscObjectGetComm((PetscObject)C, &comm));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCall(MatDenseGetArrayRead(a->A, &av));
  PetscCall(MatDenseGetArrayRead(b->A, &bv));
  PetscCall(MatDenseGetArrayWrite(c->A, &cv));
  PetscCall(MatDenseGetLDA(a->A, &i));
  PetscCall(PetscBLASIntCast(i, &alda));
  PetscCall(MatDenseGetLDA(b->A, &i));
  PetscCall(PetscBLASIntCast(i, &blda));
  PetscCall(MatDenseGetLDA(c->A, &i));
  PetscCall(PetscBLASIntCast(i, &clda));
  PetscCall(MatGetOwnershipRanges(B, &ranges));
  bn = B->rmap->n;
  if (blda == bn) {
    /* local B is contiguous: send it in place, no packing needed */
    sendbuf = (PetscScalar *)bv;
  } else {
    sendbuf = abt->buf[0];
    for (k = 0, i = 0; i < cK; i++) {
      for (j = 0; j < bn; j++, k++) sendbuf[k] = bv[i * blda + j];
    }
  }
  if (size > 1) {
    sendto   = (rank + size - 1) % size;
    recvfrom = (rank + size + 1) % size;
  } else {
    sendto = recvfrom = 0;
  }
  PetscCall(PetscBLASIntCast(cK, &ck));
  PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm));
  recvisfrom = rank;
  for (i = 0; i < size; i++) {
    /* we have finished receiving in sending, bufs can be read/modified */
    PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */
    PetscInt nextbn         = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];

    if (nextrecvisfrom != rank) {
      /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */
      sendsiz = cK * bn;
      recvsiz = cK * nextbn;
      recvbuf = (i & 1) ?
abt->buf[0] : abt->buf[1]; 2744 PetscCallMPI(MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0])); 2745 PetscCallMPI(MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1])); 2746 } 2747 2748 /* local aseq * sendbuf^T */ 2749 PetscCall(PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn)); 2750 if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &cm, &cn, &ck, &_DOne, av, &alda, sendbuf, &cn, &_DZero, cv + clda * ranges[recvisfrom], &clda)); 2751 2752 if (nextrecvisfrom != rank) { 2753 /* wait for the sends and receives to complete, swap sendbuf and recvbuf */ 2754 PetscCallMPI(MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE)); 2755 } 2756 bn = nextbn; 2757 recvisfrom = nextrecvisfrom; 2758 sendbuf = recvbuf; 2759 } 2760 PetscCall(MatDenseRestoreArrayRead(a->A, &av)); 2761 PetscCall(MatDenseRestoreArrayRead(b->A, &bv)); 2762 PetscCall(MatDenseRestoreArrayWrite(c->A, &cv)); 2763 PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 2764 PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 2765 PetscFunctionReturn(0); 2766 } 2767 2768 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C) 2769 { 2770 Mat_MPIDense *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data; 2771 Mat_MatTransMultDense *abt; 2772 MPI_Comm comm; 2773 PetscMPIInt size; 2774 PetscScalar *cv, *sendbuf, *recvbuf; 2775 const PetscScalar *av, *bv; 2776 PetscInt blda, i, cK = A->cmap->N, k, j, bn; 2777 PetscScalar _DOne = 1.0, _DZero = 0.0; 2778 PetscBLASInt cm, cn, ck, alda, clda; 2779 2780 PetscFunctionBegin; 2781 MatCheckProduct(C, 3); 2782 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 2783 abt = (Mat_MatTransMultDense *)C->product->data; 2784 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 2785 PetscCallMPI(MPI_Comm_size(comm, &size)); 2786 PetscCall(MatDenseGetArrayRead(a->A, &av)); 2787 
PetscCall(MatDenseGetArrayRead(b->A, &bv)); 2788 PetscCall(MatDenseGetArrayWrite(c->A, &cv)); 2789 PetscCall(MatDenseGetLDA(a->A, &i)); 2790 PetscCall(PetscBLASIntCast(i, &alda)); 2791 PetscCall(MatDenseGetLDA(b->A, &blda)); 2792 PetscCall(MatDenseGetLDA(c->A, &i)); 2793 PetscCall(PetscBLASIntCast(i, &clda)); 2794 /* copy transpose of B into buf[0] */ 2795 bn = B->rmap->n; 2796 sendbuf = abt->buf[0]; 2797 recvbuf = abt->buf[1]; 2798 for (k = 0, j = 0; j < bn; j++) { 2799 for (i = 0; i < cK; i++, k++) sendbuf[k] = bv[i * blda + j]; 2800 } 2801 PetscCall(MatDenseRestoreArrayRead(b->A, &bv)); 2802 PetscCallMPI(MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm)); 2803 PetscCall(PetscBLASIntCast(cK, &ck)); 2804 PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm)); 2805 PetscCall(PetscBLASIntCast(c->A->cmap->n, &cn)); 2806 if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &cm, &cn, &ck, &_DOne, av, &alda, recvbuf, &ck, &_DZero, cv, &clda)); 2807 PetscCall(MatDenseRestoreArrayRead(a->A, &av)); 2808 PetscCall(MatDenseRestoreArrayRead(b->A, &bv)); 2809 PetscCall(MatDenseRestoreArrayWrite(c->A, &cv)); 2810 PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 2811 PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 2812 PetscFunctionReturn(0); 2813 } 2814 2815 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) 2816 { 2817 Mat_MatTransMultDense *abt; 2818 2819 PetscFunctionBegin; 2820 MatCheckProduct(C, 3); 2821 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 2822 abt = (Mat_MatTransMultDense *)C->product->data; 2823 switch (abt->alg) { 2824 case 1: 2825 PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C)); 2826 break; 2827 default: 2828 PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C)); 2829 break; 2830 } 2831 PetscFunctionReturn(0); 2832 } 2833 2834 PetscErrorCode 
MatDestroy_MatMatMult_MPIDense_MPIDense(void *data) 2835 { 2836 Mat_MatMultDense *ab = (Mat_MatMultDense *)data; 2837 2838 PetscFunctionBegin; 2839 PetscCall(MatDestroy(&ab->Ce)); 2840 PetscCall(MatDestroy(&ab->Ae)); 2841 PetscCall(MatDestroy(&ab->Be)); 2842 PetscCall(PetscFree(ab)); 2843 PetscFunctionReturn(0); 2844 } 2845 2846 #if defined(PETSC_HAVE_ELEMENTAL) 2847 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) 2848 { 2849 Mat_MatMultDense *ab; 2850 2851 PetscFunctionBegin; 2852 MatCheckProduct(C, 3); 2853 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Missing product data"); 2854 ab = (Mat_MatMultDense *)C->product->data; 2855 PetscCall(MatConvert_MPIDense_Elemental(A, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Ae)); 2856 PetscCall(MatConvert_MPIDense_Elemental(B, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Be)); 2857 PetscCall(MatMatMultNumeric_Elemental(ab->Ae, ab->Be, ab->Ce)); 2858 PetscCall(MatConvert(ab->Ce, MATMPIDENSE, MAT_REUSE_MATRIX, &C)); 2859 PetscFunctionReturn(0); 2860 } 2861 2862 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C) 2863 { 2864 Mat Ae, Be, Ce; 2865 Mat_MatMultDense *ab; 2866 2867 PetscFunctionBegin; 2868 MatCheckProduct(C, 4); 2869 PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 2870 /* check local size of A and B */ 2871 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 2872 SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend); 2873 } 2874 2875 /* create elemental matrices Ae and Be */ 2876 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &Ae)); 2877 PetscCall(MatSetSizes(Ae, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N)); 2878 PetscCall(MatSetType(Ae, 
MATELEMENTAL));
  PetscCall(MatSetUp(Ae));
  /* column-oriented insertion matches the dense column-major storage */
  PetscCall(MatSetOption(Ae, MAT_ROW_ORIENTED, PETSC_FALSE));

  PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &Be));
  PetscCall(MatSetSizes(Be, PETSC_DECIDE, PETSC_DECIDE, B->rmap->N, B->cmap->N));
  PetscCall(MatSetType(Be, MATELEMENTAL));
  PetscCall(MatSetUp(Be));
  PetscCall(MatSetOption(Be, MAT_ROW_ORIENTED, PETSC_FALSE));

  /* compute symbolic Ce = Ae*Be */
  PetscCall(MatCreate(PetscObjectComm((PetscObject)C), &Ce));
  PetscCall(MatMatMultSymbolic_Elemental(Ae, Be, fill, Ce));

  /* setup C */
  PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, PETSC_DECIDE, PETSC_DECIDE));
  PetscCall(MatSetType(C, MATDENSE));
  PetscCall(MatSetUp(C));

  /* create data structure for reuse Cdense */
  PetscCall(PetscNew(&ab));
  ab->Ae = Ae;
  ab->Be = Be;
  ab->Ce = Ce;

  C->product->data       = ab;
  C->product->destroy    = MatDestroy_MatMatMult_MPIDense_MPIDense;
  C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}
#endif
/* ----------------------------------------------- */
#if defined(PETSC_HAVE_ELEMENTAL)
/* Install the AB (C = A*B) product kernels; available only with Elemental */
static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C)
{
  PetscFunctionBegin;
  C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense;
  C->ops->productsymbolic = MatProductSymbolic_AB;
  PetscFunctionReturn(0);
}
#endif

/* Install the AtB (C = A^T*B) product kernels after validating that A and B
   share the same row distribution */
static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C)
{
  Mat_Product *product = C->product;
  Mat          A = product->A, B = product->B;

  PetscFunctionBegin;
  if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend)
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
  C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense;
  C->ops->productsymbolic          = MatProductSymbolic_AtB;
  PetscFunctionReturn(0);
}

/* Install the ABt (C = A*B^T) product kernels; lets the user pick between the
   "allgatherv" (default) and "cyclic" algorithms from the options database */
static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C)
{
  Mat_Product *product     = C->product;
  const char  *algTypes[2] = {"allgatherv", "cyclic"};
  PetscInt     alg, nalg = 2;
  PetscBool    flg = PETSC_FALSE;

  PetscFunctionBegin;
  /* Set default algorithm */
  alg = 0; /* default is allgatherv */
  PetscCall(PetscStrcmp(product->alg, "default", &flg));
  if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));

  /* Get runtime option; the option name depends on which API the user called */
  if (product->api_user) {
    PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatTransposeMult", "Mat");
    PetscCall(PetscOptionsEList("-matmattransmult_mpidense_mpidense_via", "Algorithmic approach", "MatMatTransposeMult", algTypes, nalg, algTypes[alg], &alg, &flg));
    PetscOptionsEnd();
  } else {
    PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_ABt", "Mat");
    PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_ABt", algTypes, nalg, algTypes[alg], &alg, &flg));
    PetscOptionsEnd();
  }
  if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));

  C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense;
  C->ops->productsymbolic          = MatProductSymbolic_ABt;
  PetscFunctionReturn(0);
}

/* Entry point: dispatch product-type-specific setup for MATMPIDENSE products.
   Unsupported product types fall through silently so other backends can try. */
PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C)
{
  Mat_Product *product = C->product;

  PetscFunctionBegin;
  switch (product->type) {
#if defined(PETSC_HAVE_ELEMENTAL)
  case MATPRODUCT_AB:
    PetscCall(MatProductSetFromOptions_MPIDense_AB(C));
    break;
#endif
  case MATPRODUCT_AtB:
    PetscCall(MatProductSetFromOptions_MPIDense_AtB(C));
    break;
  case MATPRODUCT_ABt:
    PetscCall(MatProductSetFromOptions_MPIDense_ABt(C));
    break;
  default:
    break;
  }
  PetscFunctionReturn(0);
}