1 2 /* 3 Basic functions for basic parallel dense matrices. 4 */ 5 6 #include <../src/mat/impls/dense/mpi/mpidense.h> /*I "petscmat.h" I*/ 7 #include <../src/mat/impls/aij/mpi/mpiaij.h> 8 #include <petscblaslapack.h> 9 10 /*@ 11 12 MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential 13 matrix that represents the operator. For sequential matrices it returns itself. 14 15 Input Parameter: 16 . A - the Seq or MPI dense matrix 17 18 Output Parameter: 19 . B - the inner matrix 20 21 Level: intermediate 22 23 @*/ 24 PetscErrorCode MatDenseGetLocalMatrix(Mat A, Mat *B) { 25 Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 26 PetscBool flg; 27 28 PetscFunctionBegin; 29 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 30 PetscValidPointer(B, 2); 31 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &flg)); 32 if (flg) *B = mat->A; 33 else { 34 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQDENSE, &flg)); 35 PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)A)->type_name); 36 *B = A; 37 } 38 PetscFunctionReturn(0); 39 } 40 41 PetscErrorCode MatCopy_MPIDense(Mat A, Mat B, MatStructure s) { 42 Mat_MPIDense *Amat = (Mat_MPIDense *)A->data; 43 Mat_MPIDense *Bmat = (Mat_MPIDense *)B->data; 44 45 PetscFunctionBegin; 46 PetscCall(MatCopy(Amat->A, Bmat->A, s)); 47 PetscFunctionReturn(0); 48 } 49 50 PetscErrorCode MatShift_MPIDense(Mat A, PetscScalar alpha) { 51 Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 52 PetscInt j, lda, rstart = A->rmap->rstart, rend = A->rmap->rend, rend2; 53 PetscScalar *v; 54 55 PetscFunctionBegin; 56 PetscCall(MatDenseGetArray(mat->A, &v)); 57 PetscCall(MatDenseGetLDA(mat->A, &lda)); 58 rend2 = PetscMin(rend, A->cmap->N); 59 if (rend2 > rstart) { 60 for (j = rstart; j < rend2; j++) v[j - rstart + j * lda] += alpha; 61 PetscCall(PetscLogFlops(rend2 - rstart)); 62 } 63 PetscCall(MatDenseRestoreArray(mat->A, &v)); 64 PetscFunctionReturn(0); 65 } 66 67 
/* Get one locally owned row (global index) by delegating to the local sequential block */
PetscErrorCode MatGetRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) {
  Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
  PetscInt      lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;

  PetscFunctionBegin;
  /* rows owned by other processes are not accessible here */
  PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows");
  lrow = row - rstart; /* global -> local row index */
  PetscCall(MatGetRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v));
  PetscFunctionReturn(0);
}

/* Release a row obtained with MatGetRow_MPIDense(); must be passed the same (row, nz, idx, v) */
PetscErrorCode MatRestoreRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) {
  Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
  PetscInt      lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;

  PetscFunctionBegin;
  PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows");
  lrow = row - rstart; /* global -> local row index */
  PetscCall(MatRestoreRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v));
  PetscFunctionReturn(0);
}

/* Return (and cache on A as "DiagonalBlock") a sequential dense matrix sharing the storage of
   this process's diagonal block; requires congruent row/column layouts (square partitioning). */
PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A, Mat *a) {
  Mat_MPIDense *mdn = (Mat_MPIDense *)A->data;
  PetscInt      m = A->rmap->n, rstart = A->rmap->rstart;
  PetscScalar  *array;
  MPI_Comm      comm;
  PetscBool     flg;
  Mat           B;

  PetscFunctionBegin;
  PetscCall(MatHasCongruentLayouts(A, &flg));
  PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only square matrices supported.");
  PetscCall(PetscObjectQuery((PetscObject)A, "DiagonalBlock", (PetscObject *)&B));
  if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */

    PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSECUDA, &flg));
    PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSECUDA);
    PetscCall(PetscObjectGetComm((PetscObject)(mdn->A), &comm));
    PetscCall(MatCreate(comm, &B));
    PetscCall(MatSetSizes(B, m, m, m, m));
    PetscCall(MatSetType(B, ((PetscObject)mdn->A)->type_name));
    /* B aliases the local array at column offset rstart; NOTE(review): offset uses m, not the
       leading dimension — presumably valid only when lda == m; confirm against MatDenseSetLDA usage */
    PetscCall(MatDenseGetArrayRead(mdn->A, (const PetscScalar **)&array));
    PetscCall(MatSeqDenseSetPreallocation(B, array + m * rstart));
    PetscCall(MatDenseRestoreArrayRead(mdn->A, (const PetscScalar **)&array));
    /* composing bumps the reference count, so the local MatDestroy() keeps B alive on A */
    PetscCall(PetscObjectCompose((PetscObject)A, "DiagonalBlock", (PetscObject)B));
    *a = B;
    PetscCall(MatDestroy(&B));
  } else *a = B;
  PetscFunctionReturn(0);
}

/* Insert/add values: locally owned rows go straight into the sequential block,
   off-process rows are stashed for communication during assembly. */
PetscErrorCode MatSetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], const PetscScalar v[], InsertMode addv) {
  Mat_MPIDense *A = (Mat_MPIDense *)mat->data;
  PetscInt      i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;
  PetscBool     roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row indices are ignored by convention */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        /* v is row-major: row i of the input occupies v[i*n .. i*n+n-1] */
        PetscCall(MatSetValues(A->A, 1, &row, n, idxn, v + i * n, addv));
      } else {
        /* v is column-major: entry (i,j) lives at v[i + j*m] */
        for (j = 0; j < n; j++) {
          if (idxn[j] < 0) continue;
          PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large");
          PetscCall(MatSetValues(A->A, 1, &row, 1, &idxn[j], v + i + j * m, addv));
        }
      }
    } else if (!A->donotstash) {
      /* off-process entry: park it in the stash until MatAssemblyBegin/End */
      mat->assembled = PETSC_FALSE;
      if (roworiented) {
        PetscCall(MatStashValuesRow_Private(&mat->stash, idxm[i], n, idxn, v + i * n, PETSC_FALSE));
      } else {
        PetscCall(MatStashValuesCol_Private(&mat->stash, idxm[i], n, idxn, v + i, m, PETSC_FALSE));
      }
    }
  }
  PetscFunctionReturn(0);
}

/* Fetch values at (idxm[i], idxn[j]); only locally owned rows are supported,
   results are written row-major into v (entry (i,j) at v[i*n + j]). */
PetscErrorCode MatGetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[]) {
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
  PetscInt      i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart; /* global -> local row */
      for (j = 0; j < n; j++) {
        if (idxn[j] < 0) continue; /* negative column */
        PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large");
        PetscCall(MatGetValues(mdn->A, 1, &row, 1, &idxn[j], v + i * n + j));
      }
    } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

/* Report the leading dimension of the local sequential block's storage */
static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A, PetscInt *lda) {
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseGetLDA(a->A, lda));
  PetscFunctionReturn(0);
}

/* Set the leading dimension of the local block, creating the (CPU or CUDA)
   sequential block first if it does not exist yet. */
static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A, PetscInt lda) {
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscBool     iscuda;

  PetscFunctionBegin;
  if (!a->A) {
    /* lazily create the inner sequential matrix: local rows x ALL global columns */
    PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
    PetscCall(PetscLayoutSetUp(A->rmap));
    PetscCall(PetscLayoutSetUp(A->cmap));
    PetscCall(MatCreate(PETSC_COMM_SELF, &a->A));
    PetscCall(PetscLogObjectParent((PetscObject)A, (PetscObject)a->A));
    PetscCall(MatSetSizes(a->A, A->rmap->n, A->cmap->N, A->rmap->n, A->cmap->N));
    /* match the inner type to the outer type (CUDA vs host) */
    PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSECUDA, &iscuda));
    PetscCall(MatSetType(a->A, iscuda ?
MATSEQDENSECUDA : MATSEQDENSE));
  }
  PetscCall(MatDenseSetLDA(a->A, lda));
  PetscFunctionReturn(0);
}

/* Get writable access to the local block's raw array (column-major, see MatDenseGetLDA) */
static PetscErrorCode MatDenseGetArray_MPIDense(Mat A, PetscScalar **array) {
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseGetArray(a->A, array));
  PetscFunctionReturn(0);
}

/* Get read-only access to the local block's raw array */
static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A, const PetscScalar **array) {
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseGetArrayRead(a->A, array));
  PetscFunctionReturn(0);
}

/* Get write-only access to the local block's raw array (current contents may be ignored) */
static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A, PetscScalar **array) {
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseGetArrayWrite(a->A, array));
  PetscFunctionReturn(0);
}

/* Temporarily substitute a user-provided array for the local block's storage */
static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A, const PetscScalar *array) {
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  /* outstanding column-vector or submatrix views alias the storage and must be returned first */
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDensePlaceArray(a->A, array));
  PetscFunctionReturn(0);
}

/* Undo MatDensePlaceArray(), restoring the block's original storage */
static PetscErrorCode MatDenseResetArray_MPIDense(Mat A) {
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseResetArray(a->A));
  PetscFunctionReturn(0);
}

/* Permanently replace the local block's storage with a user-provided array */
static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A, const PetscScalar *array) {
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseReplaceArray(a->A, array));
  PetscFunctionReturn(0);
}

/* Extract the submatrix A[isrow, iscol]; the row IS must respect A's row partitioning
   (no redistribution). Column indices are gathered so every process copies all
   selected columns of its own rows. */
static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A, IS isrow, IS iscol, MatReuse scall, Mat *B) {
  Mat_MPIDense      *mat = (Mat_MPIDense *)A->data, *newmatd;
  PetscInt           lda, i, j, rstart, rend, nrows, ncols, Ncols, nlrows, nlcols;
  const PetscInt    *irow, *icol;
  const PetscScalar *v;
  PetscScalar       *bv;
  Mat                newmat;
  IS                 iscol_local;
  MPI_Comm           comm_is, comm_mat;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm_mat));
  PetscCall(PetscObjectGetComm((PetscObject)iscol, &comm_is));
  PetscCheck(comm_mat == comm_is, PETSC_COMM_SELF, PETSC_ERR_ARG_NOTSAMECOMM, "IS communicator must match matrix communicator");

  /* every process needs the full column selection since it stores all columns of its rows */
  PetscCall(ISAllGather(iscol, &iscol_local));
  PetscCall(ISGetIndices(isrow, &irow));
  PetscCall(ISGetIndices(iscol_local, &icol));
  PetscCall(ISGetLocalSize(isrow, &nrows));
  PetscCall(ISGetLocalSize(iscol, &ncols));
  PetscCall(ISGetSize(iscol, &Ncols)); /* global number of columns, size of iscol_local */

  /* No parallel redistribution currently supported! Should really check each index set
     to comfirm that it is OK. ... Currently supports only submatrix same partitioning as
     original matrix! */

  PetscCall(MatGetLocalSize(A, &nlrows, &nlcols));
  PetscCall(MatGetOwnershipRange(A, &rstart, &rend));

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &newmat));
    PetscCall(MatSetSizes(newmat, nrows, ncols, PETSC_DECIDE, Ncols));
    PetscCall(MatSetType(newmat, ((PetscObject)A)->type_name));
    PetscCall(MatMPIDenseSetPreallocation(newmat, NULL));
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense *)newmat->data;
  PetscCall(MatDenseGetArray(newmatd->A, &bv));
  PetscCall(MatDenseGetArrayRead(mat->A, &v));
  PetscCall(MatDenseGetLDA(mat->A, &lda));
  for (i = 0; i < Ncols; i++) {
    const PetscScalar *av = v + lda * icol[i]; /* start of source column icol[i] */
    /* bv walks the destination contiguously: columns of newmat are packed tightly
       (NOTE(review): presumably the new matrix's lda equals nrows — confirm) */
    for (j = 0; j < nrows; j++) *bv++ = av[irow[j] - rstart];
  }
  PetscCall(MatDenseRestoreArrayRead(mat->A, &v));
  PetscCall(MatDenseRestoreArray(newmatd->A, &bv));

  /* Assemble the matrices so that the correct flags are set */
  PetscCall(MatAssemblyBegin(newmat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(newmat, MAT_FINAL_ASSEMBLY));

  /* Free work space */
  PetscCall(ISRestoreIndices(isrow, &irow));
  PetscCall(ISRestoreIndices(iscol_local, &icol));
  PetscCall(ISDestroy(&iscol_local));
  *B = newmat;
  PetscFunctionReturn(0);
}

/* Return an array obtained with MatDenseGetArray_MPIDense() */
PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A, PetscScalar **array) {
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseRestoreArray(a->A, array));
  PetscFunctionReturn(0);
}

/* Return an array obtained with MatDenseGetArrayRead_MPIDense() */
PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A, const PetscScalar **array) {
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseRestoreArrayRead(a->A, array));
  PetscFunctionReturn(0);
}

/* Return an array obtained with MatDenseGetArrayWrite_MPIDense() */
PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A, PetscScalar **array) {
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseRestoreArrayWrite(a->A, array));
  PetscFunctionReturn(0);
}

/* Start assembly: launch communication of stashed off-process entries (no-op when stashing is disabled) */
PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat, MatAssemblyType mode) {
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
  PetscInt      nstash, reallocs;

  PetscFunctionBegin;
  if (mdn->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
  PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
  PetscCall(PetscInfo(mdn->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscFunctionReturn(0);
}

/* Finish assembly: drain received stash messages into the local block, then assemble it */
PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat, MatAssemblyType mode) {
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
  PetscInt      i, *row, *col, flg, j, rstart, ncols;
  PetscMPIInt   n;
  PetscScalar  *val;

  PetscFunctionBegin;
  if (!mdn->donotstash && !mat->nooffprocentries) {
    /* wait on receives */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break; /* no more messages */

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIDense(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
  }

  PetscCall(MatAssemblyBegin(mdn->A, mode));
  PetscCall(MatAssemblyEnd(mdn->A, mode));
  PetscFunctionReturn(0);
}

/* Zero every entry of the local block (and hence of the parallel matrix) */
PetscErrorCode MatZeroEntries_MPIDense(Mat A) {
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatZeroEntries(l->A));
  PetscFunctionReturn(0);
}

/* Zero the given (global) rows; optionally set diag on the zeroed diagonal entries
   and fix the right-hand side b so the solution x is preserved on those rows. */
PetscErrorCode MatZeroRows_MPIDense(Mat A, PetscInt n, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) {
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;
  PetscInt      i, len, *lrows;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(PetscLayoutMapLocal(A->rmap, n, rows, &len, &lrows, NULL));
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArrayWrite(b, &bb));
    for (i = 0; i < len; ++i) bb[lrows[i]] = diag * xx[lrows[i]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArrayWrite(b, &bb));
  }
  /* zero the rows with a zero diagonal first, then set the diagonal collectively */
  PetscCall(MatZeroRows(l->A, len, lrows, 0.0, NULL, NULL));
  if (diag != 0.0) {
    Vec d;

    PetscCall(MatCreateVecs(A, NULL, &d));
    PetscCall(VecSet(d, diag));
    PetscCall(MatDiagonalSet(A, d, INSERT_VALUES));
    PetscCall(VecDestroy(&d));
  }
  PetscCall(PetscFree(lrows));
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat, Vec, Vec);
PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat, Vec, Vec, Vec);
PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat, Vec, Vec);
PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat, Vec, Vec, Vec);

/* y = A x: broadcast the full input vector into lvec via the SF, then apply the local block */
PetscErrorCode MatMult_MPIDense(Mat mat, Vec xx, Vec yy) {
  Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  /* lazily build the SF/lvec used to gather the distributed x onto every process */
  if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
  PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
  PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
  PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
  PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
  PetscCall((*mdn->A->ops->mult)(mdn->A, mdn->lvec, yy));
  PetscFunctionReturn(0);
}

/* z = y + A x: same gather as MatMult, then the local block's mult-add */
PetscErrorCode MatMultAdd_MPIDense(Mat mat, Vec xx, Vec yy, Vec zz) {
  Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
  /* gather the distributed x into the local work vector lvec */
  PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
  PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
  PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
  PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
  PetscCall((*mdn->A->ops->multadd)(mdn->A, mdn->lvec, yy, zz));
  PetscFunctionReturn(0);
}

/* y = A^T x: apply the local transpose into lvec, then sum-reduce the partial
   results from all processes into the distributed y via the SF */
PetscErrorCode MatMultTranspose_MPIDense(Mat A, Vec xx, Vec yy) {
  Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
  PetscCall(VecSet(yy, 0.0)); /* reduction adds into yy, so start from zero */
  PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec));
  PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(yy, &ay, &aymtype));
  PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
  PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
  PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
  PetscCall(VecRestoreArrayAndMemType(yy, &ay));
  PetscFunctionReturn(0);
}

/* z = y + A^T x: copy y into z, then sum-reduce the local transpose products into z */
PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A, Vec xx, Vec yy, Vec zz) {
  Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
  PetscCall(VecCopy(yy, zz)); /* the reduction adds on top of zz */
  PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec));
  PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(zz, &ay, &aymtype));
  PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
  PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
  PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
  PetscCall(VecRestoreArrayAndMemType(zz, &ay));
  PetscFunctionReturn(0);
}

/* Extract the (global) diagonal entries owned by this process into v */
PetscErrorCode MatGetDiagonal_MPIDense(Mat A, Vec v) {
  Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
  PetscInt           lda, len, i, n, m = A->rmap->n, radd;
  PetscScalar       *x, zero = 0.0;
  const PetscScalar *av;

  PetscFunctionBegin;
  PetscCall(VecSet(v, zero));
  PetscCall(VecGetArray(v, &x));
  PetscCall(VecGetSize(v, &n));
  PetscCheck(n == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming mat and vec");
  len = PetscMin(a->A->rmap->n, a->A->cmap->n);
  /* offset of this process's diagonal block in the local array;
     NOTE(review): computed with m rather than lda — presumably valid only when lda == m; confirm */
  radd = A->rmap->rstart * m;
  PetscCall(MatDenseGetArrayRead(a->A, &av));
  PetscCall(MatDenseGetLDA(a->A, &lda));
  for (i = 0; i < len; i++) x[i] = av[radd + i * lda + i];
  PetscCall(MatDenseRestoreArrayRead(a->A, &av));
  PetscCall(VecRestoreArray(v, &x));
  PetscFunctionReturn(0);
}

/* Destroy the parallel dense matrix: release all inner objects and
   detach every composed method/conversion hook. */
PetscErrorCode MatDestroy_MPIDense(Mat mat) {
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N,
mat->cmap->N);
#endif
  PetscCall(MatStashDestroy_Private(&mat->stash));
  /* outstanding column-vector/submatrix views would dangle after destruction */
  PetscCheck(!mdn->vecinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!mdn->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  /* release the inner sequential block, communication objects, and cached views */
  PetscCall(MatDestroy(&mdn->A));
  PetscCall(VecDestroy(&mdn->lvec));
  PetscCall(PetscSFDestroy(&mdn->Mvctx));
  PetscCall(VecDestroy(&mdn->cvec));
  PetscCall(MatDestroy(&mdn->cmat));

  PetscCall(PetscFree(mat->data));
  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));

  /* detach all method implementations composed on this type */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", NULL));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C",
NULL));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", NULL));
#if defined(PETSC_HAVE_CUDA)
  /* CUDA-specific product and array-access hooks */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensecuda_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAPlaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAResetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAReplaceArray_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", NULL));

  /* drop the cached diagonal block, if any (see MatGetDiagonalBlock_MPIDense) */
  PetscCall(PetscObjectCompose((PetscObject)mat, "DiagonalBlock", NULL));
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatView_SeqDense(Mat, PetscViewer);

#include <petscdraw.h>
/* View helper for ASCII/draw/socket viewers; for non-info formats the whole
   matrix is gathered onto rank 0 and viewed there. */
static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat, PetscViewer viewer) {
  Mat_MPIDense     *mdn = (Mat_MPIDense *)mat->data;
  PetscMPIInt       rank;
  PetscViewerType   vtype;
  PetscBool         iascii, isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  if (iascii) {
    PetscCall(PetscViewerGetType(viewer, &vtype));
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* per-process summary only; no entries are printed */
      MatInfo info;
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %" PetscInt_FMT " \n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                   (PetscInt)info.memory));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      if (mdn->Mvctx) PetscCall(PetscSFView(mdn->Mvctx, viewer));
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;

    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawIsNull(draw, &isnull));
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat          A;
    PetscInt     M = mat->rmap->N, N = mat->cmap->N, m, row, i, nz;
    PetscInt    *cols;
    PetscScalar *vals;

    PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
    /* rank 0 owns all rows of the gathered copy; everyone else owns none */
    if (rank == 0) {
      PetscCall(MatSetSizes(A, M, N, M, N));
    } else {
      PetscCall(MatSetSizes(A, 0, 0, M, N));
    }
    /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
    PetscCall(MatSetType(A, MATMPIDENSE));
    PetscCall(MatMPIDenseSetPreallocation(A, NULL));
    PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)A));

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    A->insertmode = INSERT_VALUES;

    row = mat->rmap->rstart;
    m = mdn->A->rmap->n;
    for (i = 0; i < m; i++) {
      PetscCall(MatGetRow_MPIDense(mat, row, &nz, &cols, &vals));
      PetscCall(MatSetValues_MPIDense(A, 1, &row, nz, cols, vals, INSERT_VALUES));
      PetscCall(MatRestoreRow_MPIDense(mat, row, &nz, &cols, &vals));
      row++;
    }

    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    if (rank == 0) {
      /* view rank 0's local block of the gathered copy, which is the whole matrix */
      PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIDense *)(A->data))->A, ((PetscObject)mat)->name));
      PetscCall(MatView_SeqDense(((Mat_MPIDense *)(A->data))->A, sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(PetscViewerFlush(viewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(0);
}

/* Dispatch viewing to the appropriate helper based on the viewer type */
PetscErrorCode MatView_MPIDense(Mat mat, PetscViewer viewer) {
  PetscBool iascii, isbinary, isdraw, issocket;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));

  if (iascii || issocket || isdraw) {
    PetscCall(MatView_MPIDense_ASCIIorDraworSocket(mat, viewer));
  } else if (isbinary) PetscCall(MatView_Dense_Binary(mat, viewer));
  PetscFunctionReturn(0);
}

/* Collect matrix statistics: local numbers from the sequential block, optionally
   reduced (max or sum) across the communicator. */
PetscErrorCode MatGetInfo_MPIDense(Mat A, MatInfoType flag, MatInfo *info) {
  Mat_MPIDense  *mat = (Mat_MPIDense *)A->data;
  Mat            mdn = mat->A;
  PetscLogDouble isend[5], irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;

  PetscCall(MatGetInfo(mdn, MAT_LOCAL, info));

  /* stage the five local statistics for a possible reduction */
  isend[0] = info->nz_used;
  isend[1] = info->nz_allocated;
  isend[2] = info->nz_unneeded;
  isend[3] = info->memory;
  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded = isend[2];
    info->memory = isend[3];
    info->mallocs = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)A)));

    info->nz_used = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded = irecv[2];
    info->memory = irecv[3];
    info->mallocs = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)A)));

    info->nz_used = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded = irecv[2];
    info->memory = irecv[3];
    info->mallocs = irecv[4];
  }
  info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs = 0;
  PetscFunctionReturn(0);
}

/* Handle matrix options: forward relevant ones to the local block, record
   stash/orientation flags locally, ignore the rest with an informational message. */
PetscErrorCode MatSetOption_MPIDense(Mat A, MatOption op, PetscBool flg) {
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A, 1);
    a->roworiented = flg; /* affects how MatSetValues interprets the value array */
    PetscCall(MatSetOption(a->A, op, flg));
    break;
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_USE_HASH_TABLE:
  case MAT_SORTED_FULL: PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); break;
  case MAT_IGNORE_OFF_PROC_ENTRIES: a->donotstash = flg; break;
  case MAT_SYMMETRIC:
771 case MAT_STRUCTURALLY_SYMMETRIC: 772 case MAT_HERMITIAN: 773 case MAT_SYMMETRY_ETERNAL: 774 case MAT_STRUCTURAL_SYMMETRY_ETERNAL: 775 case MAT_SPD: 776 case MAT_IGNORE_LOWER_TRIANGULAR: 777 case MAT_IGNORE_ZERO_ENTRIES: 778 case MAT_SPD_ETERNAL: 779 /* if the diagonal matrix is square it inherits some of the properties above */ 780 PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); 781 break; 782 default: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %s", MatOptions[op]); 783 } 784 PetscFunctionReturn(0); 785 } 786 787 PetscErrorCode MatDiagonalScale_MPIDense(Mat A, Vec ll, Vec rr) { 788 Mat_MPIDense *mdn = (Mat_MPIDense *)A->data; 789 const PetscScalar *l; 790 PetscScalar x, *v, *vv, *r; 791 PetscInt i, j, s2a, s3a, s2, s3, m = mdn->A->rmap->n, n = mdn->A->cmap->n, lda; 792 793 PetscFunctionBegin; 794 PetscCall(MatDenseGetArray(mdn->A, &vv)); 795 PetscCall(MatDenseGetLDA(mdn->A, &lda)); 796 PetscCall(MatGetLocalSize(A, &s2, &s3)); 797 if (ll) { 798 PetscCall(VecGetLocalSize(ll, &s2a)); 799 PetscCheck(s2a == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left scaling vector non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT, s2a, s2); 800 PetscCall(VecGetArrayRead(ll, &l)); 801 for (i = 0; i < m; i++) { 802 x = l[i]; 803 v = vv + i; 804 for (j = 0; j < n; j++) { 805 (*v) *= x; 806 v += lda; 807 } 808 } 809 PetscCall(VecRestoreArrayRead(ll, &l)); 810 PetscCall(PetscLogFlops(1.0 * n * m)); 811 } 812 if (rr) { 813 const PetscScalar *ar; 814 815 PetscCall(VecGetLocalSize(rr, &s3a)); 816 PetscCheck(s3a == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Right scaling vec non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT ".", s3a, s3); 817 PetscCall(VecGetArrayRead(rr, &ar)); 818 if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); 819 PetscCall(VecGetArray(mdn->lvec, &r)); 820 PetscCall(PetscSFBcastBegin(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE)); 821 PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE)); 
822 PetscCall(VecRestoreArrayRead(rr, &ar)); 823 for (i = 0; i < n; i++) { 824 x = r[i]; 825 v = vv + i * lda; 826 for (j = 0; j < m; j++) (*v++) *= x; 827 } 828 PetscCall(VecRestoreArray(mdn->lvec, &r)); 829 PetscCall(PetscLogFlops(1.0 * n * m)); 830 } 831 PetscCall(MatDenseRestoreArray(mdn->A, &vv)); 832 PetscFunctionReturn(0); 833 } 834 835 PetscErrorCode MatNorm_MPIDense(Mat A, NormType type, PetscReal *nrm) { 836 Mat_MPIDense *mdn = (Mat_MPIDense *)A->data; 837 PetscInt i, j; 838 PetscMPIInt size; 839 PetscReal sum = 0.0; 840 const PetscScalar *av, *v; 841 842 PetscFunctionBegin; 843 PetscCall(MatDenseGetArrayRead(mdn->A, &av)); 844 v = av; 845 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 846 if (size == 1) { 847 PetscCall(MatNorm(mdn->A, type, nrm)); 848 } else { 849 if (type == NORM_FROBENIUS) { 850 for (i = 0; i < mdn->A->cmap->n * mdn->A->rmap->n; i++) { 851 sum += PetscRealPart(PetscConj(*v) * (*v)); 852 v++; 853 } 854 PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A))); 855 *nrm = PetscSqrtReal(*nrm); 856 PetscCall(PetscLogFlops(2.0 * mdn->A->cmap->n * mdn->A->rmap->n)); 857 } else if (type == NORM_1) { 858 PetscReal *tmp, *tmp2; 859 PetscCall(PetscCalloc2(A->cmap->N, &tmp, A->cmap->N, &tmp2)); 860 *nrm = 0.0; 861 v = av; 862 for (j = 0; j < mdn->A->cmap->n; j++) { 863 for (i = 0; i < mdn->A->rmap->n; i++) { 864 tmp[j] += PetscAbsScalar(*v); 865 v++; 866 } 867 } 868 PetscCall(MPIU_Allreduce(tmp, tmp2, A->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A))); 869 for (j = 0; j < A->cmap->N; j++) { 870 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 871 } 872 PetscCall(PetscFree2(tmp, tmp2)); 873 PetscCall(PetscLogFlops(A->cmap->n * A->rmap->n)); 874 } else if (type == NORM_INFINITY) { /* max row norm */ 875 PetscReal ntemp; 876 PetscCall(MatNorm(mdn->A, type, &ntemp)); 877 PetscCall(MPIU_Allreduce(&ntemp, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A))); 878 } else 
SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for two norm"); 879 } 880 PetscCall(MatDenseRestoreArrayRead(mdn->A, &av)); 881 PetscFunctionReturn(0); 882 } 883 884 PetscErrorCode MatTranspose_MPIDense(Mat A, MatReuse reuse, Mat *matout) { 885 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 886 Mat B; 887 PetscInt M = A->rmap->N, N = A->cmap->N, m, n, *rwork, rstart = A->rmap->rstart; 888 PetscInt j, i, lda; 889 PetscScalar *v; 890 891 PetscFunctionBegin; 892 if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout)); 893 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 894 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 895 PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M)); 896 PetscCall(MatSetType(B, ((PetscObject)A)->type_name)); 897 PetscCall(MatMPIDenseSetPreallocation(B, NULL)); 898 } else B = *matout; 899 900 m = a->A->rmap->n; 901 n = a->A->cmap->n; 902 PetscCall(MatDenseGetArrayRead(a->A, (const PetscScalar **)&v)); 903 PetscCall(MatDenseGetLDA(a->A, &lda)); 904 PetscCall(PetscMalloc1(m, &rwork)); 905 for (i = 0; i < m; i++) rwork[i] = rstart + i; 906 for (j = 0; j < n; j++) { 907 PetscCall(MatSetValues(B, 1, &j, m, rwork, v, INSERT_VALUES)); 908 v += lda; 909 } 910 PetscCall(MatDenseRestoreArrayRead(a->A, (const PetscScalar **)&v)); 911 PetscCall(PetscFree(rwork)); 912 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 913 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 914 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 915 *matout = B; 916 } else { 917 PetscCall(MatHeaderMerge(A, &B)); 918 } 919 PetscFunctionReturn(0); 920 } 921 922 static PetscErrorCode MatDuplicate_MPIDense(Mat, MatDuplicateOption, Mat *); 923 PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat, PetscScalar); 924 925 PetscErrorCode MatSetUp_MPIDense(Mat A) { 926 PetscFunctionBegin; 927 PetscCall(PetscLayoutSetUp(A->rmap)); 928 PetscCall(PetscLayoutSetUp(A->cmap)); 929 if 
(!A->preallocated) PetscCall(MatMPIDenseSetPreallocation(A, NULL)); 930 PetscFunctionReturn(0); 931 } 932 933 PetscErrorCode MatAXPY_MPIDense(Mat Y, PetscScalar alpha, Mat X, MatStructure str) { 934 Mat_MPIDense *A = (Mat_MPIDense *)Y->data, *B = (Mat_MPIDense *)X->data; 935 936 PetscFunctionBegin; 937 PetscCall(MatAXPY(A->A, alpha, B->A, str)); 938 PetscFunctionReturn(0); 939 } 940 941 PetscErrorCode MatConjugate_MPIDense(Mat mat) { 942 Mat_MPIDense *a = (Mat_MPIDense *)mat->data; 943 944 PetscFunctionBegin; 945 PetscCall(MatConjugate(a->A)); 946 PetscFunctionReturn(0); 947 } 948 949 PetscErrorCode MatRealPart_MPIDense(Mat A) { 950 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 951 952 PetscFunctionBegin; 953 PetscCall(MatRealPart(a->A)); 954 PetscFunctionReturn(0); 955 } 956 957 PetscErrorCode MatImaginaryPart_MPIDense(Mat A) { 958 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 959 960 PetscFunctionBegin; 961 PetscCall(MatImaginaryPart(a->A)); 962 PetscFunctionReturn(0); 963 } 964 965 static PetscErrorCode MatGetColumnVector_MPIDense(Mat A, Vec v, PetscInt col) { 966 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 967 968 PetscFunctionBegin; 969 PetscCheck(a->A, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing local matrix"); 970 PetscCheck(a->A->ops->getcolumnvector, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing get column operation"); 971 PetscCall((*a->A->ops->getcolumnvector)(a->A, v, col)); 972 PetscFunctionReturn(0); 973 } 974 975 PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat, PetscInt, PetscReal *); 976 977 PetscErrorCode MatGetColumnReductions_MPIDense(Mat A, PetscInt type, PetscReal *reductions) { 978 PetscInt i, m, n; 979 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 980 PetscReal *work; 981 982 PetscFunctionBegin; 983 PetscCall(MatGetSize(A, &m, &n)); 984 PetscCall(PetscMalloc1(n, &work)); 985 if (type == REDUCTION_MEAN_REALPART) { 986 PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_REALPART, work)); 987 } else if 
(type == REDUCTION_MEAN_IMAGINARYPART) { 988 PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_IMAGINARYPART, work)); 989 } else { 990 PetscCall(MatGetColumnReductions_SeqDense(a->A, type, work)); 991 } 992 if (type == NORM_2) { 993 for (i = 0; i < n; i++) work[i] *= work[i]; 994 } 995 if (type == NORM_INFINITY) { 996 PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, A->hdr.comm)); 997 } else { 998 PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, A->hdr.comm)); 999 } 1000 PetscCall(PetscFree(work)); 1001 if (type == NORM_2) { 1002 for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]); 1003 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 1004 for (i = 0; i < n; i++) reductions[i] /= m; 1005 } 1006 PetscFunctionReturn(0); 1007 } 1008 1009 #if defined(PETSC_HAVE_CUDA) 1010 PetscErrorCode MatShift_MPIDenseCUDA(Mat A, PetscScalar alpha) { 1011 PetscScalar *da; 1012 PetscInt lda; 1013 1014 PetscFunctionBegin; 1015 PetscCall(MatDenseCUDAGetArray(A, &da)); 1016 PetscCall(MatDenseGetLDA(A, &lda)); 1017 PetscCall(PetscInfo(A, "Performing Shift on backend\n")); 1018 PetscCall(MatShift_DenseCUDA_Private(da, alpha, lda, A->rmap->rstart, A->rmap->rend, A->cmap->N)); 1019 PetscCall(MatDenseCUDARestoreArray(A, &da)); 1020 PetscFunctionReturn(0); 1021 } 1022 1023 static PetscErrorCode MatDenseGetColumnVec_MPIDenseCUDA(Mat A, PetscInt col, Vec *v) { 1024 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1025 PetscInt lda; 1026 1027 PetscFunctionBegin; 1028 PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1029 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1030 if (!a->cvec) { 1031 PetscCall(VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec)); 1032 
PetscCall(PetscLogObjectParent((PetscObject)A, (PetscObject)a->cvec)); 1033 } 1034 a->vecinuse = col + 1; 1035 PetscCall(MatDenseGetLDA(a->A, &lda)); 1036 PetscCall(MatDenseCUDAGetArray(a->A, (PetscScalar **)&a->ptrinuse)); 1037 PetscCall(VecCUDAPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda)); 1038 *v = a->cvec; 1039 PetscFunctionReturn(0); 1040 } 1041 1042 static PetscErrorCode MatDenseRestoreColumnVec_MPIDenseCUDA(Mat A, PetscInt col, Vec *v) { 1043 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1044 1045 PetscFunctionBegin; 1046 PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first"); 1047 PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector"); 1048 a->vecinuse = 0; 1049 PetscCall(MatDenseCUDARestoreArray(a->A, (PetscScalar **)&a->ptrinuse)); 1050 PetscCall(VecCUDAResetArray(a->cvec)); 1051 if (v) *v = NULL; 1052 PetscFunctionReturn(0); 1053 } 1054 1055 static PetscErrorCode MatDenseGetColumnVecRead_MPIDenseCUDA(Mat A, PetscInt col, Vec *v) { 1056 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1057 PetscInt lda; 1058 1059 PetscFunctionBegin; 1060 PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1061 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1062 if (!a->cvec) { 1063 PetscCall(VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec)); 1064 PetscCall(PetscLogObjectParent((PetscObject)A, (PetscObject)a->cvec)); 1065 } 1066 a->vecinuse = col + 1; 1067 PetscCall(MatDenseGetLDA(a->A, &lda)); 1068 PetscCall(MatDenseCUDAGetArrayRead(a->A, &a->ptrinuse)); 1069 PetscCall(VecCUDAPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda)); 1070 PetscCall(VecLockReadPush(a->cvec)); 1071 *v = a->cvec; 1072 PetscFunctionReturn(0); 1073 } 1074 1075 static PetscErrorCode 
MatDenseRestoreColumnVecRead_MPIDenseCUDA(Mat A, PetscInt col, Vec *v) { 1076 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1077 1078 PetscFunctionBegin; 1079 PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first"); 1080 PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector"); 1081 a->vecinuse = 0; 1082 PetscCall(MatDenseCUDARestoreArrayRead(a->A, &a->ptrinuse)); 1083 PetscCall(VecLockReadPop(a->cvec)); 1084 PetscCall(VecCUDAResetArray(a->cvec)); 1085 if (v) *v = NULL; 1086 PetscFunctionReturn(0); 1087 } 1088 1089 static PetscErrorCode MatDenseGetColumnVecWrite_MPIDenseCUDA(Mat A, PetscInt col, Vec *v) { 1090 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1091 PetscInt lda; 1092 1093 PetscFunctionBegin; 1094 PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1095 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1096 if (!a->cvec) { 1097 PetscCall(VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec)); 1098 PetscCall(PetscLogObjectParent((PetscObject)A, (PetscObject)a->cvec)); 1099 } 1100 a->vecinuse = col + 1; 1101 PetscCall(MatDenseGetLDA(a->A, &lda)); 1102 PetscCall(MatDenseCUDAGetArrayWrite(a->A, (PetscScalar **)&a->ptrinuse)); 1103 PetscCall(VecCUDAPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda)); 1104 *v = a->cvec; 1105 PetscFunctionReturn(0); 1106 } 1107 1108 static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDenseCUDA(Mat A, PetscInt col, Vec *v) { 1109 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1110 1111 PetscFunctionBegin; 1112 PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first"); 1113 PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing 
internal column vector"); 1114 a->vecinuse = 0; 1115 PetscCall(MatDenseCUDARestoreArrayWrite(a->A, (PetscScalar **)&a->ptrinuse)); 1116 PetscCall(VecCUDAResetArray(a->cvec)); 1117 if (v) *v = NULL; 1118 PetscFunctionReturn(0); 1119 } 1120 1121 static PetscErrorCode MatDenseCUDAPlaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a) { 1122 Mat_MPIDense *l = (Mat_MPIDense *)A->data; 1123 1124 PetscFunctionBegin; 1125 PetscCheck(!l->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1126 PetscCheck(!l->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1127 PetscCall(MatDenseCUDAPlaceArray(l->A, a)); 1128 PetscFunctionReturn(0); 1129 } 1130 1131 static PetscErrorCode MatDenseCUDAResetArray_MPIDenseCUDA(Mat A) { 1132 Mat_MPIDense *l = (Mat_MPIDense *)A->data; 1133 1134 PetscFunctionBegin; 1135 PetscCheck(!l->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1136 PetscCheck(!l->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1137 PetscCall(MatDenseCUDAResetArray(l->A)); 1138 PetscFunctionReturn(0); 1139 } 1140 1141 static PetscErrorCode MatDenseCUDAReplaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a) { 1142 Mat_MPIDense *l = (Mat_MPIDense *)A->data; 1143 1144 PetscFunctionBegin; 1145 PetscCheck(!l->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1146 PetscCheck(!l->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1147 PetscCall(MatDenseCUDAReplaceArray(l->A, a)); 1148 PetscFunctionReturn(0); 1149 } 1150 1151 static PetscErrorCode MatDenseCUDAGetArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a) { 1152 Mat_MPIDense *l = (Mat_MPIDense *)A->data; 1153 1154 PetscFunctionBegin; 1155 
PetscCall(MatDenseCUDAGetArrayWrite(l->A, a)); 1156 PetscFunctionReturn(0); 1157 } 1158 1159 static PetscErrorCode MatDenseCUDARestoreArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a) { 1160 Mat_MPIDense *l = (Mat_MPIDense *)A->data; 1161 1162 PetscFunctionBegin; 1163 PetscCall(MatDenseCUDARestoreArrayWrite(l->A, a)); 1164 PetscFunctionReturn(0); 1165 } 1166 1167 static PetscErrorCode MatDenseCUDAGetArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a) { 1168 Mat_MPIDense *l = (Mat_MPIDense *)A->data; 1169 1170 PetscFunctionBegin; 1171 PetscCall(MatDenseCUDAGetArrayRead(l->A, a)); 1172 PetscFunctionReturn(0); 1173 } 1174 1175 static PetscErrorCode MatDenseCUDARestoreArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a) { 1176 Mat_MPIDense *l = (Mat_MPIDense *)A->data; 1177 1178 PetscFunctionBegin; 1179 PetscCall(MatDenseCUDARestoreArrayRead(l->A, a)); 1180 PetscFunctionReturn(0); 1181 } 1182 1183 static PetscErrorCode MatDenseCUDAGetArray_MPIDenseCUDA(Mat A, PetscScalar **a) { 1184 Mat_MPIDense *l = (Mat_MPIDense *)A->data; 1185 1186 PetscFunctionBegin; 1187 PetscCall(MatDenseCUDAGetArray(l->A, a)); 1188 PetscFunctionReturn(0); 1189 } 1190 1191 static PetscErrorCode MatDenseCUDARestoreArray_MPIDenseCUDA(Mat A, PetscScalar **a) { 1192 Mat_MPIDense *l = (Mat_MPIDense *)A->data; 1193 1194 PetscFunctionBegin; 1195 PetscCall(MatDenseCUDARestoreArray(l->A, a)); 1196 PetscFunctionReturn(0); 1197 } 1198 1199 static PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat, PetscInt, Vec *); 1200 static PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat, PetscInt, Vec *); 1201 static PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat, PetscInt, Vec *); 1202 static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat, PetscInt, Vec *); 1203 static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat, PetscInt, Vec *); 1204 static PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat, PetscInt, Vec *); 1205 static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat, Mat *); 
1206 1207 static PetscErrorCode MatBindToCPU_MPIDenseCUDA(Mat mat, PetscBool bind) { 1208 Mat_MPIDense *d = (Mat_MPIDense *)mat->data; 1209 1210 PetscFunctionBegin; 1211 PetscCheck(!d->vecinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1212 PetscCheck(!d->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1213 if (d->A) PetscCall(MatBindToCPU(d->A, bind)); 1214 mat->boundtocpu = bind; 1215 if (!bind) { 1216 PetscBool iscuda; 1217 1218 PetscCall(PetscObjectTypeCompare((PetscObject)d->cvec, VECMPICUDA, &iscuda)); 1219 if (!iscuda) PetscCall(VecDestroy(&d->cvec)); 1220 PetscCall(PetscObjectTypeCompare((PetscObject)d->cmat, MATMPIDENSECUDA, &iscuda)); 1221 if (!iscuda) PetscCall(MatDestroy(&d->cmat)); 1222 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDenseCUDA)); 1223 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDenseCUDA)); 1224 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDenseCUDA)); 1225 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDenseCUDA)); 1226 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDenseCUDA)); 1227 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDenseCUDA)); 1228 mat->ops->shift = MatShift_MPIDenseCUDA; 1229 } else { 1230 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDense)); 1231 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDense)); 1232 PetscCall(PetscObjectComposeFunction((PetscObject)mat, 
"MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDense)); 1233 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDense)); 1234 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDense)); 1235 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDense)); 1236 mat->ops->shift = MatShift_MPIDense; 1237 } 1238 if (d->cmat) PetscCall(MatBindToCPU(d->cmat, bind)); 1239 PetscFunctionReturn(0); 1240 } 1241 1242 PetscErrorCode MatMPIDenseCUDASetPreallocation(Mat A, PetscScalar *d_data) { 1243 Mat_MPIDense *d = (Mat_MPIDense *)A->data; 1244 PetscBool iscuda; 1245 1246 PetscFunctionBegin; 1247 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 1248 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSECUDA, &iscuda)); 1249 if (!iscuda) PetscFunctionReturn(0); 1250 PetscCall(PetscLayoutSetUp(A->rmap)); 1251 PetscCall(PetscLayoutSetUp(A->cmap)); 1252 if (!d->A) { 1253 PetscCall(MatCreate(PETSC_COMM_SELF, &d->A)); 1254 PetscCall(PetscLogObjectParent((PetscObject)A, (PetscObject)d->A)); 1255 PetscCall(MatSetSizes(d->A, A->rmap->n, A->cmap->N, A->rmap->n, A->cmap->N)); 1256 } 1257 PetscCall(MatSetType(d->A, MATSEQDENSECUDA)); 1258 PetscCall(MatSeqDenseCUDASetPreallocation(d->A, d_data)); 1259 A->preallocated = PETSC_TRUE; 1260 A->assembled = PETSC_TRUE; 1261 PetscFunctionReturn(0); 1262 } 1263 #endif 1264 1265 static PetscErrorCode MatSetRandom_MPIDense(Mat x, PetscRandom rctx) { 1266 Mat_MPIDense *d = (Mat_MPIDense *)x->data; 1267 1268 PetscFunctionBegin; 1269 PetscCall(MatSetRandom(d->A, rctx)); 1270 PetscFunctionReturn(0); 1271 } 1272 1273 static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A, PetscBool *missing, PetscInt *d) { 1274 PetscFunctionBegin; 1275 *missing = PETSC_FALSE; 1276 PetscFunctionReturn(0); 1277 } 1278 1279 static PetscErrorCode 
MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat); 1280 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat); 1281 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat); 1282 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat); 1283 static PetscErrorCode MatEqual_MPIDense(Mat, Mat, PetscBool *); 1284 static PetscErrorCode MatLoad_MPIDense(Mat, PetscViewer); 1285 1286 /* -------------------------------------------------------------------*/ 1287 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense, 1288 MatGetRow_MPIDense, 1289 MatRestoreRow_MPIDense, 1290 MatMult_MPIDense, 1291 /* 4*/ MatMultAdd_MPIDense, 1292 MatMultTranspose_MPIDense, 1293 MatMultTransposeAdd_MPIDense, 1294 NULL, 1295 NULL, 1296 NULL, 1297 /* 10*/ NULL, 1298 NULL, 1299 NULL, 1300 NULL, 1301 MatTranspose_MPIDense, 1302 /* 15*/ MatGetInfo_MPIDense, 1303 MatEqual_MPIDense, 1304 MatGetDiagonal_MPIDense, 1305 MatDiagonalScale_MPIDense, 1306 MatNorm_MPIDense, 1307 /* 20*/ MatAssemblyBegin_MPIDense, 1308 MatAssemblyEnd_MPIDense, 1309 MatSetOption_MPIDense, 1310 MatZeroEntries_MPIDense, 1311 /* 24*/ MatZeroRows_MPIDense, 1312 NULL, 1313 NULL, 1314 NULL, 1315 NULL, 1316 /* 29*/ MatSetUp_MPIDense, 1317 NULL, 1318 NULL, 1319 MatGetDiagonalBlock_MPIDense, 1320 NULL, 1321 /* 34*/ MatDuplicate_MPIDense, 1322 NULL, 1323 NULL, 1324 NULL, 1325 NULL, 1326 /* 39*/ MatAXPY_MPIDense, 1327 MatCreateSubMatrices_MPIDense, 1328 NULL, 1329 MatGetValues_MPIDense, 1330 MatCopy_MPIDense, 1331 /* 44*/ NULL, 1332 MatScale_MPIDense, 1333 MatShift_MPIDense, 1334 NULL, 1335 NULL, 1336 /* 49*/ MatSetRandom_MPIDense, 1337 NULL, 1338 NULL, 1339 NULL, 1340 NULL, 1341 /* 54*/ NULL, 1342 NULL, 1343 NULL, 1344 NULL, 1345 NULL, 1346 /* 59*/ MatCreateSubMatrix_MPIDense, 1347 MatDestroy_MPIDense, 1348 MatView_MPIDense, 1349 NULL, 1350 NULL, 1351 /* 64*/ NULL, 1352 NULL, 1353 NULL, 1354 NULL, 1355 NULL, 1356 /* 
69*/ NULL, 1357 NULL, 1358 NULL, 1359 NULL, 1360 NULL, 1361 /* 74*/ NULL, 1362 NULL, 1363 NULL, 1364 NULL, 1365 NULL, 1366 /* 79*/ NULL, 1367 NULL, 1368 NULL, 1369 NULL, 1370 /* 83*/ MatLoad_MPIDense, 1371 NULL, 1372 NULL, 1373 NULL, 1374 NULL, 1375 NULL, 1376 /* 89*/ NULL, 1377 NULL, 1378 NULL, 1379 NULL, 1380 NULL, 1381 /* 94*/ NULL, 1382 NULL, 1383 MatMatTransposeMultSymbolic_MPIDense_MPIDense, 1384 MatMatTransposeMultNumeric_MPIDense_MPIDense, 1385 NULL, 1386 /* 99*/ MatProductSetFromOptions_MPIDense, 1387 NULL, 1388 NULL, 1389 MatConjugate_MPIDense, 1390 NULL, 1391 /*104*/ NULL, 1392 MatRealPart_MPIDense, 1393 MatImaginaryPart_MPIDense, 1394 NULL, 1395 NULL, 1396 /*109*/ NULL, 1397 NULL, 1398 NULL, 1399 MatGetColumnVector_MPIDense, 1400 MatMissingDiagonal_MPIDense, 1401 /*114*/ NULL, 1402 NULL, 1403 NULL, 1404 NULL, 1405 NULL, 1406 /*119*/ NULL, 1407 NULL, 1408 NULL, 1409 NULL, 1410 NULL, 1411 /*124*/ NULL, 1412 MatGetColumnReductions_MPIDense, 1413 NULL, 1414 NULL, 1415 NULL, 1416 /*129*/ NULL, 1417 NULL, 1418 MatTransposeMatMultSymbolic_MPIDense_MPIDense, 1419 MatTransposeMatMultNumeric_MPIDense_MPIDense, 1420 NULL, 1421 /*134*/ NULL, 1422 NULL, 1423 NULL, 1424 NULL, 1425 NULL, 1426 /*139*/ NULL, 1427 NULL, 1428 NULL, 1429 NULL, 1430 NULL, 1431 MatCreateMPIMatConcatenateSeqMat_MPIDense, 1432 /*145*/ NULL, 1433 NULL, 1434 NULL, 1435 NULL, 1436 NULL, 1437 /*150*/ NULL}; 1438 1439 PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat, PetscScalar *data) { 1440 Mat_MPIDense *a = (Mat_MPIDense *)mat->data; 1441 PetscBool iscuda; 1442 1443 PetscFunctionBegin; 1444 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1445 PetscCall(PetscLayoutSetUp(mat->rmap)); 1446 PetscCall(PetscLayoutSetUp(mat->cmap)); 1447 if (!a->A) { 1448 PetscCall(MatCreate(PETSC_COMM_SELF, &a->A)); 1449 PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)a->A)); 1450 PetscCall(MatSetSizes(a->A, 
mat->rmap->n, mat->cmap->N, mat->rmap->n, mat->cmap->N)); 1451 } 1452 PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSECUDA, &iscuda)); 1453 PetscCall(MatSetType(a->A, iscuda ? MATSEQDENSECUDA : MATSEQDENSE)); 1454 PetscCall(MatSeqDenseSetPreallocation(a->A, data)); 1455 mat->preallocated = PETSC_TRUE; 1456 mat->assembled = PETSC_TRUE; 1457 PetscFunctionReturn(0); 1458 } 1459 1460 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIDense(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) { 1461 Mat B, C; 1462 1463 PetscFunctionBegin; 1464 PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &C)); 1465 PetscCall(MatConvert_SeqAIJ_SeqDense(C, MATSEQDENSE, MAT_INITIAL_MATRIX, &B)); 1466 PetscCall(MatDestroy(&C)); 1467 if (reuse == MAT_REUSE_MATRIX) { 1468 C = *newmat; 1469 } else C = NULL; 1470 PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C)); 1471 PetscCall(MatDestroy(&B)); 1472 if (reuse == MAT_INPLACE_MATRIX) { 1473 PetscCall(MatHeaderReplace(A, &C)); 1474 } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C; 1475 PetscFunctionReturn(0); 1476 } 1477 1478 PetscErrorCode MatConvert_MPIDense_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) { 1479 Mat B, C; 1480 1481 PetscFunctionBegin; 1482 PetscCall(MatDenseGetLocalMatrix(A, &C)); 1483 PetscCall(MatConvert_SeqDense_SeqAIJ(C, MATSEQAIJ, MAT_INITIAL_MATRIX, &B)); 1484 if (reuse == MAT_REUSE_MATRIX) { 1485 C = *newmat; 1486 } else C = NULL; 1487 PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? 
MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C)); 1488 PetscCall(MatDestroy(&B)); 1489 if (reuse == MAT_INPLACE_MATRIX) { 1490 PetscCall(MatHeaderReplace(A, &C)); 1491 } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C; 1492 PetscFunctionReturn(0); 1493 } 1494 1495 #if defined(PETSC_HAVE_ELEMENTAL) 1496 PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) { 1497 Mat mat_elemental; 1498 PetscScalar *v; 1499 PetscInt m = A->rmap->n, N = A->cmap->N, rstart = A->rmap->rstart, i, *rows, *cols; 1500 1501 PetscFunctionBegin; 1502 if (reuse == MAT_REUSE_MATRIX) { 1503 mat_elemental = *newmat; 1504 PetscCall(MatZeroEntries(*newmat)); 1505 } else { 1506 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental)); 1507 PetscCall(MatSetSizes(mat_elemental, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N)); 1508 PetscCall(MatSetType(mat_elemental, MATELEMENTAL)); 1509 PetscCall(MatSetUp(mat_elemental)); 1510 PetscCall(MatSetOption(mat_elemental, MAT_ROW_ORIENTED, PETSC_FALSE)); 1511 } 1512 1513 PetscCall(PetscMalloc2(m, &rows, N, &cols)); 1514 for (i = 0; i < N; i++) cols[i] = i; 1515 for (i = 0; i < m; i++) rows[i] = rstart + i; 1516 1517 /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */ 1518 PetscCall(MatDenseGetArray(A, &v)); 1519 PetscCall(MatSetValues(mat_elemental, m, rows, N, cols, v, ADD_VALUES)); 1520 PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY)); 1521 PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY)); 1522 PetscCall(MatDenseRestoreArray(A, &v)); 1523 PetscCall(PetscFree2(rows, cols)); 1524 1525 if (reuse == MAT_INPLACE_MATRIX) { 1526 PetscCall(MatHeaderReplace(A, &mat_elemental)); 1527 } else { 1528 *newmat = mat_elemental; 1529 } 1530 PetscFunctionReturn(0); 1531 } 1532 #endif 1533 1534 static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A, PetscInt col, PetscScalar **vals) { 1535 Mat_MPIDense *mat = (Mat_MPIDense 
*)A->data; 1536 1537 PetscFunctionBegin; 1538 PetscCall(MatDenseGetColumn(mat->A, col, vals)); 1539 PetscFunctionReturn(0); 1540 } 1541 1542 static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A, PetscScalar **vals) { 1543 Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 1544 1545 PetscFunctionBegin; 1546 PetscCall(MatDenseRestoreColumn(mat->A, vals)); 1547 PetscFunctionReturn(0); 1548 } 1549 1550 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) { 1551 Mat_MPIDense *mat; 1552 PetscInt m, nloc, N; 1553 1554 PetscFunctionBegin; 1555 PetscCall(MatGetSize(inmat, &m, &N)); 1556 PetscCall(MatGetLocalSize(inmat, NULL, &nloc)); 1557 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 1558 PetscInt sum; 1559 1560 if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N)); 1561 /* Check sum(n) = N */ 1562 PetscCall(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm)); 1563 PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N); 1564 1565 PetscCall(MatCreateDense(comm, m, n, PETSC_DETERMINE, N, NULL, outmat)); 1566 PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 1567 } 1568 1569 /* numeric phase */ 1570 mat = (Mat_MPIDense *)(*outmat)->data; 1571 PetscCall(MatCopy(inmat, mat->A, SAME_NONZERO_PATTERN)); 1572 PetscFunctionReturn(0); 1573 } 1574 1575 #if defined(PETSC_HAVE_CUDA) 1576 PetscErrorCode MatConvert_MPIDenseCUDA_MPIDense(Mat M, MatType type, MatReuse reuse, Mat *newmat) { 1577 Mat B; 1578 Mat_MPIDense *m; 1579 1580 PetscFunctionBegin; 1581 if (reuse == MAT_INITIAL_MATRIX) { 1582 PetscCall(MatDuplicate(M, MAT_COPY_VALUES, newmat)); 1583 } else if (reuse == MAT_REUSE_MATRIX) { 1584 PetscCall(MatCopy(M, *newmat, SAME_NONZERO_PATTERN)); 1585 } 1586 1587 B = *newmat; 1588 PetscCall(MatBindToCPU_MPIDenseCUDA(B, PETSC_TRUE)); 1589 PetscCall(PetscFree(B->defaultvectype)); 1590 
PetscCall(PetscStrallocpy(VECSTANDARD, &B->defaultvectype)); 1591 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIDENSE)); 1592 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpidensecuda_mpidense_C", NULL)); 1593 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", NULL)); 1594 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", NULL)); 1595 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", NULL)); 1596 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", NULL)); 1597 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArray_C", NULL)); 1598 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArrayRead_C", NULL)); 1599 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArrayWrite_C", NULL)); 1600 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArray_C", NULL)); 1601 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArrayRead_C", NULL)); 1602 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArrayWrite_C", NULL)); 1603 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAPlaceArray_C", NULL)); 1604 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAResetArray_C", NULL)); 1605 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAReplaceArray_C", NULL)); 1606 m = (Mat_MPIDense *)(B)->data; 1607 if (m->A) PetscCall(MatConvert(m->A, MATSEQDENSE, MAT_INPLACE_MATRIX, &m->A)); 1608 B->ops->bindtocpu = NULL; 1609 B->offloadmask = PETSC_OFFLOAD_CPU; 1610 PetscFunctionReturn(0); 1611 } 1612 1613 PetscErrorCode MatConvert_MPIDense_MPIDenseCUDA(Mat M, MatType type, MatReuse reuse, Mat *newmat) { 1614 Mat B; 1615 Mat_MPIDense *m; 1616 1617 PetscFunctionBegin; 1618 if (reuse 
== MAT_INITIAL_MATRIX) { 1619 PetscCall(MatDuplicate(M, MAT_COPY_VALUES, newmat)); 1620 } else if (reuse == MAT_REUSE_MATRIX) { 1621 PetscCall(MatCopy(M, *newmat, SAME_NONZERO_PATTERN)); 1622 } 1623 1624 B = *newmat; 1625 PetscCall(PetscFree(B->defaultvectype)); 1626 PetscCall(PetscStrallocpy(VECCUDA, &B->defaultvectype)); 1627 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIDENSECUDA)); 1628 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpidensecuda_mpidense_C", MatConvert_MPIDenseCUDA_MPIDense)); 1629 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", MatProductSetFromOptions_MPIAIJ_MPIDense)); 1630 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", MatProductSetFromOptions_MPIAIJ_MPIDense)); 1631 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ)); 1632 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ)); 1633 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArray_C", MatDenseCUDAGetArray_MPIDenseCUDA)); 1634 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArrayRead_C", MatDenseCUDAGetArrayRead_MPIDenseCUDA)); 1635 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArrayWrite_C", MatDenseCUDAGetArrayWrite_MPIDenseCUDA)); 1636 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArray_C", MatDenseCUDARestoreArray_MPIDenseCUDA)); 1637 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArrayRead_C", MatDenseCUDARestoreArrayRead_MPIDenseCUDA)); 1638 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArrayWrite_C", MatDenseCUDARestoreArrayWrite_MPIDenseCUDA)); 1639 
PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAPlaceArray_C", MatDenseCUDAPlaceArray_MPIDenseCUDA)); 1640 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAResetArray_C", MatDenseCUDAResetArray_MPIDenseCUDA)); 1641 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAReplaceArray_C", MatDenseCUDAReplaceArray_MPIDenseCUDA)); 1642 m = (Mat_MPIDense *)(B->data); 1643 if (m->A) { 1644 PetscCall(MatConvert(m->A, MATSEQDENSECUDA, MAT_INPLACE_MATRIX, &m->A)); 1645 B->offloadmask = PETSC_OFFLOAD_BOTH; 1646 } else { 1647 B->offloadmask = PETSC_OFFLOAD_UNALLOCATED; 1648 } 1649 PetscCall(MatBindToCPU_MPIDenseCUDA(B, PETSC_FALSE)); 1650 1651 B->ops->bindtocpu = MatBindToCPU_MPIDenseCUDA; 1652 PetscFunctionReturn(0); 1653 } 1654 #endif 1655 1656 PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A, PetscInt col, Vec *v) { 1657 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1658 PetscInt lda; 1659 1660 PetscFunctionBegin; 1661 PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1662 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1663 if (!a->cvec) PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec)); 1664 a->vecinuse = col + 1; 1665 PetscCall(MatDenseGetLDA(a->A, &lda)); 1666 PetscCall(MatDenseGetArray(a->A, (PetscScalar **)&a->ptrinuse)); 1667 PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda)); 1668 *v = a->cvec; 1669 PetscFunctionReturn(0); 1670 } 1671 1672 PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A, PetscInt col, Vec *v) { 1673 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1674 1675 PetscFunctionBegin; 1676 PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first"); 1677 PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal 
column vector"); 1678 a->vecinuse = 0; 1679 PetscCall(MatDenseRestoreArray(a->A, (PetscScalar **)&a->ptrinuse)); 1680 PetscCall(VecResetArray(a->cvec)); 1681 if (v) *v = NULL; 1682 PetscFunctionReturn(0); 1683 } 1684 1685 PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v) { 1686 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1687 PetscInt lda; 1688 1689 PetscFunctionBegin; 1690 PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1691 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1692 if (!a->cvec) PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec)); 1693 a->vecinuse = col + 1; 1694 PetscCall(MatDenseGetLDA(a->A, &lda)); 1695 PetscCall(MatDenseGetArrayRead(a->A, &a->ptrinuse)); 1696 PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda)); 1697 PetscCall(VecLockReadPush(a->cvec)); 1698 *v = a->cvec; 1699 PetscFunctionReturn(0); 1700 } 1701 1702 PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v) { 1703 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1704 1705 PetscFunctionBegin; 1706 PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first"); 1707 PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector"); 1708 a->vecinuse = 0; 1709 PetscCall(MatDenseRestoreArrayRead(a->A, &a->ptrinuse)); 1710 PetscCall(VecLockReadPop(a->cvec)); 1711 PetscCall(VecResetArray(a->cvec)); 1712 if (v) *v = NULL; 1713 PetscFunctionReturn(0); 1714 } 1715 1716 PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v) { 1717 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1718 PetscInt lda; 1719 1720 PetscFunctionBegin; 1721 PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), 
PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1722 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1723 if (!a->cvec) PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec)); 1724 a->vecinuse = col + 1; 1725 PetscCall(MatDenseGetLDA(a->A, &lda)); 1726 PetscCall(MatDenseGetArrayWrite(a->A, (PetscScalar **)&a->ptrinuse)); 1727 PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda)); 1728 *v = a->cvec; 1729 PetscFunctionReturn(0); 1730 } 1731 1732 PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v) { 1733 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1734 1735 PetscFunctionBegin; 1736 PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first"); 1737 PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector"); 1738 a->vecinuse = 0; 1739 PetscCall(MatDenseRestoreArrayWrite(a->A, (PetscScalar **)&a->ptrinuse)); 1740 PetscCall(VecResetArray(a->cvec)); 1741 if (v) *v = NULL; 1742 PetscFunctionReturn(0); 1743 } 1744 1745 PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A, PetscInt rbegin, PetscInt rend, PetscInt cbegin, PetscInt cend, Mat *v) { 1746 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1747 Mat_MPIDense *c; 1748 MPI_Comm comm; 1749 PetscInt pbegin, pend; 1750 1751 PetscFunctionBegin; 1752 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 1753 PetscCheck(!a->vecinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1754 PetscCheck(!a->matinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1755 pbegin = PetscMax(0, PetscMin(A->rmap->rend, rbegin) - A->rmap->rstart); 1756 pend = PetscMin(A->rmap->n, PetscMax(0, rend - A->rmap->rstart)); 1757 if (!a->cmat) { 1758 PetscCall(MatCreate(comm, &a->cmat)); 1759 
PetscCall(PetscLogObjectParent((PetscObject)A, (PetscObject)a->cmat)); 1760 PetscCall(MatSetType(a->cmat, ((PetscObject)A)->type_name)); 1761 if (rend - rbegin == A->rmap->N) PetscCall(PetscLayoutReference(A->rmap, &a->cmat->rmap)); 1762 else { 1763 PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin)); 1764 PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin)); 1765 PetscCall(PetscLayoutSetUp(a->cmat->rmap)); 1766 } 1767 PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin)); 1768 PetscCall(PetscLayoutSetUp(a->cmat->cmap)); 1769 } else { 1770 PetscBool same = (PetscBool)(rend - rbegin == a->cmat->rmap->N); 1771 if (same && a->cmat->rmap->N != A->rmap->N) { 1772 same = (PetscBool)(pend - pbegin == a->cmat->rmap->n); 1773 PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &same, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A))); 1774 } 1775 if (!same) { 1776 PetscCall(PetscLayoutDestroy(&a->cmat->rmap)); 1777 PetscCall(PetscLayoutCreate(comm, &a->cmat->rmap)); 1778 PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin)); 1779 PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin)); 1780 PetscCall(PetscLayoutSetUp(a->cmat->rmap)); 1781 } 1782 if (cend - cbegin != a->cmat->cmap->N) { 1783 PetscCall(PetscLayoutDestroy(&a->cmat->cmap)); 1784 PetscCall(PetscLayoutCreate(comm, &a->cmat->cmap)); 1785 PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin)); 1786 PetscCall(PetscLayoutSetUp(a->cmat->cmap)); 1787 } 1788 } 1789 c = (Mat_MPIDense *)a->cmat->data; 1790 PetscCheck(!c->A, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1791 PetscCall(MatDenseGetSubMatrix(a->A, pbegin, pend, cbegin, cend, &c->A)); 1792 a->cmat->preallocated = PETSC_TRUE; 1793 a->cmat->assembled = PETSC_TRUE; 1794 a->matinuse = cbegin + 1; 1795 *v = a->cmat; 1796 PetscFunctionReturn(0); 1797 } 1798 1799 PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A, Mat *v) { 1800 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1801 Mat_MPIDense *c; 1802 
1803 PetscFunctionBegin; 1804 PetscCheck(a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetSubMatrix() first"); 1805 PetscCheck(a->cmat, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal matrix"); 1806 PetscCheck(*v == a->cmat, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Not the matrix obtained from MatDenseGetSubMatrix()"); 1807 a->matinuse = 0; 1808 c = (Mat_MPIDense *)a->cmat->data; 1809 PetscCall(MatDenseRestoreSubMatrix(a->A, &c->A)); 1810 if (v) *v = NULL; 1811 PetscFunctionReturn(0); 1812 } 1813 1814 /*MC 1815 MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices. 1816 1817 Options Database Keys: 1818 . -mat_type mpidense - sets the matrix type to "mpidense" during a call to MatSetFromOptions() 1819 1820 Level: beginner 1821 1822 .seealso: `MatCreateDense()` 1823 1824 M*/ 1825 PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat) { 1826 Mat_MPIDense *a; 1827 1828 PetscFunctionBegin; 1829 PetscCall(PetscNewLog(mat, &a)); 1830 mat->data = (void *)a; 1831 PetscCall(PetscMemcpy(mat->ops, &MatOps_Values, sizeof(struct _MatOps))); 1832 1833 mat->insertmode = NOT_SET_VALUES; 1834 1835 /* build cache for off array entries formed */ 1836 a->donotstash = PETSC_FALSE; 1837 1838 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)mat), 1, &mat->stash)); 1839 1840 /* stuff used for matrix vector multiply */ 1841 a->lvec = NULL; 1842 a->Mvctx = NULL; 1843 a->roworiented = PETSC_TRUE; 1844 1845 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", MatDenseGetLDA_MPIDense)); 1846 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", MatDenseSetLDA_MPIDense)); 1847 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", MatDenseGetArray_MPIDense)); 1848 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", MatDenseRestoreArray_MPIDense)); 1849 
PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", MatDenseGetArrayRead_MPIDense)); 1850 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", MatDenseRestoreArrayRead_MPIDense)); 1851 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", MatDenseGetArrayWrite_MPIDense)); 1852 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", MatDenseRestoreArrayWrite_MPIDense)); 1853 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", MatDensePlaceArray_MPIDense)); 1854 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", MatDenseResetArray_MPIDense)); 1855 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", MatDenseReplaceArray_MPIDense)); 1856 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDense)); 1857 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDense)); 1858 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDense)); 1859 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDense)); 1860 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDense)); 1861 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDense)); 1862 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", MatDenseGetSubMatrix_MPIDense)); 1863 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", MatDenseRestoreSubMatrix_MPIDense)); 1864 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense)); 1865 
PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", MatConvert_MPIDense_MPIAIJ)); 1866 #if defined(PETSC_HAVE_ELEMENTAL) 1867 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", MatConvert_MPIDense_Elemental)); 1868 #endif 1869 #if defined(PETSC_HAVE_SCALAPACK) 1870 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", MatConvert_Dense_ScaLAPACK)); 1871 #endif 1872 #if defined(PETSC_HAVE_CUDA) 1873 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", MatConvert_MPIDense_MPIDenseCUDA)); 1874 #endif 1875 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", MatMPIDenseSetPreallocation_MPIDense)); 1876 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense)); 1877 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ)); 1878 #if defined(PETSC_HAVE_CUDA) 1879 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense)); 1880 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ)); 1881 #endif 1882 1883 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", MatDenseGetColumn_MPIDense)); 1884 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", MatDenseRestoreColumn_MPIDense)); 1885 PetscCall(PetscObjectChangeTypeName((PetscObject)mat, MATMPIDENSE)); 1886 PetscFunctionReturn(0); 1887 } 1888 1889 /*MC 1890 MATMPIDENSECUDA - MATMPIDENSECUDA = "mpidensecuda" - A matrix type to be used for distributed dense matrices on GPUs. 1891 1892 Options Database Keys: 1893 . 
-mat_type mpidensecuda - sets the matrix type to "mpidensecuda" during a call to MatSetFromOptions() 1894 1895 Level: beginner 1896 1897 .seealso: 1898 1899 M*/ 1900 #if defined(PETSC_HAVE_CUDA) 1901 #include <petsc/private/deviceimpl.h> 1902 PETSC_EXTERN PetscErrorCode MatCreate_MPIDenseCUDA(Mat B) { 1903 PetscFunctionBegin; 1904 PetscCall(PetscDeviceInitialize(PETSC_DEVICE_CUDA)); 1905 PetscCall(MatCreate_MPIDense(B)); 1906 PetscCall(MatConvert_MPIDense_MPIDenseCUDA(B, MATMPIDENSECUDA, MAT_INPLACE_MATRIX, &B)); 1907 PetscFunctionReturn(0); 1908 } 1909 #endif 1910 1911 /*MC 1912 MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices. 1913 1914 This matrix type is identical to MATSEQDENSE when constructed with a single process communicator, 1915 and MATMPIDENSE otherwise. 1916 1917 Options Database Keys: 1918 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions() 1919 1920 Level: beginner 1921 1922 .seealso: `MATSEQDENSE`, `MATMPIDENSE`, `MATDENSECUDA` 1923 M*/ 1924 1925 /*MC 1926 MATDENSECUDA - MATDENSECUDA = "densecuda" - A matrix type to be used for dense matrices on GPUs. 1927 1928 This matrix type is identical to MATSEQDENSECUDA when constructed with a single process communicator, 1929 and MATMPIDENSECUDA otherwise. 1930 1931 Options Database Keys: 1932 . -mat_type densecuda - sets the matrix type to "densecuda" during a call to MatSetFromOptions() 1933 1934 Level: beginner 1935 1936 .seealso: `MATSEQDENSECUDA`, `MATMPIDENSECUDA`, `MATDENSE` 1937 M*/ 1938 1939 /*@C 1940 MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries 1941 1942 Collective 1943 1944 Input Parameters: 1945 . B - the matrix 1946 - data - optional location of matrix data. Set data=NULL for PETSc 1947 to control all matrix memory allocation. 1948 1949 Notes: 1950 The dense format is fully compatible with standard Fortran 77 1951 storage by columns. 
1952 1953 The data input variable is intended primarily for Fortran programmers 1954 who wish to allocate their own matrix memory space. Most users should 1955 set data=NULL. 1956 1957 Level: intermediate 1958 1959 .seealso: `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()` 1960 @*/ 1961 PetscErrorCode MatMPIDenseSetPreallocation(Mat B, PetscScalar *data) { 1962 PetscFunctionBegin; 1963 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 1964 PetscTryMethod(B, "MatMPIDenseSetPreallocation_C", (Mat, PetscScalar *), (B, data)); 1965 PetscFunctionReturn(0); 1966 } 1967 1968 /*@ 1969 MatDensePlaceArray - Allows one to replace the array in a dense matrix with an 1970 array provided by the user. This is useful to avoid copying an array 1971 into a matrix 1972 1973 Not Collective 1974 1975 Input Parameters: 1976 + mat - the matrix 1977 - array - the array in column major order 1978 1979 Notes: 1980 You can return to the original array with a call to MatDenseResetArray(). The user is responsible for freeing this array; it will not be 1981 freed when the matrix is destroyed. 1982 1983 Level: developer 1984 1985 .seealso: `MatDenseGetArray()`, `MatDenseResetArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()` 1986 1987 @*/ 1988 PetscErrorCode MatDensePlaceArray(Mat mat, const PetscScalar *array) { 1989 PetscFunctionBegin; 1990 PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 1991 PetscUseMethod(mat, "MatDensePlaceArray_C", (Mat, const PetscScalar *), (mat, array)); 1992 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 1993 #if defined(PETSC_HAVE_CUDA) 1994 mat->offloadmask = PETSC_OFFLOAD_CPU; 1995 #endif 1996 PetscFunctionReturn(0); 1997 } 1998 1999 /*@ 2000 MatDenseResetArray - Resets the matrix array to that it previously had before the call to MatDensePlaceArray() 2001 2002 Not Collective 2003 2004 Input Parameters: 2005 . 
   mat - the matrix

   Notes:
   You can only call this after a call to MatDensePlaceArray()

   Level: developer

.seealso: `MatDenseGetArray()`, `MatDensePlaceArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`

@*/
PetscErrorCode MatDenseResetArray(Mat mat) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  PetscUseMethod(mat, "MatDenseResetArray_C", (Mat), (mat));
  /* the exposed data changes back to the original array, so invalidate cached state */
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  PetscFunctionReturn(0);
}

/*@
   MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Notes:
   The memory passed in MUST be obtained with PetscMalloc() and CANNOT be
   freed by the user. It will be freed when the matrix is destroyed.

   Level: developer

.seealso: `MatDenseGetArray()`, `VecReplaceArray()`
@*/
PetscErrorCode MatDenseReplaceArray(Mat mat, const PetscScalar *array) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  PetscUseMethod(mat, "MatDenseReplaceArray_C", (Mat, const PetscScalar *), (mat, array));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
#if defined(PETSC_HAVE_CUDA)
  /* replacement array is a host pointer, so the CPU copy becomes the valid one */
  mat->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_CUDA)
/*@C
   MatDenseCUDAPlaceArray - Allows one to replace the GPU array in a dense matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Notes:
   You can return to the original array with a call to MatDenseCUDAResetArray(). The user is responsible for freeing this array; it will not be
   freed when the matrix is destroyed. The array must have been allocated with cudaMalloc().

   Level: developer

.seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDAResetArray()`
@*/
PetscErrorCode MatDenseCUDAPlaceArray(Mat mat, const PetscScalar *array) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  PetscUseMethod(mat, "MatDenseCUDAPlaceArray_C", (Mat, const PetscScalar *), (mat, array));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  /* the placed array is device memory, so the GPU copy becomes the valid one */
  mat->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDAResetArray - Resets the matrix array to that it previously had before the call to MatDenseCUDAPlaceArray()

   Not Collective

   Input Parameters:
.  mat - the matrix

   Notes:
   You can only call this after a call to MatDenseCUDAPlaceArray()

   Level: developer

.seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDAPlaceArray()`

@*/
PetscErrorCode MatDenseCUDAResetArray(Mat mat) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  PetscUseMethod(mat, "MatDenseCUDAResetArray_C", (Mat), (mat));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDAReplaceArray - Allows one to replace the GPU array in a dense matrix with an
   array provided by the user.
   This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Notes:
   This permanently replaces the GPU array and frees the memory associated with the old GPU array.
   The memory passed in CANNOT be freed by the user. It will be freed
   when the matrix is destroyed. The array should respect the matrix leading dimension.

   Level: developer

.seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDAPlaceArray()`, `MatDenseCUDAResetArray()`
@*/
PetscErrorCode MatDenseCUDAReplaceArray(Mat mat, const PetscScalar *array) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  PetscUseMethod(mat, "MatDenseCUDAReplaceArray_C", (Mat, const PetscScalar *), (mat, array));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  /* the new array is device memory, so the GPU copy becomes the valid one */
  mat->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDAGetArrayWrite - Provides write access to the CUDA buffer inside a dense matrix.

   Not Collective

   Input Parameters:
.  A - the matrix

   Output Parameters:
.  array - the GPU array in column major order

   Notes:
   The data on the GPU may not be updated due to operations done on the CPU. If you need updated data, use MatDenseCUDAGetArray(). The array must be restored with MatDenseCUDARestoreArrayWrite() when no longer needed.

   Level: developer

.seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayRead()`, `MatDenseCUDARestoreArrayRead()`
@*/
PetscErrorCode MatDenseCUDAGetArrayWrite(Mat A, PetscScalar **a) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscUseMethod(A, "MatDenseCUDAGetArrayWrite_C", (Mat, PetscScalar **), (A, a));
  /* write access invalidates whatever the caller had cached about the values */
  PetscCall(PetscObjectStateIncrease((PetscObject)A));
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDARestoreArrayWrite - Restore write access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArrayWrite().

   Not Collective

   Input Parameters:
+  A - the matrix
-  array - the GPU array in column major order

   Level: developer

.seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`, `MatDenseCUDAGetArrayRead()`
@*/
PetscErrorCode MatDenseCUDARestoreArrayWrite(Mat A, PetscScalar **a) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscUseMethod(A, "MatDenseCUDARestoreArrayWrite_C", (Mat, PetscScalar **), (A, a));
  PetscCall(PetscObjectStateIncrease((PetscObject)A));
  /* the buffer was written on the device, so the GPU copy is the up-to-date one */
  A->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDAGetArrayRead - Provides read-only access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArrayRead() when no longer needed.

   Not Collective

   Input Parameters:
.  A - the matrix

   Output Parameters:
.  array - the GPU array in column major order

   Notes:
   Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite().
2198 2199 Level: developer 2200 2201 .seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()` 2202 @*/ 2203 PetscErrorCode MatDenseCUDAGetArrayRead(Mat A, const PetscScalar **a) { 2204 PetscFunctionBegin; 2205 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 2206 PetscUseMethod(A, "MatDenseCUDAGetArrayRead_C", (Mat, const PetscScalar **), (A, a)); 2207 PetscFunctionReturn(0); 2208 } 2209 2210 /*@C 2211 MatDenseCUDARestoreArrayRead - Restore read-only access to the CUDA buffer inside a dense matrix previously obtained with a call to MatDenseCUDAGetArrayRead(). 2212 2213 Not Collective 2214 2215 Input Parameters: 2216 + A - the matrix 2217 - array - the GPU array in column major order 2218 2219 Notes: 2220 Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite(). 2221 2222 Level: developer 2223 2224 .seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDAGetArrayRead()` 2225 @*/ 2226 PetscErrorCode MatDenseCUDARestoreArrayRead(Mat A, const PetscScalar **a) { 2227 PetscFunctionBegin; 2228 PetscUseMethod(A, "MatDenseCUDARestoreArrayRead_C", (Mat, const PetscScalar **), (A, a)); 2229 PetscFunctionReturn(0); 2230 } 2231 2232 /*@C 2233 MatDenseCUDAGetArray - Provides access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArray() when no longer needed. 2234 2235 Not Collective 2236 2237 Input Parameters: 2238 . A - the matrix 2239 2240 Output Parameters 2241 . array - the GPU array in column major order 2242 2243 Notes: 2244 Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite(). For read-only access, use MatDenseCUDAGetArrayRead(). 
   Level: developer

.seealso: `MatDenseCUDAGetArrayRead()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`
@*/
PetscErrorCode MatDenseCUDAGetArray(Mat A, PetscScalar **a) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscUseMethod(A, "MatDenseCUDAGetArray_C", (Mat, PetscScalar **), (A, a));
  /* read-write access may modify the values, so bump the object state */
  PetscCall(PetscObjectStateIncrease((PetscObject)A));
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDARestoreArray - Restore access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArray().

   Not Collective

   Input Parameters:
+  A - the matrix
-  array - the GPU array in column major order

   Level: developer

.seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`, `MatDenseCUDAGetArrayRead()`
@*/
PetscErrorCode MatDenseCUDARestoreArray(Mat A, PetscScalar **a) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscUseMethod(A, "MatDenseCUDARestoreArray_C", (Mat, PetscScalar **), (A, a));
  PetscCall(PetscObjectStateIncrease((PetscObject)A));
  /* after a read-write restore, the device copy holds the up-to-date values */
  A->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}
#endif

/*@C
   MatCreateDense - Creates a matrix in dense format.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
-  data - optional location of matrix data.
Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc 2295 to control all matrix memory allocation. 2296 2297 Output Parameter: 2298 . A - the matrix 2299 2300 Notes: 2301 The dense format is fully compatible with standard Fortran 77 2302 storage by columns. 2303 Note that, although local portions of the matrix are stored in column-major 2304 order, the matrix is partitioned across MPI ranks by row. 2305 2306 The data input variable is intended primarily for Fortran programmers 2307 who wish to allocate their own matrix memory space. Most users should 2308 set data=NULL (PETSC_NULL_SCALAR for Fortran users). 2309 2310 The user MUST specify either the local or global matrix dimensions 2311 (possibly both). 2312 2313 Level: intermediate 2314 2315 .seealso: `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()` 2316 @*/ 2317 PetscErrorCode MatCreateDense(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar *data, Mat *A) { 2318 PetscFunctionBegin; 2319 PetscCall(MatCreate(comm, A)); 2320 PetscCall(MatSetSizes(*A, m, n, M, N)); 2321 PetscCall(MatSetType(*A, MATDENSE)); 2322 PetscCall(MatSeqDenseSetPreallocation(*A, data)); 2323 PetscCall(MatMPIDenseSetPreallocation(*A, data)); 2324 PetscFunctionReturn(0); 2325 } 2326 2327 #if defined(PETSC_HAVE_CUDA) 2328 /*@C 2329 MatCreateDenseCUDA - Creates a matrix in dense format using CUDA. 2330 2331 Collective 2332 2333 Input Parameters: 2334 + comm - MPI communicator 2335 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2336 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 2337 . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 2338 . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 2339 - data - optional location of GPU matrix data. Set data=NULL for PETSc 2340 to control matrix memory allocation. 2341 2342 Output Parameter: 2343 . 
A - the matrix 2344 2345 Notes: 2346 2347 Level: intermediate 2348 2349 .seealso: `MatCreate()`, `MatCreateDense()` 2350 @*/ 2351 PetscErrorCode MatCreateDenseCUDA(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar *data, Mat *A) { 2352 PetscFunctionBegin; 2353 PetscCall(MatCreate(comm, A)); 2354 PetscValidLogicalCollectiveBool(*A, !!data, 6); 2355 PetscCall(MatSetSizes(*A, m, n, M, N)); 2356 PetscCall(MatSetType(*A, MATDENSECUDA)); 2357 PetscCall(MatSeqDenseCUDASetPreallocation(*A, data)); 2358 PetscCall(MatMPIDenseCUDASetPreallocation(*A, data)); 2359 PetscFunctionReturn(0); 2360 } 2361 #endif 2362 2363 static PetscErrorCode MatDuplicate_MPIDense(Mat A, MatDuplicateOption cpvalues, Mat *newmat) { 2364 Mat mat; 2365 Mat_MPIDense *a, *oldmat = (Mat_MPIDense *)A->data; 2366 2367 PetscFunctionBegin; 2368 *newmat = NULL; 2369 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat)); 2370 PetscCall(MatSetSizes(mat, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 2371 PetscCall(MatSetType(mat, ((PetscObject)A)->type_name)); 2372 a = (Mat_MPIDense *)mat->data; 2373 2374 mat->factortype = A->factortype; 2375 mat->assembled = PETSC_TRUE; 2376 mat->preallocated = PETSC_TRUE; 2377 2378 mat->insertmode = NOT_SET_VALUES; 2379 a->donotstash = oldmat->donotstash; 2380 2381 PetscCall(PetscLayoutReference(A->rmap, &mat->rmap)); 2382 PetscCall(PetscLayoutReference(A->cmap, &mat->cmap)); 2383 2384 PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A)); 2385 PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)a->A)); 2386 2387 *newmat = mat; 2388 PetscFunctionReturn(0); 2389 } 2390 2391 PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer) { 2392 PetscBool isbinary; 2393 #if defined(PETSC_HAVE_HDF5) 2394 PetscBool ishdf5; 2395 #endif 2396 2397 PetscFunctionBegin; 2398 PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1); 2399 PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2); 2400 /* force binary viewer to load .info file if it has 
     not yet done so */
  PetscCall(PetscViewerSetUp(viewer));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
#if defined(PETSC_HAVE_HDF5)
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
#endif
  if (isbinary) {
    PetscCall(MatLoad_Dense_Binary(newMat, viewer));
#if defined(PETSC_HAVE_HDF5)
  } else if (ishdf5) {
    PetscCall(MatLoad_Dense_HDF5(newMat, viewer));
#endif
  } else SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
  PetscFunctionReturn(0);
}

/* Two matrices are equal iff every rank's local block is equal: local MatEqual, then a logical-AND reduction. */
static PetscErrorCode MatEqual_MPIDense(Mat A, Mat B, PetscBool *flag) {
  Mat_MPIDense *matB = (Mat_MPIDense *)B->data, *matA = (Mat_MPIDense *)A->data;
  Mat a, b;

  PetscFunctionBegin;
  a = matA->A;
  b = matB->A;
  PetscCall(MatEqual(a, b, flag));
  PetscCall(MPIU_Allreduce(MPI_IN_PLACE, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
  PetscFunctionReturn(0);
}

/* Destructor for the A^T*B product data (frees send buffer, counts, and the cached sequential product). */
PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data) {
  Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data;

  PetscFunctionBegin;
  PetscCall(PetscFree2(atb->sendbuf, atb->recvcounts));
  PetscCall(MatDestroy(&atb->atb));
  PetscCall(PetscFree(atb));
  PetscFunctionReturn(0);
}

/* Destructor for the A*B^T product data; recvcounts/recvdispls are NULL (hence safely freed)
   when the "cyclic" algorithm was selected, since only "allgatherv" allocates them. */
PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data) {
  Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data;

  PetscFunctionBegin;
  PetscCall(PetscFree2(abt->buf[0], abt->buf[1]));
  PetscCall(PetscFree2(abt->recvcounts, abt->recvdispls));
  PetscCall(PetscFree(abt));
  PetscFunctionReturn(0);
}

/* C = A^T * B: each rank computes its local contribution aseq^T * bseq (full size),
   packs it grouped by destination rank's row range, and MPI_Reduce_scatter sums and
   distributes the owned rows of C. */
static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) {
  Mat_MPIDense *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
  Mat_TransMatMultDense *atb;
  MPI_Comm comm;
  PetscMPIInt size, *recvcounts;
  PetscScalar *carray, *sendbuf;
  const PetscScalar *atbarray;
  PetscInt i, cN = C->cmap->N, proc, k, j, lda;
  const PetscInt *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C, 3);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
  atb = (Mat_TransMatMultDense *)C->product->data;
  recvcounts = atb->recvcounts;
  sendbuf = atb->sendbuf;

  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));

  /* compute atbarray = aseq^T * bseq; reuse the cached sequential product after the first call */
  PetscCall(MatTransposeMatMult(a->A, b->A, atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX, PETSC_DEFAULT, &atb->atb));

  PetscCall(MatGetOwnershipRanges(C, &ranges));

  /* arrange atbarray into sendbuf: for each destination rank, its row slice of every column,
     contiguous per rank as MPI_Reduce_scatter requires */
  PetscCall(MatDenseGetArrayRead(atb->atb, &atbarray));
  PetscCall(MatDenseGetLDA(atb->atb, &lda));
  for (proc = 0, k = 0; proc < size; proc++) {
    for (j = 0; j < cN; j++) {
      for (i = ranges[proc]; i < ranges[proc + 1]; i++) sendbuf[k++] = atbarray[i + j * lda];
    }
  }
  PetscCall(MatDenseRestoreArrayRead(atb->atb, &atbarray));

  /* sum all atbarray to local values of C */
  PetscCall(MatDenseGetArrayWrite(c->A, &carray));
  PetscCallMPI(MPI_Reduce_scatter(sendbuf, carray, recvcounts, MPIU_SCALAR, MPIU_SUM, comm));
  PetscCall(MatDenseRestoreArrayWrite(c->A, &carray));
  PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(0);
}

/* Symbolic phase for C = A^T * B: size C, allocate the pack buffer (full C size) and
   the per-rank receive counts derived from C's row ownership ranges. */
static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C) {
  MPI_Comm comm;
  PetscMPIInt size;
  PetscInt cm = A->cmap->n, cM, cN = B->cmap->N;
  Mat_TransMatMultDense *atb;
  PetscBool cisdense;
  PetscInt i;
  const PetscInt *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C, 4);
  PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  /* A and B must share the same row distribution for the local product to make sense */
  if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
    SETERRQ(comm, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
  }

  /* create matrix product C */
  PetscCall(MatSetSizes(C, cm, B->cmap->n, A->cmap->N, B->cmap->N));
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, ""));
  if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
  PetscCall(MatSetUp(C));

  /* create data structure for reuse C */
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCall(PetscNew(&atb));
  cM = C->rmap->N;
  PetscCall(PetscMalloc2(cM * cN, &atb->sendbuf, size, &atb->recvcounts));
  PetscCall(MatGetOwnershipRanges(C, &ranges));
  /* each rank receives its owned rows of C across all cN columns */
  for (i = 0; i < size; i++) atb->recvcounts[i] = (ranges[i + 1] - ranges[i]) * cN;

  C->product->data = atb;
  C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}

/* Symbolic phase for C = A * B^T: choose "allgatherv" (default) or "cyclic" algorithm and
   allocate the corresponding communication buffers. */
static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C) {
  MPI_Comm comm;
  PetscMPIInt i, size;
  PetscInt maxRows, bufsiz;
  PetscMPIInt tag;
  PetscInt alg;
  Mat_MatTransMultDense *abt;
  Mat_Product *product = C->product;
  PetscBool flg;

  PetscFunctionBegin;
  MatCheckProduct(C, 4);
  PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
  /* check local size of A and B */
PetscCheck(A->cmap->n == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local column dimensions are incompatible, A (%" PetscInt_FMT ") != B (%" PetscInt_FMT ")", A->cmap->n, B->cmap->n); 2543 2544 PetscCall(PetscStrcmp(product->alg, "allgatherv", &flg)); 2545 alg = flg ? 0 : 1; 2546 2547 /* setup matrix product C */ 2548 PetscCall(MatSetSizes(C, A->rmap->n, B->rmap->n, A->rmap->N, B->rmap->N)); 2549 PetscCall(MatSetType(C, MATMPIDENSE)); 2550 PetscCall(MatSetUp(C)); 2551 PetscCall(PetscObjectGetNewTag((PetscObject)C, &tag)); 2552 2553 /* create data structure for reuse C */ 2554 PetscCall(PetscObjectGetComm((PetscObject)C, &comm)); 2555 PetscCallMPI(MPI_Comm_size(comm, &size)); 2556 PetscCall(PetscNew(&abt)); 2557 abt->tag = tag; 2558 abt->alg = alg; 2559 switch (alg) { 2560 case 1: /* alg: "cyclic" */ 2561 for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i])); 2562 bufsiz = A->cmap->N * maxRows; 2563 PetscCall(PetscMalloc2(bufsiz, &(abt->buf[0]), bufsiz, &(abt->buf[1]))); 2564 break; 2565 default: /* alg: "allgatherv" */ 2566 PetscCall(PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1]))); 2567 PetscCall(PetscMalloc2(size, &(abt->recvcounts), size + 1, &(abt->recvdispls))); 2568 for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N; 2569 for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i]; 2570 break; 2571 } 2572 2573 C->product->data = abt; 2574 C->product->destroy = MatDestroy_MatMatTransMult_MPIDense_MPIDense; 2575 C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense; 2576 PetscFunctionReturn(0); 2577 } 2578 2579 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C) { 2580 Mat_MPIDense *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data; 2581 Mat_MatTransMultDense *abt; 2582 MPI_Comm 
comm; 2583 PetscMPIInt rank, size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom; 2584 PetscScalar *sendbuf, *recvbuf = NULL, *cv; 2585 PetscInt i, cK = A->cmap->N, k, j, bn; 2586 PetscScalar _DOne = 1.0, _DZero = 0.0; 2587 const PetscScalar *av, *bv; 2588 PetscBLASInt cm, cn, ck, alda, blda = 0, clda; 2589 MPI_Request reqs[2]; 2590 const PetscInt *ranges; 2591 2592 PetscFunctionBegin; 2593 MatCheckProduct(C, 3); 2594 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 2595 abt = (Mat_MatTransMultDense *)C->product->data; 2596 PetscCall(PetscObjectGetComm((PetscObject)C, &comm)); 2597 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 2598 PetscCallMPI(MPI_Comm_size(comm, &size)); 2599 PetscCall(MatDenseGetArrayRead(a->A, &av)); 2600 PetscCall(MatDenseGetArrayRead(b->A, &bv)); 2601 PetscCall(MatDenseGetArrayWrite(c->A, &cv)); 2602 PetscCall(MatDenseGetLDA(a->A, &i)); 2603 PetscCall(PetscBLASIntCast(i, &alda)); 2604 PetscCall(MatDenseGetLDA(b->A, &i)); 2605 PetscCall(PetscBLASIntCast(i, &blda)); 2606 PetscCall(MatDenseGetLDA(c->A, &i)); 2607 PetscCall(PetscBLASIntCast(i, &clda)); 2608 PetscCall(MatGetOwnershipRanges(B, &ranges)); 2609 bn = B->rmap->n; 2610 if (blda == bn) { 2611 sendbuf = (PetscScalar *)bv; 2612 } else { 2613 sendbuf = abt->buf[0]; 2614 for (k = 0, i = 0; i < cK; i++) { 2615 for (j = 0; j < bn; j++, k++) sendbuf[k] = bv[i * blda + j]; 2616 } 2617 } 2618 if (size > 1) { 2619 sendto = (rank + size - 1) % size; 2620 recvfrom = (rank + size + 1) % size; 2621 } else { 2622 sendto = recvfrom = 0; 2623 } 2624 PetscCall(PetscBLASIntCast(cK, &ck)); 2625 PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm)); 2626 recvisfrom = rank; 2627 for (i = 0; i < size; i++) { 2628 /* we have finished receiving in sending, bufs can be read/modified */ 2629 PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */ 2630 PetscInt nextbn = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom]; 
2631 2632 if (nextrecvisfrom != rank) { 2633 /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */ 2634 sendsiz = cK * bn; 2635 recvsiz = cK * nextbn; 2636 recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1]; 2637 PetscCallMPI(MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0])); 2638 PetscCallMPI(MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1])); 2639 } 2640 2641 /* local aseq * sendbuf^T */ 2642 PetscCall(PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn)); 2643 if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &cm, &cn, &ck, &_DOne, av, &alda, sendbuf, &cn, &_DZero, cv + clda * ranges[recvisfrom], &clda)); 2644 2645 if (nextrecvisfrom != rank) { 2646 /* wait for the sends and receives to complete, swap sendbuf and recvbuf */ 2647 PetscCallMPI(MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE)); 2648 } 2649 bn = nextbn; 2650 recvisfrom = nextrecvisfrom; 2651 sendbuf = recvbuf; 2652 } 2653 PetscCall(MatDenseRestoreArrayRead(a->A, &av)); 2654 PetscCall(MatDenseRestoreArrayRead(b->A, &bv)); 2655 PetscCall(MatDenseRestoreArrayWrite(c->A, &cv)); 2656 PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 2657 PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 2658 PetscFunctionReturn(0); 2659 } 2660 2661 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C) { 2662 Mat_MPIDense *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data; 2663 Mat_MatTransMultDense *abt; 2664 MPI_Comm comm; 2665 PetscMPIInt size; 2666 PetscScalar *cv, *sendbuf, *recvbuf; 2667 const PetscScalar *av, *bv; 2668 PetscInt blda, i, cK = A->cmap->N, k, j, bn; 2669 PetscScalar _DOne = 1.0, _DZero = 0.0; 2670 PetscBLASInt cm, cn, ck, alda, clda; 2671 2672 PetscFunctionBegin; 2673 MatCheckProduct(C, 3); 2674 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 2675 abt = 
(Mat_MatTransMultDense *)C->product->data; 2676 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 2677 PetscCallMPI(MPI_Comm_size(comm, &size)); 2678 PetscCall(MatDenseGetArrayRead(a->A, &av)); 2679 PetscCall(MatDenseGetArrayRead(b->A, &bv)); 2680 PetscCall(MatDenseGetArrayWrite(c->A, &cv)); 2681 PetscCall(MatDenseGetLDA(a->A, &i)); 2682 PetscCall(PetscBLASIntCast(i, &alda)); 2683 PetscCall(MatDenseGetLDA(b->A, &blda)); 2684 PetscCall(MatDenseGetLDA(c->A, &i)); 2685 PetscCall(PetscBLASIntCast(i, &clda)); 2686 /* copy transpose of B into buf[0] */ 2687 bn = B->rmap->n; 2688 sendbuf = abt->buf[0]; 2689 recvbuf = abt->buf[1]; 2690 for (k = 0, j = 0; j < bn; j++) { 2691 for (i = 0; i < cK; i++, k++) sendbuf[k] = bv[i * blda + j]; 2692 } 2693 PetscCall(MatDenseRestoreArrayRead(b->A, &bv)); 2694 PetscCallMPI(MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm)); 2695 PetscCall(PetscBLASIntCast(cK, &ck)); 2696 PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm)); 2697 PetscCall(PetscBLASIntCast(c->A->cmap->n, &cn)); 2698 if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &cm, &cn, &ck, &_DOne, av, &alda, recvbuf, &ck, &_DZero, cv, &clda)); 2699 PetscCall(MatDenseRestoreArrayRead(a->A, &av)); 2700 PetscCall(MatDenseRestoreArrayRead(b->A, &bv)); 2701 PetscCall(MatDenseRestoreArrayWrite(c->A, &cv)); 2702 PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 2703 PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 2704 PetscFunctionReturn(0); 2705 } 2706 2707 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) { 2708 Mat_MatTransMultDense *abt; 2709 2710 PetscFunctionBegin; 2711 MatCheckProduct(C, 3); 2712 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 2713 abt = (Mat_MatTransMultDense *)C->product->data; 2714 switch (abt->alg) { 2715 case 1: PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C)); break; 
  default: PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C)); break;
  }
  PetscFunctionReturn(0);
}

/* Destructor for the Elemental-based A*B product data: destroy the three Elemental mirrors. */
PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data) {
  Mat_MatMultDense *ab = (Mat_MatMultDense *)data;

  PetscFunctionBegin;
  PetscCall(MatDestroy(&ab->Ce));
  PetscCall(MatDestroy(&ab->Ae));
  PetscCall(MatDestroy(&ab->Be));
  PetscCall(PetscFree(ab));
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_ELEMENTAL)
/* C = A * B via Elemental: refresh the Elemental copies of A and B, multiply there,
   and convert the result back into the existing MPI dense matrix C. */
PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) {
  Mat_MatMultDense *ab;

  PetscFunctionBegin;
  MatCheckProduct(C, 3);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Missing product data");
  ab = (Mat_MatMultDense *)C->product->data;
  PetscCall(MatConvert_MPIDense_Elemental(A, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Ae));
  PetscCall(MatConvert_MPIDense_Elemental(B, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Be));
  PetscCall(MatMatMultNumeric_Elemental(ab->Ae, ab->Be, ab->Ce));
  PetscCall(MatConvert(ab->Ce, MATMPIDENSE, MAT_REUSE_MATRIX, &C));
  PetscFunctionReturn(0);
}

/* Symbolic phase for the Elemental-based C = A * B: build Elemental shells for A, B,
   the symbolic product Ce, and size C itself. */
static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C) {
  Mat Ae, Be, Ce;
  Mat_MatMultDense *ab;

  PetscFunctionBegin;
  MatCheckProduct(C, 4);
  PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
  /* check local size of A and B */
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
    /* NOTE(review): the check is on A's column map but the message prints A's row map — confirm intended */
    SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
  }

  /* create elemental matrices Ae and Be */
  PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &Ae));
  PetscCall(MatSetSizes(Ae, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N));
  PetscCall(MatSetType(Ae, MATELEMENTAL));
  PetscCall(MatSetUp(Ae));
  PetscCall(MatSetOption(Ae, MAT_ROW_ORIENTED, PETSC_FALSE));

  PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &Be));
  PetscCall(MatSetSizes(Be, PETSC_DECIDE, PETSC_DECIDE, B->rmap->N, B->cmap->N));
  PetscCall(MatSetType(Be, MATELEMENTAL));
  PetscCall(MatSetUp(Be));
  PetscCall(MatSetOption(Be, MAT_ROW_ORIENTED, PETSC_FALSE));

  /* compute symbolic Ce = Ae*Be */
  PetscCall(MatCreate(PetscObjectComm((PetscObject)C), &Ce));
  PetscCall(MatMatMultSymbolic_Elemental(Ae, Be, fill, Ce));

  /* setup C */
  PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, PETSC_DECIDE, PETSC_DECIDE));
  PetscCall(MatSetType(C, MATDENSE));
  PetscCall(MatSetUp(C));

  /* create data structure for reuse Cdense */
  PetscCall(PetscNew(&ab));
  ab->Ae = Ae;
  ab->Be = Be;
  ab->Ce = Ce;

  C->product->data = ab;
  C->product->destroy = MatDestroy_MatMatMult_MPIDense_MPIDense;
  C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}
#endif
/* ----------------------------------------------- */
#if defined(PETSC_HAVE_ELEMENTAL)
/* Wire the A*B product callbacks (Elemental path only). */
static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C) {
  PetscFunctionBegin;
  C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense;
  C->ops->productsymbolic = MatProductSymbolic_AB;
  PetscFunctionReturn(0);
}
#endif

/* Wire the A^T*B product callbacks after verifying A and B share a row distribution. */
static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C) {
  Mat_Product *product = C->product;
  Mat A = product->A, B = product->B;

  PetscFunctionBegin;
  if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend)
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
  C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense;
  C->ops->productsymbolic = MatProductSymbolic_AtB;
  PetscFunctionReturn(0);
}

/* Select the A*B^t algorithm ("allgatherv" default or "cyclic") from the options database
   and wire the ABt product callbacks. */
static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C) {
  Mat_Product *product = C->product;
  const char *algTypes[2] = {"allgatherv", "cyclic"};
  PetscInt alg, nalg = 2;
  PetscBool flg = PETSC_FALSE;

  PetscFunctionBegin;
  /* Set default algorithm */
  alg = 0; /* default is allgatherv */
  PetscCall(PetscStrcmp(product->alg, "default", &flg));
  if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));

  /* Get runtime option; the option name depends on whether the old MatMatTransposeMult API was used */
  if (product->api_user) {
    PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatTransposeMult", "Mat");
    PetscCall(PetscOptionsEList("-matmattransmult_mpidense_mpidense_via", "Algorithmic approach", "MatMatTransposeMult", algTypes, nalg, algTypes[alg], &alg, &flg));
    PetscOptionsEnd();
  } else {
    PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_ABt", "Mat");
    PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_ABt", algTypes, nalg, algTypes[alg], &alg, &flg));
    PetscOptionsEnd();
  }
  if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));

  C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense;
  C->ops->productsymbolic = MatProductSymbolic_ABt;
  PetscFunctionReturn(0);
}

/* Entry point: dispatch product-type-specific setup; unsupported types fall through silently
   so other implementations may be tried. */
PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C) {
  Mat_Product *product = C->product;

  PetscFunctionBegin;
  switch (product->type) {
#if defined(PETSC_HAVE_ELEMENTAL)
  case MATPRODUCT_AB: PetscCall(MatProductSetFromOptions_MPIDense_AB(C)); break;
#endif
  case MATPRODUCT_AtB: PetscCall(MatProductSetFromOptions_MPIDense_AtB(C)); break;
  case MATPRODUCT_ABt: PetscCall(MatProductSetFromOptions_MPIDense_ABt(C)); break;
  default: break;
  }
  PetscFunctionReturn(0);
}