/*
   Basic functions for basic parallel dense matrices.
   Portions of this code are under:
   Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
*/

#include <../src/mat/impls/dense/mpi/mpidense.h> /*I "petscmat.h" I*/
#include <../src/mat/impls/aij/mpi/mpiaij.h>
#include <petscblaslapack.h>

/*@
  MatDenseGetLocalMatrix - For a `MATMPIDENSE` or `MATSEQDENSE` matrix returns the sequential
  matrix that represents the operator. For sequential matrices it returns itself.

  Input Parameter:
. A - the sequential or MPI `MATDENSE` matrix

  Output Parameter:
. B - the inner matrix

  Level: intermediate

.seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATMPIDENSE`, `MATSEQDENSE`
@*/
PetscErrorCode MatDenseGetLocalMatrix(Mat A, Mat *B)
{
  Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
  PetscBool     flg;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscAssertPointer(B, 2);
  /* base-type compare so derived dense types (e.g. device variants) also match */
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &flg));
  if (flg) *B = mat->A;
  else {
    PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQDENSE, &flg));
    PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)A)->type_name);
    *B = A;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Copy the local dense block of A into B's local block; layouts match, so the
   copy is purely local (no communication). */
static PetscErrorCode MatCopy_MPIDense(Mat A, Mat B, MatStructure s)
{
  Mat_MPIDense *Amat = (Mat_MPIDense *)A->data;
  Mat_MPIDense *Bmat = (Mat_MPIDense *)B->data;

  PetscFunctionBegin;
  PetscCall(MatCopy(Amat->A, Bmat->A, s));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Add alpha to the diagonal entries whose rows this process owns. The local
   block stores all global columns, so the global column index j addresses
   column j of the local array directly. */
PetscErrorCode MatShift_MPIDense(Mat A, PetscScalar alpha)
{
  Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
  PetscInt      j, lda, rstart = A->rmap->rstart, rend = A->rmap->rend, rend2;
  PetscScalar  *v;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArray(mat->A, &v));
  PetscCall(MatDenseGetLDA(mat->A, &lda));
  rend2 = PetscMin(rend, A->cmap->N); /* for rectangular A the diagonal may end before rend */
  if (rend2 > rstart) {
    for (j = rstart; j < rend2; j++) v[j - rstart + j * lda] += alpha;
    PetscCall(PetscLogFlops(rend2 - rstart));
  }
  PetscCall(MatDenseRestoreArray(mat->A, &v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Access a (locally owned) global row; delegates to the inner sequential
   matrix after shifting to the local row index. */
static PetscErrorCode MatGetRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
  PetscInt      lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;

  PetscFunctionBegin;
  PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows");
  lrow = row - rstart;
  PetscCall(MatGetRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Companion to MatGetRow_MPIDense(); releases the row obtained there. */
static PetscErrorCode MatRestoreRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
  PetscInt      lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;

  PetscFunctionBegin;
  PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows");
  lrow = row - rstart;
  PetscCall(MatRestoreRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Return (and cache on A under "DiagonalBlock") a sequential matrix that
   aliases the diagonal block of A's local storage. */
static PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A, Mat *a)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)A->data;
  PetscInt      m = A->rmap->n, rstart = A->rmap->rstart;
  PetscScalar  *array;
  MPI_Comm      comm;
  PetscBool     flg;
  Mat           B;

  PetscFunctionBegin;
  PetscCall(MatHasCongruentLayouts(A, &flg));
  PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only square matrices supported.");
  PetscCall(PetscObjectQuery((PetscObject)A, "DiagonalBlock", (PetscObject *)&B));
  if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */
#if PetscDefined(HAVE_CUDA)
    PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSECUDA, &flg));
    PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSECUDA);
#elif PetscDefined(HAVE_HIP)
    PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSEHIP, &flg));
    PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSEHIP);
#endif
    PetscCall(PetscObjectGetComm((PetscObject)(mdn->A), &comm));
    PetscCall(MatCreate(comm, &B));
    PetscCall(MatSetSizes(B, m, m, m, m));
    PetscCall(MatSetType(B, ((PetscObject)mdn->A)->type_name));
    /* B aliases A's local storage starting at the diagonal block.
       NOTE(review): the offset m * rstart assumes lda == m for the local block — confirm for LDA != m */
    PetscCall(MatDenseGetArrayRead(mdn->A, (const PetscScalar **)&array));
    PetscCall(MatSeqDenseSetPreallocation(B, array + m * rstart));
    PetscCall(MatDenseRestoreArrayRead(mdn->A, (const PetscScalar **)&array));
    PetscCall(PetscObjectCompose((PetscObject)A, "DiagonalBlock", (PetscObject)B));
    *a = B;
    PetscCall(MatDestroy(&B)); /* the compose above keeps a reference */
  } else *a = B;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Insert/add values: locally owned rows go straight into the inner sequential
   matrix; off-process rows are stashed for communication at assembly time. */
static PetscErrorCode MatSetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIDense *A = (Mat_MPIDense *)mat->data;
  PetscInt      i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;
  PetscBool     roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative rows are ignored */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        PetscCall(MatSetValues(A->A, 1, &row, n, idxn, v + i * n, addv));
      } else {
        /* column-oriented input: values for row i are strided by m in v */
        for (j = 0; j < n; j++) {
          if (idxn[j] < 0) continue;
          PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large");
          PetscCall(MatSetValues(A->A, 1, &row, 1, &idxn[j], v + i + j * m, addv));
        }
      }
    } else if (!A->donotstash) {
      mat->assembled = PETSC_FALSE; /* off-process entries force reassembly */
      if (roworiented) {
        PetscCall(MatStashValuesRow_Private(&mat->stash, idxm[i], n, idxn, v + i * n, PETSC_FALSE));
      } else {
        PetscCall(MatStashValuesCol_Private(&mat->stash, idxm[i], n, idxn, v + i, m, PETSC_FALSE));
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Fetch values; only locally owned rows are supported (no communication). */
static PetscErrorCode MatGetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
  PetscInt      i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j = 0; j < n; j++) {
        if (idxn[j] < 0) continue; /* negative column */
        PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large");
        PetscCall(MatGetValues(mdn->A, 1, &row, 1, &idxn[j], v + i * n + j));
      }
    } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Leading dimension of the local array, forwarded from the inner matrix. */
static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A, PetscInt *lda)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseGetLDA(a->A, lda));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Set the leading dimension; lazily creates the inner sequential matrix if it
   does not exist yet (choosing a device seq type matching A when applicable). */
static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A, PetscInt lda)
{
  Mat_MPIDense *a     = (Mat_MPIDense *)A->data;
  MatType       mtype = MATSEQDENSE;

  PetscFunctionBegin;
  if (!a->A) {
    PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
    PetscCall(PetscLayoutSetUp(A->rmap));
    PetscCall(PetscLayoutSetUp(A->cmap));
    /* the local block holds all global columns of the owned rows */
    PetscCall(MatCreate(PETSC_COMM_SELF, &a->A));
    PetscCall(MatSetSizes(a->A, A->rmap->n, A->cmap->N, A->rmap->n, A->cmap->N));
#if PetscDefined(HAVE_CUDA)
    PetscBool iscuda;
    PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSECUDA, &iscuda));
    if (iscuda) mtype = MATSEQDENSECUDA;
#elif PetscDefined(HAVE_HIP)
    PetscBool iship;
    PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSEHIP, &iship));
    if (iship) mtype = MATSEQDENSEHIP;
#endif
    PetscCall(MatSetType(a->A, mtype));
  }
  PetscCall(MatDenseSetLDA(a->A, lda));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Read-write access to the local array (forwarded to the inner matrix). */
static PetscErrorCode MatDenseGetArray_MPIDense(Mat A, PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseGetArray(a->A, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Read-only access to the local array. */
static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A, PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseGetArrayRead(a->A, (const PetscScalar **)array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Write-only access to the local array. */
static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A, PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseGetArrayWrite(a->A, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Temporarily substitute a caller-provided array for the local storage. */
static PetscErrorCode
MatDensePlaceArray_MPIDense(Mat A, const PetscScalar *array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  /* outstanding column-vector or sub-matrix access would alias the old array */
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDensePlaceArray(a->A, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Undo MatDensePlaceArray(): restore the original local storage. */
static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseResetArray(a->A));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Permanently replace the local storage with a caller-provided array. */
static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A, const PetscScalar *array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseReplaceArray(a->A, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Extract the submatrix A(isrow, iscol). Column indices are gathered globally
   (every process sees all selected columns); row selection must follow the
   original row partitioning — no redistribution is performed. */
static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A, IS isrow, IS iscol, MatReuse scall, Mat *B)
{
  Mat_MPIDense      *mat = (Mat_MPIDense *)A->data, *newmatd;
  PetscInt           lda, i, j, rstart, rend, nrows, ncols, Ncols, nlrows, nlcols;
  const PetscInt    *irow, *icol;
  const PetscScalar *v;
  PetscScalar       *bv;
  Mat                newmat;
  IS                 iscol_local;
  MPI_Comm           comm_is, comm_mat;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm_mat));
  PetscCall(PetscObjectGetComm((PetscObject)iscol, &comm_is));
  PetscCheck(comm_mat == comm_is, PETSC_COMM_SELF, PETSC_ERR_ARG_NOTSAMECOMM, "IS communicator must match matrix communicator");

  PetscCall(ISAllGather(iscol, &iscol_local));
  PetscCall(ISGetIndices(isrow, &irow));
  PetscCall(ISGetIndices(iscol_local, &icol));
  PetscCall(ISGetLocalSize(isrow, &nrows));
  PetscCall(ISGetLocalSize(iscol, &ncols));
  PetscCall(ISGetSize(iscol, &Ncols)); /* global number of columns, size of iscol_local */

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK. ... Currently supports only submatrix same partitioning as
     original matrix! */

  PetscCall(MatGetLocalSize(A, &nlrows, &nlcols));
  PetscCall(MatGetOwnershipRange(A, &rstart, &rend));

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes!
       */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &newmat));
    PetscCall(MatSetSizes(newmat, nrows, ncols, PETSC_DECIDE, Ncols));
    PetscCall(MatSetType(newmat, ((PetscObject)A)->type_name));
    PetscCall(MatMPIDenseSetPreallocation(newmat, NULL));
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense *)newmat->data;
  PetscCall(MatDenseGetArray(newmatd->A, &bv));
  PetscCall(MatDenseGetArrayRead(mat->A, &v));
  PetscCall(MatDenseGetLDA(mat->A, &lda));
  for (i = 0; i < Ncols; i++) {
    const PetscScalar *av = v + lda * icol[i]; /* start of source column icol[i] */
    for (j = 0; j < nrows; j++) *bv++ = av[irow[j] - rstart];
  }
  PetscCall(MatDenseRestoreArrayRead(mat->A, &v));
  PetscCall(MatDenseRestoreArray(newmatd->A, &bv));

  /* Assemble the matrices so that the correct flags are set */
  PetscCall(MatAssemblyBegin(newmat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(newmat, MAT_FINAL_ASSEMBLY));

  /* Free work space */
  PetscCall(ISRestoreIndices(isrow, &irow));
  PetscCall(ISRestoreIndices(iscol_local, &icol));
  PetscCall(ISDestroy(&iscol_local));
  *B = newmat;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Release access obtained with MatDenseGetArray(). */
static PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A, PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseRestoreArray(a->A, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Release access obtained with MatDenseGetArrayRead(). */
static PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A, PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseRestoreArrayRead(a->A, (const PetscScalar **)array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Release access obtained with MatDenseGetArrayWrite(). */
static PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A, PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseRestoreArrayWrite(a->A, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Start communication of stashed off-process entries (no-op if stashing is
   disabled or no off-process entries can exist). */
static PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat, MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
  PetscInt      nstash, reallocs;

  PetscFunctionBegin;
  if (mdn->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
  PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
  PetscCall(PetscInfo(mdn->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Receive stashed entries, insert them locally, then assemble the inner
   sequential matrix. */
static PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat, MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
  PetscInt      i, *row, *col, flg, j, rstart, ncols;
  PetscMPIInt   n;
  PetscScalar  *val;

  PetscFunctionBegin;
  if (!mdn->donotstash && !mat->nooffprocentries) {
    /* wait on receives */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIDense(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
  }

  PetscCall(MatAssemblyBegin(mdn->A, mode));
  PetscCall(MatAssemblyEnd(mdn->A, mode));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Zero every entry of the local block. */
static PetscErrorCode MatZeroEntries_MPIDense(Mat A)
{
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatZeroEntries(l->A));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Zero the listed (global) rows; optionally adjust b so that x keeps its value
   on those rows, and place diag on the zeroed diagonal entries. */
static PetscErrorCode MatZeroRows_MPIDense(Mat A, PetscInt n, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;
  PetscInt      i, len, *lrows;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(PetscLayoutMapLocal(A->rmap, n, rows, &len, &lrows, NULL));
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArrayWrite(b, &bb));
    for (i = 0; i < len; ++i) bb[lrows[i]] = diag * xx[lrows[i]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArrayWrite(b, &bb));
  }
  /* zero local rows first, then set the diagonal collectively if requested */
  PetscCall(MatZeroRows(l->A, len, lrows, 0.0, NULL, NULL));
  if (diag != 0.0) {
    Vec d;

    PetscCall(MatCreateVecs(A, NULL, &d));
    PetscCall(VecSet(d, diag));
    PetscCall(MatDiagonalSet(A, d, INSERT_VALUES));
    PetscCall(VecDestroy(&d));
  }
  PetscCall(PetscFree(lrows));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* y = A x: broadcast the needed entries of x into lvec through the star
   forest, then apply the local (sequential) mat-vec. */
static PetscErrorCode MatMult_MPIDense(Mat mat, Vec xx, Vec yy)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
  PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
  PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
  PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
  PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
  PetscCall((*mdn->A->ops->mult)(mdn->A, mdn->lvec, yy));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* z = y + A x: same communication pattern as MatMult_MPIDense(). */
static PetscErrorCode
MatMultAdd_MPIDense(Mat mat, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
  PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
  PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
  PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
  PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
  PetscCall((*mdn->A->ops->multadd)(mdn->A, mdn->lvec, yy, zz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* y = A^T x (or A^H x when herm): local transpose multiply into lvec, then
   sum-reduce the partial results onto the owners of y via the star forest. */
static PetscErrorCode MatMultTransposeKernel_MPIDense(Mat A, Vec xx, Vec yy, PetscBool herm)
{
  Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;
  PetscFunctionBegin;
  if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
  PetscCall(VecSet(yy, 0.0)); /* the reduction below adds into yy */
  if (herm) PetscCall((*a->A->ops->multhermitiantranspose)(a->A, xx, a->lvec));
  else PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec));
  PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(yy, &ay, &aymtype));
  PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
  PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
  PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
  PetscCall(VecRestoreArrayAndMemType(yy, &ay));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* z = y + A^T x (or y + A^H x when herm). */
static PetscErrorCode MatMultTransposeAddKernel_MPIDense(Mat A, Vec xx, Vec yy, Vec zz, PetscBool herm)
{
  Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
  PetscCall(VecCopy(yy, zz)); /* the reduction below adds into zz on top of yy */
  if (herm) PetscCall((*a->A->ops->multhermitiantranspose)(a->A, xx, a->lvec));
  else PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec));
  PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(zz, &ay, &aymtype));
  PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
  PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
  PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
  PetscCall(VecRestoreArrayAndMemType(zz, &ay));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* y = A^T x */
static PetscErrorCode MatMultTranspose_MPIDense(Mat A, Vec xx, Vec yy)
{
  PetscFunctionBegin;
  PetscCall(MatMultTransposeKernel_MPIDense(A, xx, yy, PETSC_FALSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* z = y + A^T x */
static PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A, Vec xx, Vec yy, Vec zz)
{
  PetscFunctionBegin;
  PetscCall(MatMultTransposeAddKernel_MPIDense(A, xx, yy, zz, PETSC_FALSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* y = A^H x */
static PetscErrorCode MatMultHermitianTranspose_MPIDense(Mat A, Vec xx, Vec yy)
{
  PetscFunctionBegin;
  PetscCall(MatMultTransposeKernel_MPIDense(A, xx, yy, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* z = y + A^H x */
static PetscErrorCode MatMultHermitianTransposeAdd_MPIDense(Mat A, Vec xx, Vec yy, Vec zz)
{
  PetscFunctionBegin;
  PetscCall(MatMultTransposeAddKernel_MPIDense(A, xx, yy, zz, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Extract the diagonal of A into the conforming vector v; local entries of v
   past the diagonal length are zeroed. */
PetscErrorCode MatGetDiagonal_MPIDense(Mat A, Vec v)
{
  Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
  PetscInt           lda, len, i, nl, ng, m = A->rmap->n, radd;
  PetscScalar       *x;
  const PetscScalar *av;

  PetscFunctionBegin;
  PetscCall(VecGetArray(v,
                        &x));
  PetscCall(VecGetSize(v, &ng));
  PetscCheck(ng == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming mat and vec");
  PetscCall(VecGetLocalSize(v, &nl));
  len  = PetscMin(a->A->rmap->n, a->A->cmap->n);
  /* NOTE(review): offset m * rstart assumes lda == m for the local block — confirm for LDA != m */
  radd = A->rmap->rstart * m;
  PetscCall(MatDenseGetArrayRead(a->A, &av));
  PetscCall(MatDenseGetLDA(a->A, &lda));
  for (i = 0; i < len; i++) x[i] = av[radd + i * lda + i];
  PetscCall(MatDenseRestoreArrayRead(a->A, &av));
  PetscCall(PetscArrayzero(x + i, nl - i)); /* zero any remaining local entries of v */
  PetscCall(VecRestoreArray(v, &x));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Destroy the parallel dense matrix: free the inner objects, then detach every
   composed method and the cached diagonal block. */
static PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;

  PetscFunctionBegin;
  PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
  PetscCall(MatStashDestroy_Private(&mat->stash));
  PetscCheck(!mdn->vecinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!mdn->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDestroy(&mdn->A));
  PetscCall(VecDestroy(&mdn->lvec));
  PetscCall(PetscSFDestroy(&mdn->Mvctx));
  PetscCall(VecDestroy(&mdn->cvec));
  PetscCall(MatDestroy(&mdn->cmat));

  PetscCall(PetscFree(mat->data));
  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));

  /* remove every method composed on this object at creation/convert time */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", NULL));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", NULL));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", NULL));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensecuda_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAPlaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAResetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAReplaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDASetPreallocation_C", NULL));
#endif
#if defined(PETSC_HAVE_HIP)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensehip_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensehip_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidensehip_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaijhipsparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPPlaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPResetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPReplaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPSetPreallocation_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", NULL));

  /* drop the diagonal block cached by MatGetDiagonalBlock_MPIDense() */
  PetscCall(PetscObjectCompose((PetscObject)mat, "DiagonalBlock", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

#include <petscdraw.h>
/* View helper shared by ASCII, draw, and socket viewers: handle the INFO
   formats and null-draw early exits, then gather the whole matrix on rank 0
   and view it there. */
static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
{
  Mat_MPIDense     *mdn = (Mat_MPIDense *)mat->data;
  PetscMPIInt       rank;
  PetscViewerType   vtype;
  PetscBool         iascii, isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  if (iascii) {
    PetscCall(PetscViewerGetType(viewer, &vtype));
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %" PetscInt_FMT " \n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                   (PetscInt)info.memory));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      if (mdn->Mvctx) PetscCall(PetscSFView(mdn->Mvctx, viewer));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(PETSC_SUCCESS);
    }
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;

PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw)); 726 PetscCall(PetscDrawIsNull(draw, &isnull)); 727 if (isnull) PetscFunctionReturn(PETSC_SUCCESS); 728 } 729 730 { 731 /* assemble the entire matrix onto first processor. */ 732 Mat A; 733 PetscInt M = mat->rmap->N, N = mat->cmap->N, m, row, i, nz; 734 PetscInt *cols; 735 PetscScalar *vals; 736 737 PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A)); 738 if (rank == 0) { 739 PetscCall(MatSetSizes(A, M, N, M, N)); 740 } else { 741 PetscCall(MatSetSizes(A, 0, 0, M, N)); 742 } 743 /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */ 744 PetscCall(MatSetType(A, MATMPIDENSE)); 745 PetscCall(MatMPIDenseSetPreallocation(A, NULL)); 746 747 /* Copy the matrix ... This isn't the most efficient means, 748 but it's quick for now */ 749 A->insertmode = INSERT_VALUES; 750 751 row = mat->rmap->rstart; 752 m = mdn->A->rmap->n; 753 for (i = 0; i < m; i++) { 754 PetscCall(MatGetRow_MPIDense(mat, row, &nz, &cols, &vals)); 755 PetscCall(MatSetValues_MPIDense(A, 1, &row, nz, cols, vals, INSERT_VALUES)); 756 PetscCall(MatRestoreRow_MPIDense(mat, row, &nz, &cols, &vals)); 757 row++; 758 } 759 760 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 761 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 762 PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 763 if (rank == 0) { 764 PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIDense *)(A->data))->A, ((PetscObject)mat)->name)); 765 PetscCall(MatView_SeqDense(((Mat_MPIDense *)(A->data))->A, sviewer)); 766 } 767 PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 768 PetscCall(MatDestroy(&A)); 769 } 770 PetscFunctionReturn(PETSC_SUCCESS); 771 } 772 773 static PetscErrorCode MatView_MPIDense(Mat mat, PetscViewer viewer) 774 { 775 PetscBool iascii, isbinary, isdraw, issocket; 776 777 PetscFunctionBegin; 778 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, 
PETSCVIEWERASCII, &iascii)); 779 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 780 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket)); 781 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 782 783 if (iascii || issocket || isdraw) { 784 PetscCall(MatView_MPIDense_ASCIIorDraworSocket(mat, viewer)); 785 } else if (isbinary) PetscCall(MatView_Dense_Binary(mat, viewer)); 786 PetscFunctionReturn(PETSC_SUCCESS); 787 } 788 789 static PetscErrorCode MatGetInfo_MPIDense(Mat A, MatInfoType flag, MatInfo *info) 790 { 791 Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 792 Mat mdn = mat->A; 793 PetscLogDouble isend[5], irecv[5]; 794 795 PetscFunctionBegin; 796 info->block_size = 1.0; 797 798 PetscCall(MatGetInfo(mdn, MAT_LOCAL, info)); 799 800 isend[0] = info->nz_used; 801 isend[1] = info->nz_allocated; 802 isend[2] = info->nz_unneeded; 803 isend[3] = info->memory; 804 isend[4] = info->mallocs; 805 if (flag == MAT_LOCAL) { 806 info->nz_used = isend[0]; 807 info->nz_allocated = isend[1]; 808 info->nz_unneeded = isend[2]; 809 info->memory = isend[3]; 810 info->mallocs = isend[4]; 811 } else if (flag == MAT_GLOBAL_MAX) { 812 PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)A))); 813 814 info->nz_used = irecv[0]; 815 info->nz_allocated = irecv[1]; 816 info->nz_unneeded = irecv[2]; 817 info->memory = irecv[3]; 818 info->mallocs = irecv[4]; 819 } else if (flag == MAT_GLOBAL_SUM) { 820 PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)A))); 821 822 info->nz_used = irecv[0]; 823 info->nz_allocated = irecv[1]; 824 info->nz_unneeded = irecv[2]; 825 info->memory = irecv[3]; 826 info->mallocs = irecv[4]; 827 } 828 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 829 info->fill_ratio_needed = 0; 830 info->factor_mallocs = 0; 831 PetscFunctionReturn(PETSC_SUCCESS); 832 } 833 
/* Apply a MatOption to the parallel dense matrix.

   Preallocation-dependent options are forwarded to the local block; symmetry-type
   hints and pattern options are logged as ignored (dense storage makes them moot);
   MAT_IGNORE_OFF_PROC_ENTRIES only flips the stash flag. Unknown options error. */
static PetscErrorCode MatSetOption_MPIDense(Mat A, MatOption op, PetscBool flg)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A, 1);
    a->roworiented = flg; /* affects MatSetValues interpretation of the value array */
    PetscCall(MatSetOption(a->A, op, flg));
    break;
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_USE_HASH_TABLE:
  case MAT_SORTED_FULL:
    PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
  case MAT_SPD:
  case MAT_IGNORE_LOWER_TRIANGULAR:
  case MAT_IGNORE_ZERO_ENTRIES:
  case MAT_SPD_ETERNAL:
    /* if the diagonal matrix is square it inherits some of the properties above */
    PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
    break;
  default:
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %s", MatOptions[op]);
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Scale A <- diag(ll) * A * diag(rr).

   Left scaling uses the locally owned rows directly. Right scaling first
   broadcasts the (column-distributed) rr entries into the local vector lvec via
   the matrix's PetscSF, then scales the locally stored columns. */
static PetscErrorCode MatDiagonalScale_MPIDense(Mat A, Vec ll, Vec rr)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)A->data;
  const PetscScalar *l;
  PetscScalar        x, *v, *vv, *r;
  PetscInt           i, j, s2a, s3a, s2, s3, m = mdn->A->rmap->n, n = mdn->A->cmap->n, lda;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArray(mdn->A, &vv));
  PetscCall(MatDenseGetLDA(mdn->A, &lda));
  PetscCall(MatGetLocalSize(A, &s2, &s3));
  if (ll) {
    PetscCall(VecGetLocalSize(ll, &s2a));
    PetscCheck(s2a == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left scaling vector non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT, s2a, s2);
    PetscCall(VecGetArrayRead(ll, &l));
    /* row i of the column-major local array is strided by lda */
    for (i = 0; i < m; i++) {
      x = l[i];
      v = vv + i;
      for (j = 0; j < n; j++) {
        (*v) *= x;
        v += lda;
      }
    }
    PetscCall(VecRestoreArrayRead(ll, &l));
    PetscCall(PetscLogFlops(1.0 * n * m));
  }
  if (rr) {
    const PetscScalar *ar;

    PetscCall(VecGetLocalSize(rr, &s3a));
    PetscCheck(s3a == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Right scaling vec non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT ".", s3a, s3);
    PetscCall(VecGetArrayRead(rr, &ar));
    if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); /* lazily build the SF used for the broadcast */
    PetscCall(VecGetArray(mdn->lvec, &r));
    PetscCall(PetscSFBcastBegin(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE));
    PetscCall(VecRestoreArrayRead(rr, &ar));
    /* column j occupies contiguous memory starting at vv + j*lda */
    for (i = 0; i < n; i++) {
      x = r[i];
      v = vv + i * lda;
      for (j = 0; j < m; j++) (*v++) *= x;
    }
    PetscCall(VecRestoreArray(mdn->lvec, &r));
    PetscCall(PetscLogFlops(1.0 * n * m));
  }
  PetscCall(MatDenseRestoreArray(mdn->A, &vv));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Compute a matrix norm of the parallel dense matrix.

   Single-rank communicators delegate to the sequential implementation.
   Otherwise: Frobenius sums |a_ij|^2 locally and Allreduce-sums;
   NORM_1 accumulates per-global-column absolute sums and takes the max;
   NORM_INFINITY takes the max over per-rank row-sum norms.
   The two norm is unsupported in parallel. */
static PetscErrorCode MatNorm_MPIDense(Mat A, NormType type, PetscReal *nrm)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)A->data;
  PetscInt           i, j;
  PetscMPIInt        size;
  PetscReal          sum = 0.0;
  const PetscScalar *av, *v;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArrayRead(mdn->A, &av));
  v = av;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    PetscCall(MatNorm(mdn->A, type, nrm));
  } else {
    if (type == NORM_FROBENIUS) {
      /* NOTE(review): assumes the local array is contiguous (lda == local rows) — TODO confirm */
      for (i = 0; i < mdn->A->cmap->n * mdn->A->rmap->n; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
      *nrm = PetscSqrtReal(*nrm);
      PetscCall(PetscLogFlops(2.0 * mdn->A->cmap->n * mdn->A->rmap->n));
    } else if (type == NORM_1) {
      PetscReal *tmp, *tmp2;
      PetscCall(PetscCalloc2(A->cmap->N, &tmp, A->cmap->N, &tmp2));
      *nrm = 0.0;
      v    = av;
      /* every rank stores all N global columns, so tmp is indexed globally */
      for (j = 0; j < mdn->A->cmap->n; j++) {
        for (i = 0; i < mdn->A->rmap->n; i++) {
          tmp[j] += PetscAbsScalar(*v);
          v++;
        }
      }
      PetscCall(MPIU_Allreduce(tmp, tmp2, A->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
      for (j = 0; j < A->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      PetscCall(PetscFree2(tmp, tmp2));
      PetscCall(PetscLogFlops(A->cmap->n * A->rmap->n));
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp;
      PetscCall(MatNorm(mdn->A, type, &ntemp));
      PetscCall(MPIU_Allreduce(&ntemp, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
    } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for two norm");
  }
  PetscCall(MatDenseRestoreArrayRead(mdn->A, &av));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Form the transpose of A.

   Builds (or reuses) a parallel dense matrix with swapped layouts, then inserts
   each locally stored column of A as a row of B via MatSetValues; assembly moves
   off-process entries. MAT_INPLACE_MATRIX merges the result back into A's header. */
static PetscErrorCode MatTranspose_MPIDense(Mat A, MatReuse reuse, Mat *matout)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat           B;
  PetscInt      M = A->rmap->N, N = A->cmap->N, m, n, *rwork, rstart = A->rmap->rstart;
  PetscInt      j, i, lda;
  PetscScalar  *v;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
    PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
    PetscCall(MatMPIDenseSetPreallocation(B, NULL));
  } else B = *matout;

  m = a->A->rmap->n;
  n = a->A->cmap->n;
  PetscCall(MatDenseGetArrayRead(a->A, (const PetscScalar **)&v));
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(PetscMalloc1(m, &rwork));
  for (i = 0; i < m; i++) rwork[i] = rstart + i; /* global row indices of the local rows */
  for (j = 0; j < n; j++) {
    /* local column j of A becomes global row j of B */
    PetscCall(MatSetValues(B, 1, &j, m, rwork, v, INSERT_VALUES));
    v = PetscSafePointerPlusOffset(v, lda);
  }
  PetscCall(MatDenseRestoreArrayRead(a->A, (const PetscScalar **)&v));
  PetscCall(PetscFree(rwork));
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    PetscCall(MatHeaderMerge(A, &B));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatDuplicate_MPIDense(Mat, MatDuplicateOption, Mat *);
PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat, PetscScalar);

/* Set up layouts and (if needed) default preallocation for the parallel dense matrix. */
static PetscErrorCode MatSetUp_MPIDense(Mat A)
{
  PetscFunctionBegin;
  PetscCall(PetscLayoutSetUp(A->rmap));
  PetscCall(PetscLayoutSetUp(A->cmap));
  if (!A->preallocated) PetscCall(MatMPIDenseSetPreallocation(A, NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Y <- Y + alpha*X, delegated to the local dense blocks (layouts are conforming). */
static PetscErrorCode MatAXPY_MPIDense(Mat Y, PetscScalar alpha, Mat X, MatStructure str)
{
  Mat_MPIDense *A = (Mat_MPIDense *)Y->data, *B = (Mat_MPIDense *)X->data;

  PetscFunctionBegin;
  PetscCall(MatAXPY(A->A, alpha, B->A, str));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Complex-conjugate all entries, delegated to the local block. */
static PetscErrorCode MatConjugate_MPIDense(Mat mat)
{
  Mat_MPIDense *a = (Mat_MPIDense *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatConjugate(a->A));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Keep only the real part of each entry, delegated to the local block. */
static PetscErrorCode MatRealPart_MPIDense(Mat A)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatRealPart(a->A));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Keep only the imaginary part of each entry, delegated to the local block. */
static PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
{
  Mat_MPIDense *a = (Mat_MPIDense
*)A->data;

  PetscFunctionBegin;
  PetscCall(MatImaginaryPart(a->A));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Extract one column of A into v by dispatching to the local block's getcolumnvector op. */
static PetscErrorCode MatGetColumnVector_MPIDense(Mat A, Vec v, PetscInt col)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(a->A, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing local matrix");
  PetscCheck(a->A->ops->getcolumnvector, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing get column operation");
  PetscCall((*a->A->ops->getcolumnvector)(a->A, v, col));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat, PetscInt, PetscReal *);

/* Compute per-column reductions (norms, sums, means) across all ranks.

   Means are computed as SUM reductions divided by the global row count after the
   Allreduce; NORM_2 is accumulated as squared sums (local values are squared
   before the reduce and square-rooted after); NORM_INFINITY reduces with MAX,
   everything else with SUM. */
static PetscErrorCode MatGetColumnReductions_MPIDense(Mat A, PetscInt type, PetscReal *reductions)
{
  PetscInt      i, m, n;
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscReal    *work;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A, &m, &n));
  PetscCall(PetscMalloc1(n, &work));
  if (type == REDUCTION_MEAN_REALPART) {
    PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_REALPART, work));
  } else if (type == REDUCTION_MEAN_IMAGINARYPART) {
    PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_IMAGINARYPART, work));
  } else {
    PetscCall(MatGetColumnReductions_SeqDense(a->A, type, work));
  }
  if (type == NORM_2) {
    for (i = 0; i < n; i++) work[i] *= work[i]; /* sum of squares so SUM reduce is valid */
  }
  if (type == NORM_INFINITY) {
    PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, A->hdr.comm));
  } else {
    PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, A->hdr.comm));
  }
  PetscCall(PetscFree(work));
  if (type == NORM_2) {
    for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
  } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < n; i++) reductions[i] /= m; /* divide by global row count to get the mean */
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Fill the matrix with random values; mirrors the local block's offload state on devices. */
static PetscErrorCode MatSetRandom_MPIDense(Mat x, PetscRandom rctx)
{
  Mat_MPIDense *d = (Mat_MPIDense *)x->data;

  PetscFunctionBegin;
  PetscCall(MatSetRandom(d->A, rctx));
#if defined(PETSC_HAVE_DEVICE)
  x->offloadmask = d->A->offloadmask;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Dense matrices store every entry, so the diagonal is never missing; d is left untouched. */
static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A, PetscBool *missing, PetscInt *d)
{
  PetscFunctionBegin;
  *missing = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat);
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat);
static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat);
static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat);
static PetscErrorCode MatEqual_MPIDense(Mat, Mat, PetscBool *);
static PetscErrorCode MatLoad_MPIDense(Mat, PetscViewer);
static PetscErrorCode MatProductSetFromOptions_MPIDense(Mat);

/* Operations table for MATMPIDENSE; slot positions (see MatOps) are significant. */
static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
                                       MatGetRow_MPIDense,
                                       MatRestoreRow_MPIDense,
                                       MatMult_MPIDense,
                                       /* 4*/ MatMultAdd_MPIDense,
                                       MatMultTranspose_MPIDense,
                                       MatMultTransposeAdd_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 10*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatTranspose_MPIDense,
                                       /* 15*/ MatGetInfo_MPIDense,
                                       MatEqual_MPIDense,
                                       MatGetDiagonal_MPIDense,
                                       MatDiagonalScale_MPIDense,
                                       MatNorm_MPIDense,
                                       /* 20*/ MatAssemblyBegin_MPIDense,
                                       MatAssemblyEnd_MPIDense,
                                       MatSetOption_MPIDense,
                                       MatZeroEntries_MPIDense,
                                       /* 24*/ MatZeroRows_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 29*/ MatSetUp_MPIDense,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPIDense,
                                       NULL,
                                       /* 34*/ MatDuplicate_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 39*/ MatAXPY_MPIDense,
                                       MatCreateSubMatrices_MPIDense,
                                       NULL,
                                       MatGetValues_MPIDense,
                                       MatCopy_MPIDense,
                                       /* 44*/ NULL,
                                       MatScale_MPIDense,
                                       MatShift_MPIDense,
                                       NULL,
                                       NULL,
                                       /* 49*/ MatSetRandom_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 54*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 59*/ MatCreateSubMatrix_MPIDense,
                                       MatDestroy_MPIDense,
                                       MatView_MPIDense,
                                       NULL,
                                       NULL,
                                       /* 64*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 69*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 74*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 79*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 83*/ MatLoad_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 89*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 94*/ NULL,
                                       NULL,
                                       MatMatTransposeMultSymbolic_MPIDense_MPIDense,
                                       MatMatTransposeMultNumeric_MPIDense_MPIDense,
                                       NULL,
                                       /* 99*/ MatProductSetFromOptions_MPIDense,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPIDense,
                                       NULL,
                                       /*104*/ NULL,
                                       MatRealPart_MPIDense,
                                       MatImaginaryPart_MPIDense,
                                       NULL,
                                       NULL,
                                       /*109*/ NULL,
                                       NULL,
                                       NULL,
                                       MatGetColumnVector_MPIDense,
                                       MatMissingDiagonal_MPIDense,
                                       /*114*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*119*/ NULL,
                                       NULL,
                                       MatMultHermitianTranspose_MPIDense,
                                       MatMultHermitianTransposeAdd_MPIDense,
                                       NULL,
                                       /*124*/ NULL,
                                       MatGetColumnReductions_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*129*/ NULL,
                                       NULL,
                                       MatTransposeMatMultSymbolic_MPIDense_MPIDense,
                                       MatTransposeMatMultNumeric_MPIDense_MPIDense,
                                       NULL,
                                       /*134*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*139*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
MatCreateMPIMatConcatenateSeqMat_MPIDense,
                                       /*145*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*150*/ NULL,
                                       NULL};

/* Allocate (or adopt) storage for the local block of a MATMPIDENSE matrix.

   Creates the inner sequential matrix on first use; its type is MATSEQDENSE, or
   the CUDA/HIP sequential variant when the outer matrix is the corresponding
   device type. A non-NULL data pointer is used as the (user-owned) value array. */
static PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat, PetscScalar *data)
{
  Mat_MPIDense *a     = (Mat_MPIDense *)mat->data;
  MatType       mtype = MATSEQDENSE;

  PetscFunctionBegin;
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));
  if (!a->A) {
    /* local block holds the owned rows but ALL global columns */
    PetscCall(MatCreate(PETSC_COMM_SELF, &a->A));
    PetscCall(MatSetSizes(a->A, mat->rmap->n, mat->cmap->N, mat->rmap->n, mat->cmap->N));
  }
#if defined(PETSC_HAVE_CUDA)
  PetscBool iscuda;
  PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSECUDA, &iscuda));
  if (iscuda) mtype = MATSEQDENSECUDA;
#endif
#if defined(PETSC_HAVE_HIP)
  PetscBool iship;
  PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSEHIP, &iship));
  if (iship) mtype = MATSEQDENSEHIP;
#endif
  PetscCall(MatSetType(a->A, mtype));
  PetscCall(MatSeqDenseSetPreallocation(a->A, data));
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
  mat->offloadmask = a->A->offloadmask;
#endif
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Convert a MATMPIAIJ matrix to MATMPIDENSE.

   Extracts the merged local AIJ matrix, densifies it sequentially, then
   concatenates the per-rank pieces into a parallel dense matrix; supports
   INITIAL/REUSE/INPLACE reuse modes. */
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIDense(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat B, C;

  PetscFunctionBegin;
  PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &C));
  PetscCall(MatConvert_SeqAIJ_SeqDense(C, MATSEQDENSE, MAT_INITIAL_MATRIX, &B));
  PetscCall(MatDestroy(&C));
  if (reuse == MAT_REUSE_MATRIX) {
    C = *newmat;
  } else C = NULL;
  PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C));
  PetscCall(MatDestroy(&B));
  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A, &C)); /* steal C's guts into A's header */
  } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Convert a MATMPIDENSE matrix to MATMPIAIJ via the sequential dense->AIJ path
   followed by a parallel concatenation; mirrors MatConvert_MPIAIJ_MPIDense. */
static PetscErrorCode MatConvert_MPIDense_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat B, C;

  PetscFunctionBegin;
  PetscCall(MatDenseGetLocalMatrix(A, &C));
  PetscCall(MatConvert_SeqDense_SeqAIJ(C, MATSEQAIJ, MAT_INITIAL_MATRIX, &B));
  if (reuse == MAT_REUSE_MATRIX) {
    C = *newmat;
  } else C = NULL;
  PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C));
  PetscCall(MatDestroy(&B));
  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A, &C));
  } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
  PetscFunctionReturn(PETSC_SUCCESS);
}

#if defined(PETSC_HAVE_ELEMENTAL)
/* Convert a MATMPIDENSE matrix to MATELEMENTAL by inserting the locally owned
   rows with ADD_VALUES (the Elemental interface requires additive insertion). */
PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat           mat_elemental;
  PetscScalar  *v;
  PetscInt      m = A->rmap->n, N = A->cmap->N, rstart = A->rmap->rstart, i, *rows, *cols, lda;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    mat_elemental = *newmat;
    PetscCall(MatZeroEntries(*newmat)); /* required because insertion below is additive */
  } else {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental));
    PetscCall(MatSetSizes(mat_elemental, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N));
    PetscCall(MatSetType(mat_elemental, MATELEMENTAL));
    PetscCall(MatSetUp(mat_elemental));
    PetscCall(MatSetOption(mat_elemental, MAT_ROW_ORIENTED, PETSC_FALSE));
  }

  PetscCall(PetscMalloc2(m, &rows, N, &cols));
  for (i = 0; i < N; i++) cols[i] = i;
  for (i = 0; i < m; i++) rows[i] = rstart + i;

  /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
  PetscCall(MatDenseGetArray(A, &v));
  PetscCall(MatDenseGetLDA(a->A, &lda));
  if (lda == m) PetscCall(MatSetValues(mat_elemental, m, rows, N, cols, v, ADD_VALUES));
  else {
    /* non-contiguous columns: insert one column at a time, honoring lda */
    for (i = 0; i < N; i++) PetscCall(MatSetValues(mat_elemental, m, rows, 1, &i, v + lda * i, ADD_VALUES));
  }
  PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY));
  PetscCall(MatDenseRestoreArray(A, &v));
  PetscCall(PetscFree2(rows, cols));

  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A, &mat_elemental));
  } else {
    *newmat = mat_elemental;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif

/* Borrow a pointer to one column of the local dense block. */
static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A, PetscInt col, PetscScalar **vals)
{
  Mat_MPIDense *mat = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseGetColumn(mat->A, col, vals));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Return a column pointer obtained with MatDenseGetColumn_MPIDense. */
static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A, PetscScalar **vals)
{
  Mat_MPIDense *mat = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseRestoreColumn(mat->A, vals));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Build (or refill) a parallel dense matrix whose local rows are the rows of the
   given sequential matrix; n is each rank's local column count (PETSC_DECIDE allowed). */
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
{
  Mat_MPIDense *mat;
  PetscInt      m, nloc, N; /* NOTE(review): nloc is fetched but never used below */

  PetscFunctionBegin;
  PetscCall(MatGetSize(inmat, &m, &N));
  PetscCall(MatGetLocalSize(inmat, NULL, &nloc));
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    PetscInt sum;

    if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N));
    /* Check sum(n) = N */
    PetscCall(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM,
comm));
    PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N);

    PetscCall(MatCreateDense(comm, m, n, PETSC_DETERMINE, N, NULL, outmat));
    PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  }

  /* numeric phase */
  mat = (Mat_MPIDense *)(*outmat)->data;
  PetscCall(MatCopy(inmat, mat->A, SAME_NONZERO_PATTERN));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Expose column `col` of the local block as a Vec.

   The cached column vector a->cvec is lazily created and then aliased onto the
   column's storage via VecPlaceArray; vecinuse records col+1 so the matching
   restore can be enforced. Only one column vector may be outstanding at a time. */
PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscInt      lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
  a->vecinuse = col + 1;
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(MatDenseGetArray(a->A, (PetscScalar **)&a->ptrinuse));
  PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)lda)));
  *v = a->cvec;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Release the column vector obtained with MatDenseGetColumnVec_MPIDense. */
PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector");
  a->vecinuse = 0;
  PetscCall(MatDenseRestoreArray(a->A, (PetscScalar **)&a->ptrinuse));
  PetscCall(VecResetArray(a->cvec));
  if (v) *v = NULL;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Read-only variant of MatDenseGetColumnVec_MPIDense: places the column storage
   into the cached vector and read-locks it. */
PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscInt      lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
  a->vecinuse = col + 1;
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(MatDenseGetArrayRead(a->A, &a->ptrinuse));
  PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)lda)));
  PetscCall(VecLockReadPush(a->cvec));
  *v = a->cvec;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Release the read-only column vector obtained with MatDenseGetColumnVecRead_MPIDense. */
PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
  a->vecinuse = 0;
  PetscCall(MatDenseRestoreArrayRead(a->A, &a->ptrinuse));
  PetscCall(VecLockReadPop(a->cvec));
  PetscCall(VecResetArray(a->cvec));
  if (v) *v = NULL;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Write-only variant of MatDenseGetColumnVec_MPIDense (existing values need not
   be valid; uses MatDenseGetArrayWrite). */
PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscInt      lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
  a->vecinuse = col + 1;
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(MatDenseGetArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
  PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)lda)));
  *v = a->cvec;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Release the write-only column vector obtained with MatDenseGetColumnVecWrite_MPIDense. */
PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
  a->vecinuse = 0;
  PetscCall(MatDenseRestoreArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
  PetscCall(VecResetArray(a->cvec));
  if (v) *v = NULL;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Expose the (rbegin:rend, cbegin:cend) block of A as a cached matrix a->cmat
   that aliases A's storage.

   pbegin/pend clip the requested global row range to this rank's ownership.
   The cached matrix's layouts are (re)built only when the requested sizes differ
   from the previous call; the inner sequential submatrix is borrowed from the
   local block. Only one submatrix may be outstanding at a time (matinuse). */
static PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A, PetscInt rbegin, PetscInt rend, PetscInt cbegin, PetscInt cend, Mat *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat_MPIDense *c;
  MPI_Comm      comm;
  PetscInt      pbegin, pend;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCheck(!a->vecinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  pbegin = PetscMax(0, PetscMin(A->rmap->rend, rbegin) - A->rmap->rstart);
  pend   = PetscMin(A->rmap->n, PetscMax(0, rend - A->rmap->rstart));
  if (!a->cmat) {
    PetscCall(MatCreate(comm, &a->cmat));
    PetscCall(MatSetType(a->cmat, ((PetscObject)A)->type_name));
    if (rend - rbegin == A->rmap->N) PetscCall(PetscLayoutReference(A->rmap, &a->cmat->rmap));
    else {
      PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin));
      PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin));
      PetscCall(PetscLayoutSetUp(a->cmat->rmap));
    }
    PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin));
    PetscCall(PetscLayoutSetUp(a->cmat->cmap));
  } else {
    /* reuse the cached matrix; rebuild its layouts only if the shape changed */
    PetscBool same = (PetscBool)(rend - rbegin == a->cmat->rmap->N);
    if (same && a->cmat->rmap->N != A->rmap->N) {
      same = (PetscBool)(pend - pbegin == a->cmat->rmap->n);
      PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &same, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
    }
    if (!same) {
      PetscCall(PetscLayoutDestroy(&a->cmat->rmap));
      PetscCall(PetscLayoutCreate(comm, &a->cmat->rmap));
      PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin));
      PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin));
      PetscCall(PetscLayoutSetUp(a->cmat->rmap));
    }
    if (cend - cbegin != a->cmat->cmap->N) {
      PetscCall(PetscLayoutDestroy(&a->cmat->cmap));
      PetscCall(PetscLayoutCreate(comm, &a->cmat->cmap));
      PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin));
      PetscCall(PetscLayoutSetUp(a->cmat->cmap));
    }
  }
  c = (Mat_MPIDense *)a->cmat->data;
  PetscCheck(!c->A, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseGetSubMatrix(a->A, pbegin, pend, cbegin, cend, &c->A));

  a->cmat->preallocated = PETSC_TRUE;
  a->cmat->assembled    = PETSC_TRUE;
#if defined(PETSC_HAVE_DEVICE)
  a->cmat->offloadmask = c->A->offloadmask;
#endif
  a->matinuse = cbegin + 1;
  *v          = a->cmat;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Return the submatrix obtained with MatDenseGetSubMatrix_MPIDense. */
static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A, Mat *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat_MPIDense *c;

  PetscFunctionBegin;
  PetscCheck(a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetSubMatrix() first");
  PetscCheck(a->cmat,
PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal matrix"); 1610 PetscCheck(*v == a->cmat, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Not the matrix obtained from MatDenseGetSubMatrix()"); 1611 a->matinuse = 0; 1612 c = (Mat_MPIDense *)a->cmat->data; 1613 PetscCall(MatDenseRestoreSubMatrix(a->A, &c->A)); 1614 if (v) *v = NULL; 1615 #if defined(PETSC_HAVE_DEVICE) 1616 A->offloadmask = a->A->offloadmask; 1617 #endif 1618 PetscFunctionReturn(PETSC_SUCCESS); 1619 } 1620 1621 /*MC 1622 MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices. 1623 1624 Options Database Key: 1625 . -mat_type mpidense - sets the matrix type to `MATMPIDENSE` during a call to `MatSetFromOptions()` 1626 1627 Level: beginner 1628 1629 .seealso: [](ch_matrices), `Mat`, `MatCreateDense()`, `MATSEQDENSE`, `MATDENSE` 1630 M*/ 1631 PetscErrorCode MatCreate_MPIDense(Mat mat) 1632 { 1633 Mat_MPIDense *a; 1634 1635 PetscFunctionBegin; 1636 PetscCall(PetscNew(&a)); 1637 mat->data = (void *)a; 1638 mat->ops[0] = MatOps_Values; 1639 1640 mat->insertmode = NOT_SET_VALUES; 1641 1642 /* build cache for off array entries formed */ 1643 a->donotstash = PETSC_FALSE; 1644 1645 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)mat), 1, &mat->stash)); 1646 1647 /* stuff used for matrix vector multiply */ 1648 a->lvec = NULL; 1649 a->Mvctx = NULL; 1650 a->roworiented = PETSC_TRUE; 1651 1652 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", MatDenseGetLDA_MPIDense)); 1653 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", MatDenseSetLDA_MPIDense)); 1654 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", MatDenseGetArray_MPIDense)); 1655 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", MatDenseRestoreArray_MPIDense)); 1656 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", MatDenseGetArrayRead_MPIDense)); 1657 
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", MatDenseRestoreArrayRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", MatDenseGetArrayWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", MatDenseRestoreArrayWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", MatDensePlaceArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", MatDenseResetArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", MatDenseReplaceArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", MatDenseGetSubMatrix_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", MatDenseRestoreSubMatrix_MPIDense));
  /* type conversions to/from other formats */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", MatConvert_MPIDense_MPIAIJ));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", MatConvert_MPIDense_Elemental));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", MatConvert_Dense_ScaLAPACK));
#endif
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", MatConvert_MPIDense_MPIDenseCUDA));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", MatMPIDenseSetPreallocation_MPIDense));
  /* matrix-product dispatch for AIJ x Dense combinations (device sparse formats reuse the host kernels) */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
#endif
#if defined(PETSC_HAVE_HIP)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", MatConvert_MPIDense_MPIDenseHIP));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", MatDenseGetColumn_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", MatDenseRestoreColumn_MPIDense));
  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, MATMPIDENSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*MC
   MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.

   This matrix type is identical to `MATSEQDENSE` when constructed with a single process communicator,
   and `MATMPIDENSE` otherwise.

   Options Database Key:
. -mat_type dense - sets the matrix type to `MATDENSE` during a call to `MatSetFromOptions()`

  Level: beginner

.seealso: [](ch_matrices), `Mat`, `MATSEQDENSE`, `MATMPIDENSE`, `MATDENSECUDA`, `MATDENSEHIP`
M*/

/*@C
  MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

  Collective

  Input Parameters:
+ B - the matrix
- data - optional location of matrix data. Set to `NULL` for PETSc
   to control all matrix memory allocation.

  Level: intermediate

  Notes:
  The dense format is fully compatible with standard Fortran
  storage by columns.

  The data input variable is intended primarily for Fortran programmers
  who wish to allocate their own matrix memory space. Most users should
  set `data` to `NULL`.

.seealso: [](ch_matrices), `Mat`, `MATMPIDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
@*/
PetscErrorCode MatMPIDenseSetPreallocation(Mat B, PetscScalar *data)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  /* "Try": silently does nothing when B is not an MPI dense matrix */
  PetscTryMethod(B, "MatMPIDenseSetPreallocation_C", (Mat, PetscScalar *), (B, data));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatDensePlaceArray - Allows one to replace the array in a `MATDENSE` matrix with an
  array provided by the user.
This is useful to avoid copying an array
  into a matrix

  Not Collective

  Input Parameters:
+ mat - the matrix
- array - the array in column major order

  Level: developer

  Note:
  You can return to the original array with a call to `MatDenseResetArray()`. The user is responsible for freeing this array; it will not be
  freed when the matrix is destroyed.

.seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDenseResetArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`,
          `MatDenseReplaceArray()`
@*/
PetscErrorCode MatDensePlaceArray(Mat mat, const PetscScalar *array)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  PetscUseMethod(mat, "MatDensePlaceArray_C", (Mat, const PetscScalar *), (mat, array));
  /* values may have changed: bump the object state so cached norms etc. are invalidated */
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
  /* the user-supplied array lives on the host */
  mat->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatDenseResetArray - Resets the matrix array to that it previously had before the call to `MatDensePlaceArray()`

  Not Collective

  Input Parameter:
. mat - the matrix

  Level: developer

  Note:
  You can only call this after a call to `MatDensePlaceArray()`

.seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDensePlaceArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`
@*/
PetscErrorCode MatDenseResetArray(Mat mat)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  PetscUseMethod(mat, "MatDenseResetArray_C", (Mat), (mat));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an
  array provided by the user. This is useful to avoid copying an array
  into a matrix

  Not Collective

  Input Parameters:
+ mat - the matrix
- array - the array in column major order

  Level: developer

  Note:
  The memory passed in MUST be obtained with `PetscMalloc()` and CANNOT be
  freed by the user. It will be freed when the matrix is destroyed.

.seealso: [](ch_matrices), `Mat`, `MatDensePlaceArray()`, `MatDenseGetArray()`, `VecReplaceArray()`
@*/
PetscErrorCode MatDenseReplaceArray(Mat mat, const PetscScalar *array)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  PetscUseMethod(mat, "MatDenseReplaceArray_C", (Mat, const PetscScalar *), (mat, array));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
  /* the replacement array lives on the host */
  mat->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatCreateDense - Creates a matrix in `MATDENSE` format.

  Collective

  Input Parameters:
+ comm - MPI communicator
. m    - number of local rows (or `PETSC_DECIDE` to have calculated if `M` is given)
. n    - number of local columns (or `PETSC_DECIDE` to have calculated if `N` is given)
. M    - number of global rows (or `PETSC_DECIDE` to have calculated if `m` is given)
. N    - number of global columns (or `PETSC_DECIDE` to have calculated if `n` is given)
- data - optional location of matrix data. Set data to `NULL` (`PETSC_NULL_SCALAR` for Fortran users) for PETSc
   to control all matrix memory allocation.

  Output Parameter:
. A - the matrix

  Level: intermediate

  Notes:
  The dense format is fully compatible with standard Fortran
  storage by columns.

  Although local portions of the matrix are stored in column-major
  order, the matrix is partitioned across MPI ranks by row.

  The data input variable is intended primarily for Fortran programmers
  who wish to allocate their own matrix memory space. Most users should
  set `data` to `NULL` (`PETSC_NULL_SCALAR` for Fortran users).

  The user MUST specify either the local or global matrix dimensions
  (possibly both).

.seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
@*/
PetscErrorCode MatCreateDense(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar *data, Mat *A)
{
  PetscFunctionBegin;
  PetscCall(MatCreate(comm, A));
  PetscCall(MatSetSizes(*A, m, n, M, N));
  PetscCall(MatSetType(*A, MATDENSE));
  /* exactly one of these takes effect depending on whether *A resolved to seq or mpi dense */
  PetscCall(MatSeqDenseSetPreallocation(*A, data));
  PetscCall(MatMPIDenseSetPreallocation(*A, data));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Duplicates an MPI dense matrix: layouts are shared by reference, the local
   sequential matrix is duplicated (copying values iff cpvalues requests it) */
static PetscErrorCode MatDuplicate_MPIDense(Mat A, MatDuplicateOption cpvalues, Mat *newmat)
{
  Mat mat;
  Mat_MPIDense *a, *oldmat = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  *newmat = NULL;
  PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat));
  PetscCall(MatSetSizes(mat, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
  PetscCall(MatSetType(mat, ((PetscObject)A)->type_name));
  a = (Mat_MPIDense *)mat->data;

  mat->factortype = A->factortype;
  mat->assembled = PETSC_TRUE;
  mat->preallocated = PETSC_TRUE;

  mat->insertmode = NOT_SET_VALUES;
  a->donotstash = oldmat->donotstash;

  PetscCall(PetscLayoutReference(A->rmap, &mat->rmap));
  PetscCall(PetscLayoutReference(A->cmap, &mat->cmap));

  PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));

  *newmat = mat;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Loads an MPI dense matrix from a binary or (when available) HDF5 viewer */
static PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
{
  PetscBool isbinary;
#if defined(PETSC_HAVE_HDF5)
  PetscBool ishdf5;
#endif

  PetscFunctionBegin;
  PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
  PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
  /* force binary viewer to load .info file if it has not yet done so */
  PetscCall(PetscViewerSetUp(viewer));
PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1918 #if defined(PETSC_HAVE_HDF5) 1919 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5)); 1920 #endif 1921 if (isbinary) { 1922 PetscCall(MatLoad_Dense_Binary(newMat, viewer)); 1923 #if defined(PETSC_HAVE_HDF5) 1924 } else if (ishdf5) { 1925 PetscCall(MatLoad_Dense_HDF5(newMat, viewer)); 1926 #endif 1927 } else SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name); 1928 PetscFunctionReturn(PETSC_SUCCESS); 1929 } 1930 1931 static PetscErrorCode MatEqual_MPIDense(Mat A, Mat B, PetscBool *flag) 1932 { 1933 Mat_MPIDense *matB = (Mat_MPIDense *)B->data, *matA = (Mat_MPIDense *)A->data; 1934 Mat a, b; 1935 1936 PetscFunctionBegin; 1937 a = matA->A; 1938 b = matB->A; 1939 PetscCall(MatEqual(a, b, flag)); 1940 PetscCall(MPIU_Allreduce(MPI_IN_PLACE, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A))); 1941 PetscFunctionReturn(PETSC_SUCCESS); 1942 } 1943 1944 static PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data) 1945 { 1946 Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data; 1947 1948 PetscFunctionBegin; 1949 PetscCall(PetscFree2(atb->sendbuf, atb->recvcounts)); 1950 PetscCall(MatDestroy(&atb->atb)); 1951 PetscCall(PetscFree(atb)); 1952 PetscFunctionReturn(PETSC_SUCCESS); 1953 } 1954 1955 static PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data) 1956 { 1957 Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data; 1958 1959 PetscFunctionBegin; 1960 PetscCall(PetscFree2(abt->buf[0], abt->buf[1])); 1961 PetscCall(PetscFree2(abt->recvcounts, abt->recvdispls)); 1962 PetscCall(PetscFree(abt)); 1963 PetscFunctionReturn(PETSC_SUCCESS); 1964 } 1965 1966 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) 1967 { 1968 Mat_MPIDense *a = 
(Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data; 1969 Mat_TransMatMultDense *atb; 1970 MPI_Comm comm; 1971 PetscMPIInt size, *recvcounts; 1972 PetscScalar *carray, *sendbuf; 1973 const PetscScalar *atbarray; 1974 PetscInt i, cN = C->cmap->N, proc, k, j, lda; 1975 const PetscInt *ranges; 1976 1977 PetscFunctionBegin; 1978 MatCheckProduct(C, 3); 1979 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 1980 atb = (Mat_TransMatMultDense *)C->product->data; 1981 recvcounts = atb->recvcounts; 1982 sendbuf = atb->sendbuf; 1983 1984 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 1985 PetscCallMPI(MPI_Comm_size(comm, &size)); 1986 1987 /* compute atbarray = aseq^T * bseq */ 1988 PetscCall(MatTransposeMatMult(a->A, b->A, atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX, PETSC_DEFAULT, &atb->atb)); 1989 1990 PetscCall(MatGetOwnershipRanges(C, &ranges)); 1991 1992 /* arrange atbarray into sendbuf */ 1993 PetscCall(MatDenseGetArrayRead(atb->atb, &atbarray)); 1994 PetscCall(MatDenseGetLDA(atb->atb, &lda)); 1995 for (proc = 0, k = 0; proc < size; proc++) { 1996 for (j = 0; j < cN; j++) { 1997 for (i = ranges[proc]; i < ranges[proc + 1]; i++) sendbuf[k++] = atbarray[i + j * lda]; 1998 } 1999 } 2000 PetscCall(MatDenseRestoreArrayRead(atb->atb, &atbarray)); 2001 2002 /* sum all atbarray to local values of C */ 2003 PetscCall(MatDenseGetArrayWrite(c->A, &carray)); 2004 PetscCallMPI(MPI_Reduce_scatter(sendbuf, carray, recvcounts, MPIU_SCALAR, MPIU_SUM, comm)); 2005 PetscCall(MatDenseRestoreArrayWrite(c->A, &carray)); 2006 PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 2007 PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 2008 PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 2009 PetscFunctionReturn(PETSC_SUCCESS); 2010 } 2011 2012 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C) 2013 { 2014 MPI_Comm comm; 2015 
PetscMPIInt size; 2016 PetscInt cm = A->cmap->n, cM, cN = B->cmap->N; 2017 Mat_TransMatMultDense *atb; 2018 PetscBool cisdense = PETSC_FALSE; 2019 PetscInt i; 2020 const PetscInt *ranges; 2021 2022 PetscFunctionBegin; 2023 MatCheckProduct(C, 4); 2024 PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 2025 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 2026 if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) { 2027 SETERRQ(comm, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend); 2028 } 2029 2030 /* create matrix product C */ 2031 PetscCall(MatSetSizes(C, cm, B->cmap->n, A->cmap->N, B->cmap->N)); 2032 #if defined(PETSC_HAVE_CUDA) 2033 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, "")); 2034 #endif 2035 #if defined(PETSC_HAVE_HIP) 2036 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSEHIP, "")); 2037 #endif 2038 if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 2039 PetscCall(MatSetUp(C)); 2040 2041 /* create data structure for reuse C */ 2042 PetscCallMPI(MPI_Comm_size(comm, &size)); 2043 PetscCall(PetscNew(&atb)); 2044 cM = C->rmap->N; 2045 PetscCall(PetscMalloc2(cM * cN, &atb->sendbuf, size, &atb->recvcounts)); 2046 PetscCall(MatGetOwnershipRanges(C, &ranges)); 2047 for (i = 0; i < size; i++) atb->recvcounts[i] = (ranges[i + 1] - ranges[i]) * cN; 2048 2049 C->product->data = atb; 2050 C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense; 2051 PetscFunctionReturn(PETSC_SUCCESS); 2052 } 2053 2054 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C) 2055 { 2056 MPI_Comm comm; 2057 PetscMPIInt i, size; 2058 PetscInt maxRows, bufsiz; 2059 PetscMPIInt tag; 2060 
PetscInt alg; 2061 Mat_MatTransMultDense *abt; 2062 Mat_Product *product = C->product; 2063 PetscBool flg; 2064 2065 PetscFunctionBegin; 2066 MatCheckProduct(C, 4); 2067 PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 2068 /* check local size of A and B */ 2069 PetscCheck(A->cmap->n == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local column dimensions are incompatible, A (%" PetscInt_FMT ") != B (%" PetscInt_FMT ")", A->cmap->n, B->cmap->n); 2070 2071 PetscCall(PetscStrcmp(product->alg, "allgatherv", &flg)); 2072 alg = flg ? 0 : 1; 2073 2074 /* setup matrix product C */ 2075 PetscCall(MatSetSizes(C, A->rmap->n, B->rmap->n, A->rmap->N, B->rmap->N)); 2076 PetscCall(MatSetType(C, MATMPIDENSE)); 2077 PetscCall(MatSetUp(C)); 2078 PetscCall(PetscObjectGetNewTag((PetscObject)C, &tag)); 2079 2080 /* create data structure for reuse C */ 2081 PetscCall(PetscObjectGetComm((PetscObject)C, &comm)); 2082 PetscCallMPI(MPI_Comm_size(comm, &size)); 2083 PetscCall(PetscNew(&abt)); 2084 abt->tag = tag; 2085 abt->alg = alg; 2086 switch (alg) { 2087 case 1: /* alg: "cyclic" */ 2088 for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i])); 2089 bufsiz = A->cmap->N * maxRows; 2090 PetscCall(PetscMalloc2(bufsiz, &(abt->buf[0]), bufsiz, &(abt->buf[1]))); 2091 break; 2092 default: /* alg: "allgatherv" */ 2093 PetscCall(PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1]))); 2094 PetscCall(PetscMalloc2(size, &(abt->recvcounts), size + 1, &(abt->recvdispls))); 2095 for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N; 2096 for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i]; 2097 break; 2098 } 2099 2100 C->product->data = abt; 2101 C->product->destroy = MatDestroy_MatMatTransMult_MPIDense_MPIDense; 2102 C->ops->mattransposemultnumeric = 
MatMatTransposeMultNumeric_MPIDense_MPIDense; 2103 PetscFunctionReturn(PETSC_SUCCESS); 2104 } 2105 2106 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C) 2107 { 2108 Mat_MPIDense *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data; 2109 Mat_MatTransMultDense *abt; 2110 MPI_Comm comm; 2111 PetscMPIInt rank, size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom; 2112 PetscScalar *sendbuf, *recvbuf = NULL, *cv; 2113 PetscInt i, cK = A->cmap->N, k, j, bn; 2114 PetscScalar _DOne = 1.0, _DZero = 0.0; 2115 const PetscScalar *av, *bv; 2116 PetscBLASInt cm, cn, ck, alda, blda = 0, clda; 2117 MPI_Request reqs[2]; 2118 const PetscInt *ranges; 2119 2120 PetscFunctionBegin; 2121 MatCheckProduct(C, 3); 2122 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 2123 abt = (Mat_MatTransMultDense *)C->product->data; 2124 PetscCall(PetscObjectGetComm((PetscObject)C, &comm)); 2125 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 2126 PetscCallMPI(MPI_Comm_size(comm, &size)); 2127 PetscCall(MatDenseGetArrayRead(a->A, &av)); 2128 PetscCall(MatDenseGetArrayRead(b->A, &bv)); 2129 PetscCall(MatDenseGetArrayWrite(c->A, &cv)); 2130 PetscCall(MatDenseGetLDA(a->A, &i)); 2131 PetscCall(PetscBLASIntCast(i, &alda)); 2132 PetscCall(MatDenseGetLDA(b->A, &i)); 2133 PetscCall(PetscBLASIntCast(i, &blda)); 2134 PetscCall(MatDenseGetLDA(c->A, &i)); 2135 PetscCall(PetscBLASIntCast(i, &clda)); 2136 PetscCall(MatGetOwnershipRanges(B, &ranges)); 2137 bn = B->rmap->n; 2138 if (blda == bn) { 2139 sendbuf = (PetscScalar *)bv; 2140 } else { 2141 sendbuf = abt->buf[0]; 2142 for (k = 0, i = 0; i < cK; i++) { 2143 for (j = 0; j < bn; j++, k++) sendbuf[k] = bv[i * blda + j]; 2144 } 2145 } 2146 if (size > 1) { 2147 sendto = (rank + size - 1) % size; 2148 recvfrom = (rank + size + 1) % size; 2149 } else { 2150 sendto = recvfrom = 0; 2151 } 2152 PetscCall(PetscBLASIntCast(cK, &ck)); 2153 
PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm)); 2154 recvisfrom = rank; 2155 for (i = 0; i < size; i++) { 2156 /* we have finished receiving in sending, bufs can be read/modified */ 2157 PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */ 2158 PetscInt nextbn = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom]; 2159 2160 if (nextrecvisfrom != rank) { 2161 /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */ 2162 sendsiz = cK * bn; 2163 recvsiz = cK * nextbn; 2164 recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1]; 2165 PetscCallMPI(MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0])); 2166 PetscCallMPI(MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1])); 2167 } 2168 2169 /* local aseq * sendbuf^T */ 2170 PetscCall(PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn)); 2171 if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &cm, &cn, &ck, &_DOne, av, &alda, sendbuf, &cn, &_DZero, cv + clda * ranges[recvisfrom], &clda)); 2172 2173 if (nextrecvisfrom != rank) { 2174 /* wait for the sends and receives to complete, swap sendbuf and recvbuf */ 2175 PetscCallMPI(MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE)); 2176 } 2177 bn = nextbn; 2178 recvisfrom = nextrecvisfrom; 2179 sendbuf = recvbuf; 2180 } 2181 PetscCall(MatDenseRestoreArrayRead(a->A, &av)); 2182 PetscCall(MatDenseRestoreArrayRead(b->A, &bv)); 2183 PetscCall(MatDenseRestoreArrayWrite(c->A, &cv)); 2184 PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 2185 PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 2186 PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 2187 PetscFunctionReturn(PETSC_SUCCESS); 2188 } 2189 2190 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C) 2191 { 2192 Mat_MPIDense *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data; 2193 
Mat_MatTransMultDense *abt; 2194 MPI_Comm comm; 2195 PetscMPIInt size; 2196 PetscScalar *cv, *sendbuf, *recvbuf; 2197 const PetscScalar *av, *bv; 2198 PetscInt blda, i, cK = A->cmap->N, k, j, bn; 2199 PetscScalar _DOne = 1.0, _DZero = 0.0; 2200 PetscBLASInt cm, cn, ck, alda, clda; 2201 2202 PetscFunctionBegin; 2203 MatCheckProduct(C, 3); 2204 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 2205 abt = (Mat_MatTransMultDense *)C->product->data; 2206 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 2207 PetscCallMPI(MPI_Comm_size(comm, &size)); 2208 PetscCall(MatDenseGetArrayRead(a->A, &av)); 2209 PetscCall(MatDenseGetArrayRead(b->A, &bv)); 2210 PetscCall(MatDenseGetArrayWrite(c->A, &cv)); 2211 PetscCall(MatDenseGetLDA(a->A, &i)); 2212 PetscCall(PetscBLASIntCast(i, &alda)); 2213 PetscCall(MatDenseGetLDA(b->A, &blda)); 2214 PetscCall(MatDenseGetLDA(c->A, &i)); 2215 PetscCall(PetscBLASIntCast(i, &clda)); 2216 /* copy transpose of B into buf[0] */ 2217 bn = B->rmap->n; 2218 sendbuf = abt->buf[0]; 2219 recvbuf = abt->buf[1]; 2220 for (k = 0, j = 0; j < bn; j++) { 2221 for (i = 0; i < cK; i++, k++) sendbuf[k] = bv[i * blda + j]; 2222 } 2223 PetscCall(MatDenseRestoreArrayRead(b->A, &bv)); 2224 PetscCallMPI(MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm)); 2225 PetscCall(PetscBLASIntCast(cK, &ck)); 2226 PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm)); 2227 PetscCall(PetscBLASIntCast(c->A->cmap->n, &cn)); 2228 if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &cm, &cn, &ck, &_DOne, av, &alda, recvbuf, &ck, &_DZero, cv, &clda)); 2229 PetscCall(MatDenseRestoreArrayRead(a->A, &av)); 2230 PetscCall(MatDenseRestoreArrayRead(b->A, &bv)); 2231 PetscCall(MatDenseRestoreArrayWrite(c->A, &cv)); 2232 PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 2233 PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 2234 PetscCall(MatAssemblyEnd(C, 
MAT_FINAL_ASSEMBLY)); 2235 PetscFunctionReturn(PETSC_SUCCESS); 2236 } 2237 2238 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) 2239 { 2240 Mat_MatTransMultDense *abt; 2241 2242 PetscFunctionBegin; 2243 MatCheckProduct(C, 3); 2244 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 2245 abt = (Mat_MatTransMultDense *)C->product->data; 2246 switch (abt->alg) { 2247 case 1: 2248 PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C)); 2249 break; 2250 default: 2251 PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C)); 2252 break; 2253 } 2254 PetscFunctionReturn(PETSC_SUCCESS); 2255 } 2256 2257 static PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data) 2258 { 2259 Mat_MatMultDense *ab = (Mat_MatMultDense *)data; 2260 2261 PetscFunctionBegin; 2262 PetscCall(MatDestroy(&ab->Ce)); 2263 PetscCall(MatDestroy(&ab->Ae)); 2264 PetscCall(MatDestroy(&ab->Be)); 2265 PetscCall(PetscFree(ab)); 2266 PetscFunctionReturn(PETSC_SUCCESS); 2267 } 2268 2269 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) 2270 { 2271 Mat_MatMultDense *ab; 2272 Mat_MPIDense *mdn = (Mat_MPIDense *)A->data; 2273 2274 PetscFunctionBegin; 2275 MatCheckProduct(C, 3); 2276 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Missing product data"); 2277 ab = (Mat_MatMultDense *)C->product->data; 2278 if (ab->Ae && ab->Ce) { 2279 #if PetscDefined(HAVE_ELEMENTAL) 2280 PetscCall(MatConvert_MPIDense_Elemental(A, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Ae)); 2281 PetscCall(MatConvert_MPIDense_Elemental(B, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Be)); 2282 PetscCall(MatMatMultNumeric_Elemental(ab->Ae, ab->Be, ab->Ce)); 2283 PetscCall(MatConvert(ab->Ce, MATMPIDENSE, MAT_REUSE_MATRIX, &C)); 2284 #else 2285 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "PETSC_HAVE_ELEMENTAL not defined"); 2286 #endif 2287 } else { 2288 
const PetscScalar *read; 2289 PetscScalar *write; 2290 PetscInt lda; 2291 2292 PetscCall(MatDenseGetLDA(B, &lda)); 2293 PetscCall(MatDenseGetArrayRead(B, &read)); 2294 PetscCall(MatDenseGetArrayWrite(ab->Be, &write)); 2295 if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); /* cannot be done during the symbolic phase because of possible calls to MatProductReplaceMats() */ 2296 for (PetscInt i = 0; i < C->cmap->N; ++i) { 2297 PetscCall(PetscSFBcastBegin(mdn->Mvctx, MPIU_SCALAR, read + i * lda, write + i * ab->Be->rmap->n, MPI_REPLACE)); 2298 PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, read + i * lda, write + i * ab->Be->rmap->n, MPI_REPLACE)); 2299 } 2300 PetscCall(MatDenseRestoreArrayWrite(ab->Be, &write)); 2301 PetscCall(MatDenseRestoreArrayRead(B, &read)); 2302 PetscCall(MatMatMultNumeric_SeqDense_SeqDense(((Mat_MPIDense *)(A->data))->A, ab->Be, ((Mat_MPIDense *)(C->data))->A)); 2303 } 2304 PetscFunctionReturn(PETSC_SUCCESS); 2305 } 2306 2307 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C) 2308 { 2309 Mat_Product *product = C->product; 2310 PetscInt alg; 2311 Mat_MatMultDense *ab; 2312 PetscBool flg; 2313 2314 PetscFunctionBegin; 2315 MatCheckProduct(C, 4); 2316 PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 2317 /* check local size of A and B */ 2318 PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ", %" PetscInt_FMT ")", 2319 A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend); 2320 2321 PetscCall(PetscStrcmp(product->alg, "petsc", &flg)); 2322 alg = flg ? 
0 : 1; 2323 2324 /* setup C */ 2325 PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N)); 2326 PetscCall(MatSetType(C, MATMPIDENSE)); 2327 PetscCall(MatSetUp(C)); 2328 2329 /* create data structure for reuse Cdense */ 2330 PetscCall(PetscNew(&ab)); 2331 2332 switch (alg) { 2333 case 1: /* alg: "elemental" */ 2334 #if PetscDefined(HAVE_ELEMENTAL) 2335 /* create elemental matrices Ae and Be */ 2336 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &ab->Ae)); 2337 PetscCall(MatSetSizes(ab->Ae, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N)); 2338 PetscCall(MatSetType(ab->Ae, MATELEMENTAL)); 2339 PetscCall(MatSetUp(ab->Ae)); 2340 PetscCall(MatSetOption(ab->Ae, MAT_ROW_ORIENTED, PETSC_FALSE)); 2341 2342 PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &ab->Be)); 2343 PetscCall(MatSetSizes(ab->Be, PETSC_DECIDE, PETSC_DECIDE, B->rmap->N, B->cmap->N)); 2344 PetscCall(MatSetType(ab->Be, MATELEMENTAL)); 2345 PetscCall(MatSetUp(ab->Be)); 2346 PetscCall(MatSetOption(ab->Be, MAT_ROW_ORIENTED, PETSC_FALSE)); 2347 2348 /* compute symbolic Ce = Ae*Be */ 2349 PetscCall(MatCreate(PetscObjectComm((PetscObject)C), &ab->Ce)); 2350 PetscCall(MatMatMultSymbolic_Elemental(ab->Ae, ab->Be, fill, ab->Ce)); 2351 #else 2352 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "PETSC_HAVE_ELEMENTAL not defined"); 2353 #endif 2354 break; 2355 default: /* alg: "petsc" */ 2356 ab->Ae = NULL; 2357 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, A->cmap->N, B->cmap->N, NULL, &ab->Be)); 2358 ab->Ce = NULL; 2359 break; 2360 } 2361 2362 C->product->data = ab; 2363 C->product->destroy = MatDestroy_MatMatMult_MPIDense_MPIDense; 2364 C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense; 2365 PetscFunctionReturn(PETSC_SUCCESS); 2366 } 2367 2368 static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C) 2369 { 2370 Mat_Product *product = C->product; 2371 const char *algTypes[2] = {"petsc", "elemental"}; 2372 PetscInt alg, nalg = PetscDefined(HAVE_ELEMENTAL) ? 
2 : 1; 2373 PetscBool flg = PETSC_FALSE; 2374 PetscFunctionBegin; 2375 2376 /* Set default algorithm */ 2377 alg = 0; /* default is petsc */ 2378 PetscCall(PetscStrcmp(product->alg, "default", &flg)); 2379 if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg])); 2380 2381 /* Get runtime option */ 2382 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat"); 2383 PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_AB", algTypes, nalg, algTypes[alg], &alg, &flg)); 2384 PetscOptionsEnd(); 2385 if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg])); 2386 2387 C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense; 2388 C->ops->productsymbolic = MatProductSymbolic_AB; 2389 PetscFunctionReturn(PETSC_SUCCESS); 2390 } 2391 2392 static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C) 2393 { 2394 Mat_Product *product = C->product; 2395 Mat A = product->A, B = product->B; 2396 2397 PetscFunctionBegin; 2398 PetscCheck(A->rmap->rstart == B->rmap->rstart && A->rmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", 2399 A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend); 2400 C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense; 2401 C->ops->productsymbolic = MatProductSymbolic_AtB; 2402 PetscFunctionReturn(PETSC_SUCCESS); 2403 } 2404 2405 static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C) 2406 { 2407 Mat_Product *product = C->product; 2408 const char *algTypes[2] = {"allgatherv", "cyclic"}; 2409 PetscInt alg, nalg = 2; 2410 PetscBool flg = PETSC_FALSE; 2411 2412 PetscFunctionBegin; 2413 /* Set default algorithm */ 2414 alg = 0; /* default is allgatherv */ 2415 PetscCall(PetscStrcmp(product->alg, "default", &flg)); 2416 if (flg) 
PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg])); 2417 2418 /* Get runtime option */ 2419 if (product->api_user) { 2420 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatTransposeMult", "Mat"); 2421 PetscCall(PetscOptionsEList("-matmattransmult_mpidense_mpidense_via", "Algorithmic approach", "MatMatTransposeMult", algTypes, nalg, algTypes[alg], &alg, &flg)); 2422 PetscOptionsEnd(); 2423 } else { 2424 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_ABt", "Mat"); 2425 PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_ABt", algTypes, nalg, algTypes[alg], &alg, &flg)); 2426 PetscOptionsEnd(); 2427 } 2428 if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg])); 2429 2430 C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense; 2431 C->ops->productsymbolic = MatProductSymbolic_ABt; 2432 PetscFunctionReturn(PETSC_SUCCESS); 2433 } 2434 2435 static PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C) 2436 { 2437 Mat_Product *product = C->product; 2438 2439 PetscFunctionBegin; 2440 switch (product->type) { 2441 case MATPRODUCT_AB: 2442 PetscCall(MatProductSetFromOptions_MPIDense_AB(C)); 2443 break; 2444 case MATPRODUCT_AtB: 2445 PetscCall(MatProductSetFromOptions_MPIDense_AtB(C)); 2446 break; 2447 case MATPRODUCT_ABt: 2448 PetscCall(MatProductSetFromOptions_MPIDense_ABt(C)); 2449 break; 2450 default: 2451 break; 2452 } 2453 PetscFunctionReturn(PETSC_SUCCESS); 2454 } 2455