1 /* 2 Basic functions for basic parallel dense matrices. 3 Portions of this code are under: 4 Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. 5 */ 6 7 #include <../src/mat/impls/dense/mpi/mpidense.h> /*I "petscmat.h" I*/ 8 #include <../src/mat/impls/aij/mpi/mpiaij.h> 9 #include <petscblaslapack.h> 10 11 /*@ 12 MatDenseGetLocalMatrix - For a `MATMPIDENSE` or `MATSEQDENSE` matrix returns the sequential 13 matrix that represents the operator. For sequential matrices it returns itself. 14 15 Input Parameter: 16 . A - the sequential or MPI `MATDENSE` matrix 17 18 Output Parameter: 19 . B - the inner matrix 20 21 Level: intermediate 22 23 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATMPIDENSE`, `MATSEQDENSE` 24 @*/ 25 PetscErrorCode MatDenseGetLocalMatrix(Mat A, Mat *B) 26 { 27 Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 28 PetscBool flg; 29 30 PetscFunctionBegin; 31 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 32 PetscAssertPointer(B, 2); 33 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &flg)); 34 if (flg) *B = mat->A; 35 else { 36 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQDENSE, &flg)); 37 PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)A)->type_name); 38 *B = A; 39 } 40 PetscFunctionReturn(PETSC_SUCCESS); 41 } 42 43 static PetscErrorCode MatCopy_MPIDense(Mat A, Mat B, MatStructure s) 44 { 45 Mat_MPIDense *Amat = (Mat_MPIDense *)A->data; 46 Mat_MPIDense *Bmat = (Mat_MPIDense *)B->data; 47 48 PetscFunctionBegin; 49 PetscCall(MatCopy(Amat->A, Bmat->A, s)); 50 PetscFunctionReturn(PETSC_SUCCESS); 51 } 52 53 PetscErrorCode MatShift_MPIDense(Mat A, PetscScalar alpha) 54 { 55 Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 56 PetscInt j, lda, rstart = A->rmap->rstart, rend = A->rmap->rend, rend2; 57 PetscScalar *v; 58 59 PetscFunctionBegin; 60 PetscCall(MatDenseGetArray(mat->A, &v)); 61 PetscCall(MatDenseGetLDA(mat->A, &lda)); 62 rend2 = PetscMin(rend, 
A->cmap->N); 63 if (rend2 > rstart) { 64 for (j = rstart; j < rend2; j++) v[j - rstart + j * lda] += alpha; 65 PetscCall(PetscLogFlops(rend2 - rstart)); 66 } 67 PetscCall(MatDenseRestoreArray(mat->A, &v)); 68 PetscFunctionReturn(PETSC_SUCCESS); 69 } 70 71 static PetscErrorCode MatGetRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 72 { 73 Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 74 PetscInt lrow, rstart = A->rmap->rstart, rend = A->rmap->rend; 75 76 PetscFunctionBegin; 77 PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows"); 78 lrow = row - rstart; 79 PetscCall(MatGetRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v)); 80 PetscFunctionReturn(PETSC_SUCCESS); 81 } 82 83 static PetscErrorCode MatRestoreRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 84 { 85 Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 86 PetscInt lrow, rstart = A->rmap->rstart, rend = A->rmap->rend; 87 88 PetscFunctionBegin; 89 PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows"); 90 lrow = row - rstart; 91 PetscCall(MatRestoreRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v)); 92 PetscFunctionReturn(PETSC_SUCCESS); 93 } 94 95 static PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A, Mat *a) 96 { 97 Mat_MPIDense *mdn = (Mat_MPIDense *)A->data; 98 PetscInt m = A->rmap->n, rstart = A->rmap->rstart; 99 PetscScalar *array; 100 MPI_Comm comm; 101 PetscBool flg; 102 Mat B; 103 104 PetscFunctionBegin; 105 PetscCall(MatHasCongruentLayouts(A, &flg)); 106 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only square matrices supported."); 107 PetscCall(PetscObjectQuery((PetscObject)A, "DiagonalBlock", (PetscObject *)&B)); 108 if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */ 109 #if PetscDefined(HAVE_CUDA) 110 
PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSECUDA, &flg)); 111 PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSECUDA); 112 #elif PetscDefined(HAVE_HIP) 113 PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSEHIP, &flg)); 114 PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSEHIP); 115 #endif 116 PetscCall(PetscObjectGetComm((PetscObject)(mdn->A), &comm)); 117 PetscCall(MatCreate(comm, &B)); 118 PetscCall(MatSetSizes(B, m, m, m, m)); 119 PetscCall(MatSetType(B, ((PetscObject)mdn->A)->type_name)); 120 PetscCall(MatDenseGetArrayRead(mdn->A, (const PetscScalar **)&array)); 121 PetscCall(MatSeqDenseSetPreallocation(B, array + m * rstart)); 122 PetscCall(MatDenseRestoreArrayRead(mdn->A, (const PetscScalar **)&array)); 123 PetscCall(PetscObjectCompose((PetscObject)A, "DiagonalBlock", (PetscObject)B)); 124 *a = B; 125 PetscCall(MatDestroy(&B)); 126 } else *a = B; 127 PetscFunctionReturn(PETSC_SUCCESS); 128 } 129 130 static PetscErrorCode MatSetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], const PetscScalar v[], InsertMode addv) 131 { 132 Mat_MPIDense *A = (Mat_MPIDense *)mat->data; 133 PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row; 134 PetscBool roworiented = A->roworiented; 135 136 PetscFunctionBegin; 137 for (i = 0; i < m; i++) { 138 if (idxm[i] < 0) continue; 139 PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large"); 140 if (idxm[i] >= rstart && idxm[i] < rend) { 141 row = idxm[i] - rstart; 142 if (roworiented) { 143 PetscCall(MatSetValues(A->A, 1, &row, n, idxn, v + i * n, addv)); 144 } else { 145 for (j = 0; j < n; j++) { 146 if (idxn[j] < 0) continue; 147 PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, 
"Column too large"); 148 PetscCall(MatSetValues(A->A, 1, &row, 1, &idxn[j], v + i + j * m, addv)); 149 } 150 } 151 } else if (!A->donotstash) { 152 mat->assembled = PETSC_FALSE; 153 if (roworiented) { 154 PetscCall(MatStashValuesRow_Private(&mat->stash, idxm[i], n, idxn, v + i * n, PETSC_FALSE)); 155 } else { 156 PetscCall(MatStashValuesCol_Private(&mat->stash, idxm[i], n, idxn, v + i, m, PETSC_FALSE)); 157 } 158 } 159 } 160 PetscFunctionReturn(PETSC_SUCCESS); 161 } 162 163 static PetscErrorCode MatGetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[]) 164 { 165 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 166 PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row; 167 168 PetscFunctionBegin; 169 for (i = 0; i < m; i++) { 170 if (idxm[i] < 0) continue; /* negative row */ 171 PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large"); 172 if (idxm[i] >= rstart && idxm[i] < rend) { 173 row = idxm[i] - rstart; 174 for (j = 0; j < n; j++) { 175 if (idxn[j] < 0) continue; /* negative column */ 176 PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large"); 177 PetscCall(MatGetValues(mdn->A, 1, &row, 1, &idxn[j], v + i * n + j)); 178 } 179 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported"); 180 } 181 PetscFunctionReturn(PETSC_SUCCESS); 182 } 183 184 static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A, PetscInt *lda) 185 { 186 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 187 188 PetscFunctionBegin; 189 PetscCall(MatDenseGetLDA(a->A, lda)); 190 PetscFunctionReturn(PETSC_SUCCESS); 191 } 192 193 static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A, PetscInt lda) 194 { 195 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 196 MatType mtype = MATSEQDENSE; 197 198 PetscFunctionBegin; 199 if (!a->A) { 200 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call 
MatDenseRestoreSubMatrix() first"); 201 PetscCall(PetscLayoutSetUp(A->rmap)); 202 PetscCall(PetscLayoutSetUp(A->cmap)); 203 PetscCall(MatCreate(PETSC_COMM_SELF, &a->A)); 204 PetscCall(MatSetSizes(a->A, A->rmap->n, A->cmap->N, A->rmap->n, A->cmap->N)); 205 #if PetscDefined(HAVE_CUDA) 206 PetscBool iscuda; 207 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSECUDA, &iscuda)); 208 if (iscuda) mtype = MATSEQDENSECUDA; 209 #elif PetscDefined(HAVE_HIP) 210 PetscBool iship; 211 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSEHIP, &iship)); 212 if (iship) mtype = MATSEQDENSEHIP; 213 #endif 214 PetscCall(MatSetType(a->A, mtype)); 215 } 216 PetscCall(MatDenseSetLDA(a->A, lda)); 217 PetscFunctionReturn(PETSC_SUCCESS); 218 } 219 220 static PetscErrorCode MatDenseGetArray_MPIDense(Mat A, PetscScalar **array) 221 { 222 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 223 224 PetscFunctionBegin; 225 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 226 PetscCall(MatDenseGetArray(a->A, array)); 227 PetscFunctionReturn(PETSC_SUCCESS); 228 } 229 230 static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A, const PetscScalar **array) 231 { 232 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 233 234 PetscFunctionBegin; 235 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 236 PetscCall(MatDenseGetArrayRead(a->A, array)); 237 PetscFunctionReturn(PETSC_SUCCESS); 238 } 239 240 static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A, PetscScalar **array) 241 { 242 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 243 244 PetscFunctionBegin; 245 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 246 PetscCall(MatDenseGetArrayWrite(a->A, array)); 247 PetscFunctionReturn(PETSC_SUCCESS); 248 } 249 250 static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A, const 
PetscScalar *array) 251 { 252 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 253 254 PetscFunctionBegin; 255 PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 256 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 257 PetscCall(MatDensePlaceArray(a->A, array)); 258 PetscFunctionReturn(PETSC_SUCCESS); 259 } 260 261 static PetscErrorCode MatDenseResetArray_MPIDense(Mat A) 262 { 263 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 264 265 PetscFunctionBegin; 266 PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 267 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 268 PetscCall(MatDenseResetArray(a->A)); 269 PetscFunctionReturn(PETSC_SUCCESS); 270 } 271 272 static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A, const PetscScalar *array) 273 { 274 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 275 276 PetscFunctionBegin; 277 PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 278 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 279 PetscCall(MatDenseReplaceArray(a->A, array)); 280 PetscFunctionReturn(PETSC_SUCCESS); 281 } 282 283 static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A, IS isrow, IS iscol, MatReuse scall, Mat *B) 284 { 285 Mat_MPIDense *mat = (Mat_MPIDense *)A->data, *newmatd; 286 PetscInt lda, i, j, rstart, rend, nrows, ncols, Ncols, nlrows, nlcols; 287 const PetscInt *irow, *icol; 288 const PetscScalar *v; 289 PetscScalar *bv; 290 Mat newmat; 291 IS iscol_local; 292 MPI_Comm comm_is, comm_mat; 293 294 PetscFunctionBegin; 295 PetscCall(PetscObjectGetComm((PetscObject)A, &comm_mat)); 296 
PetscCall(PetscObjectGetComm((PetscObject)iscol, &comm_is)); 297 PetscCheck(comm_mat == comm_is, PETSC_COMM_SELF, PETSC_ERR_ARG_NOTSAMECOMM, "IS communicator must match matrix communicator"); 298 299 PetscCall(ISAllGather(iscol, &iscol_local)); 300 PetscCall(ISGetIndices(isrow, &irow)); 301 PetscCall(ISGetIndices(iscol_local, &icol)); 302 PetscCall(ISGetLocalSize(isrow, &nrows)); 303 PetscCall(ISGetLocalSize(iscol, &ncols)); 304 PetscCall(ISGetSize(iscol, &Ncols)); /* global number of columns, size of iscol_local */ 305 306 /* No parallel redistribution currently supported! Should really check each index set 307 to confirm that it is OK. ... Currently supports only submatrix same partitioning as 308 original matrix! */ 309 310 PetscCall(MatGetLocalSize(A, &nlrows, &nlcols)); 311 PetscCall(MatGetOwnershipRange(A, &rstart, &rend)); 312 313 /* Check submatrix call */ 314 if (scall == MAT_REUSE_MATRIX) { 315 /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */ 316 /* Really need to test rows and column sizes! 
*/ 317 newmat = *B; 318 } else { 319 /* Create and fill new matrix */ 320 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &newmat)); 321 PetscCall(MatSetSizes(newmat, nrows, ncols, PETSC_DECIDE, Ncols)); 322 PetscCall(MatSetType(newmat, ((PetscObject)A)->type_name)); 323 PetscCall(MatMPIDenseSetPreallocation(newmat, NULL)); 324 } 325 326 /* Now extract the data pointers and do the copy, column at a time */ 327 newmatd = (Mat_MPIDense *)newmat->data; 328 PetscCall(MatDenseGetArray(newmatd->A, &bv)); 329 PetscCall(MatDenseGetArrayRead(mat->A, &v)); 330 PetscCall(MatDenseGetLDA(mat->A, &lda)); 331 for (i = 0; i < Ncols; i++) { 332 const PetscScalar *av = v + lda * icol[i]; 333 for (j = 0; j < nrows; j++) *bv++ = av[irow[j] - rstart]; 334 } 335 PetscCall(MatDenseRestoreArrayRead(mat->A, &v)); 336 PetscCall(MatDenseRestoreArray(newmatd->A, &bv)); 337 338 /* Assemble the matrices so that the correct flags are set */ 339 PetscCall(MatAssemblyBegin(newmat, MAT_FINAL_ASSEMBLY)); 340 PetscCall(MatAssemblyEnd(newmat, MAT_FINAL_ASSEMBLY)); 341 342 /* Free work space */ 343 PetscCall(ISRestoreIndices(isrow, &irow)); 344 PetscCall(ISRestoreIndices(iscol_local, &icol)); 345 PetscCall(ISDestroy(&iscol_local)); 346 *B = newmat; 347 PetscFunctionReturn(PETSC_SUCCESS); 348 } 349 350 static PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A, PetscScalar **array) 351 { 352 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 353 354 PetscFunctionBegin; 355 PetscCall(MatDenseRestoreArray(a->A, array)); 356 PetscFunctionReturn(PETSC_SUCCESS); 357 } 358 359 static PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A, const PetscScalar **array) 360 { 361 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 362 363 PetscFunctionBegin; 364 PetscCall(MatDenseRestoreArrayRead(a->A, array)); 365 PetscFunctionReturn(PETSC_SUCCESS); 366 } 367 368 static PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A, PetscScalar **array) 369 { 370 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 371 372 PetscFunctionBegin; 
373 PetscCall(MatDenseRestoreArrayWrite(a->A, array)); 374 PetscFunctionReturn(PETSC_SUCCESS); 375 } 376 377 static PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat, MatAssemblyType mode) 378 { 379 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 380 PetscInt nstash, reallocs; 381 382 PetscFunctionBegin; 383 if (mdn->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS); 384 385 PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range)); 386 PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs)); 387 PetscCall(PetscInfo(mdn->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs)); 388 PetscFunctionReturn(PETSC_SUCCESS); 389 } 390 391 static PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat, MatAssemblyType mode) 392 { 393 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 394 PetscInt i, *row, *col, flg, j, rstart, ncols; 395 PetscMPIInt n; 396 PetscScalar *val; 397 398 PetscFunctionBegin; 399 if (!mdn->donotstash && !mat->nooffprocentries) { 400 /* wait on receives */ 401 while (1) { 402 PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg)); 403 if (!flg) break; 404 405 for (i = 0; i < n;) { 406 /* Now identify the consecutive vals belonging to the same row */ 407 for (j = i, rstart = row[j]; j < n; j++) { 408 if (row[j] != rstart) break; 409 } 410 if (j < n) ncols = j - i; 411 else ncols = n - i; 412 /* Now assemble all these values with a single function call */ 413 PetscCall(MatSetValues_MPIDense(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode)); 414 i = j; 415 } 416 } 417 PetscCall(MatStashScatterEnd_Private(&mat->stash)); 418 } 419 420 PetscCall(MatAssemblyBegin(mdn->A, mode)); 421 PetscCall(MatAssemblyEnd(mdn->A, mode)); 422 PetscFunctionReturn(PETSC_SUCCESS); 423 } 424 425 static PetscErrorCode MatZeroEntries_MPIDense(Mat A) 426 { 427 Mat_MPIDense *l = (Mat_MPIDense *)A->data; 428 429 PetscFunctionBegin; 430 PetscCall(MatZeroEntries(l->A)); 
431 PetscFunctionReturn(PETSC_SUCCESS); 432 } 433 434 static PetscErrorCode MatZeroRows_MPIDense(Mat A, PetscInt n, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) 435 { 436 Mat_MPIDense *l = (Mat_MPIDense *)A->data; 437 PetscInt i, len, *lrows; 438 439 PetscFunctionBegin; 440 /* get locally owned rows */ 441 PetscCall(PetscLayoutMapLocal(A->rmap, n, rows, &len, &lrows, NULL)); 442 /* fix right hand side if needed */ 443 if (x && b) { 444 const PetscScalar *xx; 445 PetscScalar *bb; 446 447 PetscCall(VecGetArrayRead(x, &xx)); 448 PetscCall(VecGetArrayWrite(b, &bb)); 449 for (i = 0; i < len; ++i) bb[lrows[i]] = diag * xx[lrows[i]]; 450 PetscCall(VecRestoreArrayRead(x, &xx)); 451 PetscCall(VecRestoreArrayWrite(b, &bb)); 452 } 453 PetscCall(MatZeroRows(l->A, len, lrows, 0.0, NULL, NULL)); 454 if (diag != 0.0) { 455 Vec d; 456 457 PetscCall(MatCreateVecs(A, NULL, &d)); 458 PetscCall(VecSet(d, diag)); 459 PetscCall(MatDiagonalSet(A, d, INSERT_VALUES)); 460 PetscCall(VecDestroy(&d)); 461 } 462 PetscCall(PetscFree(lrows)); 463 PetscFunctionReturn(PETSC_SUCCESS); 464 } 465 466 static PetscErrorCode MatMult_MPIDense(Mat mat, Vec xx, Vec yy) 467 { 468 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 469 const PetscScalar *ax; 470 PetscScalar *ay; 471 PetscMemType axmtype, aymtype; 472 473 PetscFunctionBegin; 474 if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat)); 475 PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype)); 476 PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype)); 477 PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE)); 478 PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE)); 479 PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay)); 480 PetscCall(VecRestoreArrayReadAndMemType(xx, &ax)); 481 PetscCall((*mdn->A->ops->mult)(mdn->A, mdn->lvec, yy)); 482 PetscFunctionReturn(PETSC_SUCCESS); 483 } 484 485 static PetscErrorCode MatMultAdd_MPIDense(Mat mat, Vec xx, Vec yy, Vec 
zz) 486 { 487 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 488 const PetscScalar *ax; 489 PetscScalar *ay; 490 PetscMemType axmtype, aymtype; 491 492 PetscFunctionBegin; 493 if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat)); 494 PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype)); 495 PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype)); 496 PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE)); 497 PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE)); 498 PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay)); 499 PetscCall(VecRestoreArrayReadAndMemType(xx, &ax)); 500 PetscCall((*mdn->A->ops->multadd)(mdn->A, mdn->lvec, yy, zz)); 501 PetscFunctionReturn(PETSC_SUCCESS); 502 } 503 504 static PetscErrorCode MatMultTranspose_MPIDense(Mat A, Vec xx, Vec yy) 505 { 506 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 507 const PetscScalar *ax; 508 PetscScalar *ay; 509 PetscMemType axmtype, aymtype; 510 511 PetscFunctionBegin; 512 if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); 513 PetscCall(VecSet(yy, 0.0)); 514 PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec)); 515 PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype)); 516 PetscCall(VecGetArrayAndMemType(yy, &ay, &aymtype)); 517 PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM)); 518 PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM)); 519 PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax)); 520 PetscCall(VecRestoreArrayAndMemType(yy, &ay)); 521 PetscFunctionReturn(PETSC_SUCCESS); 522 } 523 524 static PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A, Vec xx, Vec yy, Vec zz) 525 { 526 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 527 const PetscScalar *ax; 528 PetscScalar *ay; 529 PetscMemType axmtype, aymtype; 530 531 PetscFunctionBegin; 532 if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); 533 PetscCall(VecCopy(yy, zz)); 534 
PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec)); 535 PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype)); 536 PetscCall(VecGetArrayAndMemType(zz, &ay, &aymtype)); 537 PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM)); 538 PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM)); 539 PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax)); 540 PetscCall(VecRestoreArrayAndMemType(zz, &ay)); 541 PetscFunctionReturn(PETSC_SUCCESS); 542 } 543 544 PetscErrorCode MatGetDiagonal_MPIDense(Mat A, Vec v) 545 { 546 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 547 PetscInt lda, len, i, nl, ng, m = A->rmap->n, radd; 548 PetscScalar *x; 549 const PetscScalar *av; 550 551 PetscFunctionBegin; 552 PetscCall(VecGetArray(v, &x)); 553 PetscCall(VecGetSize(v, &ng)); 554 PetscCheck(ng == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming mat and vec"); 555 PetscCall(VecGetLocalSize(v, &nl)); 556 len = PetscMin(a->A->rmap->n, a->A->cmap->n); 557 radd = A->rmap->rstart * m; 558 PetscCall(MatDenseGetArrayRead(a->A, &av)); 559 PetscCall(MatDenseGetLDA(a->A, &lda)); 560 for (i = 0; i < len; i++) x[i] = av[radd + i * lda + i]; 561 PetscCall(MatDenseRestoreArrayRead(a->A, &av)); 562 PetscCall(PetscArrayzero(x + i, nl - i)); 563 PetscCall(VecRestoreArray(v, &x)); 564 PetscFunctionReturn(PETSC_SUCCESS); 565 } 566 567 static PetscErrorCode MatDestroy_MPIDense(Mat mat) 568 { 569 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 570 571 PetscFunctionBegin; 572 PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N)); 573 PetscCall(MatStashDestroy_Private(&mat->stash)); 574 PetscCheck(!mdn->vecinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 575 PetscCheck(!mdn->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 576 
PetscCall(MatDestroy(&mdn->A)); 577 PetscCall(VecDestroy(&mdn->lvec)); 578 PetscCall(PetscSFDestroy(&mdn->Mvctx)); 579 PetscCall(VecDestroy(&mdn->cvec)); 580 PetscCall(MatDestroy(&mdn->cmat)); 581 582 PetscCall(PetscFree(mat->data)); 583 PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL)); 584 585 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", NULL)); 586 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", NULL)); 587 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", NULL)); 588 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", NULL)); 589 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", NULL)); 590 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", NULL)); 591 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", NULL)); 592 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", NULL)); 593 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", NULL)); 594 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", NULL)); 595 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", NULL)); 596 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL)); 597 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", NULL)); 598 #if defined(PETSC_HAVE_ELEMENTAL) 599 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", NULL)); 600 #endif 601 #if defined(PETSC_HAVE_SCALAPACK) 602 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", NULL)); 603 #endif 604 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", NULL)); 605 PetscCall(PetscObjectComposeFunction((PetscObject)mat, 
"MatProductSetFromOptions_mpiaij_mpidense_C", NULL)); 606 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", NULL)); 607 #if defined(PETSC_HAVE_CUDA) 608 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", NULL)); 609 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", NULL)); 610 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", NULL)); 611 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensecuda_mpidense_C", NULL)); 612 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", NULL)); 613 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", NULL)); 614 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", NULL)); 615 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", NULL)); 616 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArray_C", NULL)); 617 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayRead_C", NULL)); 618 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayWrite_C", NULL)); 619 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArray_C", NULL)); 620 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayRead_C", NULL)); 621 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayWrite_C", NULL)); 622 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAPlaceArray_C", NULL)); 623 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAResetArray_C", NULL)); 624 PetscCall(PetscObjectComposeFunction((PetscObject)mat, 
"MatDenseCUDAReplaceArray_C", NULL)); 625 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDASetPreallocation_C", NULL)); 626 #endif 627 #if defined(PETSC_HAVE_HIP) 628 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", NULL)); 629 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", NULL)); 630 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", NULL)); 631 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensehip_mpidense_C", NULL)); 632 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensehip_C", NULL)); 633 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidensehip_C", NULL)); 634 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaij_C", NULL)); 635 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaijhipsparse_C", NULL)); 636 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArray_C", NULL)); 637 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayRead_C", NULL)); 638 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayWrite_C", NULL)); 639 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArray_C", NULL)); 640 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayRead_C", NULL)); 641 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayWrite_C", NULL)); 642 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPPlaceArray_C", NULL)); 643 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPResetArray_C", NULL)); 644 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPReplaceArray_C", NULL)); 645 
PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPSetPreallocation_C", NULL)); 646 #endif 647 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", NULL)); 648 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", NULL)); 649 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", NULL)); 650 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", NULL)); 651 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", NULL)); 652 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", NULL)); 653 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", NULL)); 654 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", NULL)); 655 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", NULL)); 656 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", NULL)); 657 658 PetscCall(PetscObjectCompose((PetscObject)mat, "DiagonalBlock", NULL)); 659 PetscFunctionReturn(PETSC_SUCCESS); 660 } 661 662 #include <petscdraw.h> 663 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat, PetscViewer viewer) 664 { 665 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 666 PetscMPIInt rank; 667 PetscViewerType vtype; 668 PetscBool iascii, isdraw; 669 PetscViewer sviewer; 670 PetscViewerFormat format; 671 672 PetscFunctionBegin; 673 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 674 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 675 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 676 if (iascii) { 677 PetscCall(PetscViewerGetType(viewer, &vtype)); 678 PetscCall(PetscViewerGetFormat(viewer, &format)); 679 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 680 MatInfo info; 681 
      /* INFO_DETAIL: every rank prints its local row count and storage statistics */
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %" PetscInt_FMT " \n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                   (PetscInt)info.memory));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      /* also show the star-forest used for matrix-vector communication, if it exists */
      if (mdn->Mvctx) PetscCall(PetscSFView(mdn->Mvctx, viewer));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      /* nothing dense-specific to print for plain INFO format */
      PetscFunctionReturn(PETSC_SUCCESS);
    }
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;

    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawIsNull(draw, &isnull));
    if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat          A;
    PetscInt     M = mat->rmap->N, N = mat->cmap->N, m, row, i, nz;
    PetscInt    *cols;
    PetscScalar *vals;

    /* rank 0 owns all rows of the gathered copy; every other rank contributes an empty local block */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
    if (rank == 0) {
      PetscCall(MatSetSizes(A, M, N, M, N));
    } else {
      PetscCall(MatSetSizes(A, 0, 0, M, N));
    }
    /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
    PetscCall(MatSetType(A, MATMPIDENSE));
    PetscCall(MatMPIDenseSetPreallocation(A, NULL));

    /* Copy the matrix row by row ... this isn't the most efficient means,
       but it's quick for now */
    A->insertmode = INSERT_VALUES;

    row = mat->rmap->rstart;
    m   = mdn->A->rmap->n;
    for (i = 0; i < m; i++) {
      PetscCall(MatGetRow_MPIDense(mat, row, &nz, &cols, &vals));
      PetscCall(MatSetValues_MPIDense(A, 1, &row, nz, cols, vals, INSERT_VALUES));
      PetscCall(MatRestoreRow_MPIDense(mat, row, &nz, &cols, &vals));
      row++;
    }

    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    if (rank == 0) {
      /* view the gathered sequential matrix under the name of the parallel original */
      PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIDense *)(A->data))->A, ((PetscObject)mat)->name));
      PetscCall(MatView_SeqDense(((Mat_MPIDense *)(A->data))->A, sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Dispatch MatView() for MATMPIDENSE based on the viewer type; unsupported viewers are silently ignored */
static PetscErrorCode MatView_MPIDense(Mat mat, PetscViewer viewer)
{
  PetscBool iascii, isbinary, isdraw, issocket;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));

  if (iascii || issocket || isdraw) {
    PetscCall(MatView_MPIDense_ASCIIorDraworSocket(mat, viewer));
  } else if (isbinary) PetscCall(MatView_Dense_Binary(mat, viewer));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Collect storage statistics from the local sequential block and, for the GLOBAL flags,
   reduce them (max or sum) across the communicator */
static PetscErrorCode MatGetInfo_MPIDense(Mat A, MatInfoType flag, MatInfo *info)
{
  Mat_MPIDense  *mat = (Mat_MPIDense *)A->data;
  Mat            mdn = mat->A;
  PetscLogDouble isend[5], irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;

  /* statistics of the local (sequential) block */
  PetscCall(MatGetInfo(mdn, MAT_LOCAL, info));

  isend[0] = info->nz_used;
  isend[1] = info->nz_allocated;
  isend[2] = info->nz_unneeded;
  isend[3] = info->memory;
  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    /* local values pass through unchanged */
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)A)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)A)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Forward relevant options to the local sequential block; record stash/orientation flags;
   options that do not apply to a dense matrix are logged and ignored */
static PetscErrorCode MatSetOption_MPIDense(Mat A, MatOption op, PetscBool flg)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A, 1);
    a->roworiented = flg;
    PetscCall(MatSetOption(a->A, op, flg));
    break;
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_USE_HASH_TABLE:
  case MAT_SORTED_FULL:
    PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
  case MAT_SPD:
  case MAT_IGNORE_LOWER_TRIANGULAR:
  case MAT_IGNORE_ZERO_ENTRIES:
  case MAT_SPD_ETERNAL:
    /* if the diagonal matrix is square it inherits some of the properties above */
    PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
    break;
  default:
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %s", MatOptions[op]);
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Scale rows by ll (owned locally) and columns by rr (requires communicating rr to the
   column-overlapping ranks through the Mvctx star-forest) */
static PetscErrorCode MatDiagonalScale_MPIDense(Mat A, Vec ll, Vec rr)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)A->data;
  const PetscScalar *l;
  PetscScalar        x, *v, *vv, *r;
  PetscInt           i, j, s2a, s3a, s2, s3, m = mdn->A->rmap->n, n = mdn->A->cmap->n, lda;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArray(mdn->A, &vv));
  PetscCall(MatDenseGetLDA(mdn->A, &lda));
  PetscCall(MatGetLocalSize(A, &s2, &s3));
  if (ll) {
    PetscCall(VecGetLocalSize(ll, &s2a));
    PetscCheck(s2a == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left scaling vector non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT, s2a, s2);
    PetscCall(VecGetArrayRead(ll, &l));
    /* row scaling: walk row i across all columns with stride lda (column-major storage) */
    for (i = 0; i < m; i++) {
      x = l[i];
      v = vv + i;
      for (j = 0; j < n; j++) {
        (*v) *= x;
        v += lda;
      }
    }
    PetscCall(VecRestoreArrayRead(ll, &l));
    PetscCall(PetscLogFlops(1.0 * n * m));
  }
  if (rr) {
    const PetscScalar *ar;

    PetscCall(VecGetLocalSize(rr, &s3a));
    PetscCheck(s3a == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Right scaling vec non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT ".", s3a, s3);
    PetscCall(VecGetArrayRead(rr, &ar));
    /* build the communication pattern lazily on first use */
    if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
    PetscCall(VecGetArray(mdn->lvec, &r));
    /* gather the full right-scaling vector into the local work vector lvec */
    PetscCall(PetscSFBcastBegin(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE));
    PetscCall(VecRestoreArrayRead(rr, &ar));
    /* column scaling: column j is contiguous in memory starting at vv + j*lda */
    for (i = 0; i < n; i++) {
      x = r[i];
      v = vv + i * lda;
      for (j = 0; j < m; j++) (*v++) *= x;
    }
    PetscCall(VecRestoreArray(mdn->lvec, &r));
    PetscCall(PetscLogFlops(1.0 * n * m));
  }
  PetscCall(MatDenseRestoreArray(mdn->A, &vv));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Compute the Frobenius, 1- or infinity-norm of the parallel matrix by combining
   per-rank contributions; the 2-norm is not supported */
static PetscErrorCode MatNorm_MPIDense(Mat A, NormType type, PetscReal *nrm)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)A->data;
  PetscInt           i, j;
  PetscMPIInt        size;
  PetscReal          sum = 0.0;
  const PetscScalar *av, *v;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArrayRead(mdn->A, &av));
  v = av;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    /* uniprocessor: defer entirely to the sequential implementation */
    PetscCall(MatNorm(mdn->A, type, nrm));
  } else {
    if (type == NORM_FROBENIUS) {
      /* sum |a_ij|^2 over the local block, then reduce and take the square root
         NOTE(review): assumes the local array is contiguous (lda == local rows) — confirm */
      for (i = 0; i < mdn->A->cmap->n * mdn->A->rmap->n; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
      *nrm = PetscSqrtReal(*nrm);
      PetscCall(PetscLogFlops(2.0 * mdn->A->cmap->n * mdn->A->rmap->n));
    } else if (type == NORM_1) {
      /* max column sum: accumulate local |column| sums, reduce, then take the maximum */
      PetscReal *tmp, *tmp2;
      PetscCall(PetscCalloc2(A->cmap->N, &tmp, A->cmap->N, &tmp2));
      *nrm = 0.0;
      v    = av;
      for (j = 0; j < mdn->A->cmap->n; j++) {
        for (i = 0; i < mdn->A->rmap->n; i++) {
          tmp[j] += PetscAbsScalar(*v);
          v++;
        }
      }
      PetscCall(MPIU_Allreduce(tmp, tmp2, A->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
      for (j = 0; j < A->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      PetscCall(PetscFree2(tmp, tmp2));
      PetscCall(PetscLogFlops(A->cmap->n * A->rmap->n));
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp;
      PetscCall(MatNorm(mdn->A, type, &ntemp));
      PetscCall(MPIU_Allreduce(&ntemp, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
    } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for two norm");
  }
  PetscCall(MatDenseRestoreArrayRead(mdn->A, &av));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Transpose by inserting each local column of A as a row of B through the generic
   MatSetValues/assembly path; handles INITIAL, REUSE and INPLACE reuse modes */
static PetscErrorCode MatTranspose_MPIDense(Mat A, MatReuse reuse, Mat *matout)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat           B;
  PetscInt      M = A->rmap->N, N = A->cmap->N, m, n, *rwork, rstart = A->rmap->rstart;
  PetscInt      j, i, lda;
  PetscScalar  *v;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
    PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
    PetscCall(MatMPIDenseSetPreallocation(B, NULL));
  } else B = *matout;

  m = a->A->rmap->n;
  n = a->A->cmap->n;
  PetscCall(MatDenseGetArrayRead(a->A, (const PetscScalar **)&v));
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(PetscMalloc1(m, &rwork));
  /* rwork holds the global row indices of this rank's rows; each local column of A
     becomes global row j of B */
  for (i = 0; i < m; i++) rwork[i] = rstart + i;
  for (j = 0; j < n; j++) {
    PetscCall(MatSetValues(B, 1, &j, m, rwork, v, INSERT_VALUES));
    v += lda;
  }
  PetscCall(MatDenseRestoreArrayRead(a->A, (const PetscScalar **)&v));
  PetscCall(PetscFree(rwork));
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    /* INPLACE: replace A's contents with the transpose */
    PetscCall(MatHeaderMerge(A, &B));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatDuplicate_MPIDense(Mat, MatDuplicateOption, Mat *);
PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat, PetscScalar);

static PetscErrorCode
MatSetUp_MPIDense(Mat A)
{
  PetscFunctionBegin;
  /* finalize the layouts, then allocate the local block if the user has not done so */
  PetscCall(PetscLayoutSetUp(A->rmap));
  PetscCall(PetscLayoutSetUp(A->cmap));
  if (!A->preallocated) PetscCall(MatMPIDenseSetPreallocation(A, NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Y += alpha*X, delegated to the local sequential blocks (layouts are assumed compatible) */
static PetscErrorCode MatAXPY_MPIDense(Mat Y, PetscScalar alpha, Mat X, MatStructure str)
{
  Mat_MPIDense *A = (Mat_MPIDense *)Y->data, *B = (Mat_MPIDense *)X->data;

  PetscFunctionBegin;
  PetscCall(MatAXPY(A->A, alpha, B->A, str));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Complex-conjugate every local entry in place */
static PetscErrorCode MatConjugate_MPIDense(Mat mat)
{
  Mat_MPIDense *a = (Mat_MPIDense *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatConjugate(a->A));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Keep only the real part of every local entry */
static PetscErrorCode MatRealPart_MPIDense(Mat A)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatRealPart(a->A));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Keep only the imaginary part of every local entry */
static PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatImaginaryPart(a->A));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Extract global column col into v via the local block's getcolumnvector kernel */
static PetscErrorCode MatGetColumnVector_MPIDense(Mat A, Vec v, PetscInt col)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(a->A, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing local matrix");
  PetscCheck(a->A->ops->getcolumnvector, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing get column operation");
  PetscCall((*a->A->ops->getcolumnvector)(a->A, v, col));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat, PetscInt, PetscReal *);

/* Per-column reductions (norms, sums, means): compute local column reductions, then
   combine across ranks (MAX for infinity norm, SUM otherwise) and post-process
   (sqrt for 2-norm, divide by global row count for means) */
static PetscErrorCode MatGetColumnReductions_MPIDense(Mat A, PetscInt type, PetscReal *reductions)
{
  PetscInt      i, m, n;
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscReal    *work;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A, &m, &n));
  PetscCall(PetscMalloc1(n, &work));
  if (type == REDUCTION_MEAN_REALPART) {
    /* mean = global sum / global row count, so request the sum locally */
    PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_REALPART, work));
  } else if (type == REDUCTION_MEAN_IMAGINARYPART) {
    PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_IMAGINARYPART, work));
  } else {
    PetscCall(MatGetColumnReductions_SeqDense(a->A, type, work));
  }
  if (type == NORM_2) {
    /* local 2-norms must be squared so that they sum correctly across ranks */
    for (i = 0; i < n; i++) work[i] *= work[i];
  }
  if (type == NORM_INFINITY) {
    PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, A->hdr.comm));
  } else {
    PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, A->hdr.comm));
  }
  PetscCall(PetscFree(work));
  if (type == NORM_2) {
    for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
  } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < n; i++) reductions[i] /= m;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Fill the local block with random values; mirror the device offload state of the block */
static PetscErrorCode MatSetRandom_MPIDense(Mat x, PetscRandom rctx)
{
  Mat_MPIDense *d = (Mat_MPIDense *)x->data;

  PetscFunctionBegin;
  PetscCall(MatSetRandom(d->A, rctx));
#if defined(PETSC_HAVE_DEVICE)
  x->offloadmask = d->A->offloadmask;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* A dense matrix stores every entry, so a diagonal entry can never be missing;
   the location output d is left untouched */
static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A, PetscBool *missing, PetscInt *d)
{
  PetscFunctionBegin;
  *missing = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat);
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat);
static PetscErrorCode
MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat); 1103 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat); 1104 static PetscErrorCode MatEqual_MPIDense(Mat, Mat, PetscBool *); 1105 static PetscErrorCode MatLoad_MPIDense(Mat, PetscViewer); 1106 static PetscErrorCode MatProductSetFromOptions_MPIDense(Mat); 1107 1108 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense, 1109 MatGetRow_MPIDense, 1110 MatRestoreRow_MPIDense, 1111 MatMult_MPIDense, 1112 /* 4*/ MatMultAdd_MPIDense, 1113 MatMultTranspose_MPIDense, 1114 MatMultTransposeAdd_MPIDense, 1115 NULL, 1116 NULL, 1117 NULL, 1118 /* 10*/ NULL, 1119 NULL, 1120 NULL, 1121 NULL, 1122 MatTranspose_MPIDense, 1123 /* 15*/ MatGetInfo_MPIDense, 1124 MatEqual_MPIDense, 1125 MatGetDiagonal_MPIDense, 1126 MatDiagonalScale_MPIDense, 1127 MatNorm_MPIDense, 1128 /* 20*/ MatAssemblyBegin_MPIDense, 1129 MatAssemblyEnd_MPIDense, 1130 MatSetOption_MPIDense, 1131 MatZeroEntries_MPIDense, 1132 /* 24*/ MatZeroRows_MPIDense, 1133 NULL, 1134 NULL, 1135 NULL, 1136 NULL, 1137 /* 29*/ MatSetUp_MPIDense, 1138 NULL, 1139 NULL, 1140 MatGetDiagonalBlock_MPIDense, 1141 NULL, 1142 /* 34*/ MatDuplicate_MPIDense, 1143 NULL, 1144 NULL, 1145 NULL, 1146 NULL, 1147 /* 39*/ MatAXPY_MPIDense, 1148 MatCreateSubMatrices_MPIDense, 1149 NULL, 1150 MatGetValues_MPIDense, 1151 MatCopy_MPIDense, 1152 /* 44*/ NULL, 1153 MatScale_MPIDense, 1154 MatShift_MPIDense, 1155 NULL, 1156 NULL, 1157 /* 49*/ MatSetRandom_MPIDense, 1158 NULL, 1159 NULL, 1160 NULL, 1161 NULL, 1162 /* 54*/ NULL, 1163 NULL, 1164 NULL, 1165 NULL, 1166 NULL, 1167 /* 59*/ MatCreateSubMatrix_MPIDense, 1168 MatDestroy_MPIDense, 1169 MatView_MPIDense, 1170 NULL, 1171 NULL, 1172 /* 64*/ NULL, 1173 NULL, 1174 NULL, 1175 NULL, 1176 NULL, 1177 /* 69*/ NULL, 1178 NULL, 1179 NULL, 1180 NULL, 1181 NULL, 1182 /* 74*/ NULL, 1183 NULL, 1184 NULL, 1185 NULL, 1186 NULL, 1187 /* 79*/ NULL, 1188 NULL, 1189 NULL, 1190 NULL, 1191 /* 83*/ MatLoad_MPIDense, 
1192 NULL, 1193 NULL, 1194 NULL, 1195 NULL, 1196 NULL, 1197 /* 89*/ NULL, 1198 NULL, 1199 NULL, 1200 NULL, 1201 NULL, 1202 /* 94*/ NULL, 1203 NULL, 1204 MatMatTransposeMultSymbolic_MPIDense_MPIDense, 1205 MatMatTransposeMultNumeric_MPIDense_MPIDense, 1206 NULL, 1207 /* 99*/ MatProductSetFromOptions_MPIDense, 1208 NULL, 1209 NULL, 1210 MatConjugate_MPIDense, 1211 NULL, 1212 /*104*/ NULL, 1213 MatRealPart_MPIDense, 1214 MatImaginaryPart_MPIDense, 1215 NULL, 1216 NULL, 1217 /*109*/ NULL, 1218 NULL, 1219 NULL, 1220 MatGetColumnVector_MPIDense, 1221 MatMissingDiagonal_MPIDense, 1222 /*114*/ NULL, 1223 NULL, 1224 NULL, 1225 NULL, 1226 NULL, 1227 /*119*/ NULL, 1228 NULL, 1229 NULL, 1230 NULL, 1231 NULL, 1232 /*124*/ NULL, 1233 MatGetColumnReductions_MPIDense, 1234 NULL, 1235 NULL, 1236 NULL, 1237 /*129*/ NULL, 1238 NULL, 1239 MatTransposeMatMultSymbolic_MPIDense_MPIDense, 1240 MatTransposeMatMultNumeric_MPIDense_MPIDense, 1241 NULL, 1242 /*134*/ NULL, 1243 NULL, 1244 NULL, 1245 NULL, 1246 NULL, 1247 /*139*/ NULL, 1248 NULL, 1249 NULL, 1250 NULL, 1251 NULL, 1252 MatCreateMPIMatConcatenateSeqMat_MPIDense, 1253 /*145*/ NULL, 1254 NULL, 1255 NULL, 1256 NULL, 1257 NULL, 1258 /*150*/ NULL, 1259 NULL}; 1260 1261 static PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat, PetscScalar *data) 1262 { 1263 Mat_MPIDense *a = (Mat_MPIDense *)mat->data; 1264 MatType mtype = MATSEQDENSE; 1265 1266 PetscFunctionBegin; 1267 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1268 PetscCall(PetscLayoutSetUp(mat->rmap)); 1269 PetscCall(PetscLayoutSetUp(mat->cmap)); 1270 if (!a->A) { 1271 PetscCall(MatCreate(PETSC_COMM_SELF, &a->A)); 1272 PetscCall(MatSetSizes(a->A, mat->rmap->n, mat->cmap->N, mat->rmap->n, mat->cmap->N)); 1273 } 1274 #if defined(PETSC_HAVE_CUDA) 1275 PetscBool iscuda; 1276 PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSECUDA, &iscuda)); 1277 if (iscuda) mtype = MATSEQDENSECUDA; 
#endif
#if defined(PETSC_HAVE_HIP)
  PetscBool iship;
  PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSEHIP, &iship));
  if (iship) mtype = MATSEQDENSEHIP;
#endif
  /* create the local block with the matching sequential (possibly device) type */
  PetscCall(MatSetType(a->A, mtype));
  PetscCall(MatSeqDenseSetPreallocation(a->A, data));
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
  mat->offloadmask = a->A->offloadmask;
#endif
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Convert a MATMPIAIJ matrix to MATMPIDENSE: densify the local rows sequentially,
   then concatenate the per-rank blocks into a parallel dense matrix */
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIDense(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat B, C;

  PetscFunctionBegin;
  PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &C));
  PetscCall(MatConvert_SeqAIJ_SeqDense(C, MATSEQDENSE, MAT_INITIAL_MATRIX, &B));
  PetscCall(MatDestroy(&C));
  if (reuse == MAT_REUSE_MATRIX) {
    C = *newmat;
  } else C = NULL;
  PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C));
  PetscCall(MatDestroy(&B));
  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A, &C));
  } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Convert a MATMPIDENSE matrix to MATMPIAIJ: sparsify the local block, then
   concatenate the per-rank results into a parallel AIJ matrix */
static PetscErrorCode MatConvert_MPIDense_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat B, C;

  PetscFunctionBegin;
  PetscCall(MatDenseGetLocalMatrix(A, &C));
  PetscCall(MatConvert_SeqDense_SeqAIJ(C, MATSEQAIJ, MAT_INITIAL_MATRIX, &B));
  if (reuse == MAT_REUSE_MATRIX) {
    C = *newmat;
  } else C = NULL;
  PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C));
  PetscCall(MatDestroy(&B));
  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A, &C));
  } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
  PetscFunctionReturn(PETSC_SUCCESS);
}

#if defined(PETSC_HAVE_ELEMENTAL)
/* Convert to Elemental format by inserting each rank's local rows with ADD_VALUES
   (the interface requirement); handles contiguous and strided (lda != m) local storage */
PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat           mat_elemental;
  PetscScalar  *v;
  PetscInt      m = A->rmap->n, N = A->cmap->N, rstart = A->rmap->rstart, i, *rows, *cols, lda;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    mat_elemental = *newmat;
    PetscCall(MatZeroEntries(*newmat));
  } else {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental));
    PetscCall(MatSetSizes(mat_elemental, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N));
    PetscCall(MatSetType(mat_elemental, MATELEMENTAL));
    PetscCall(MatSetUp(mat_elemental));
    PetscCall(MatSetOption(mat_elemental, MAT_ROW_ORIENTED, PETSC_FALSE));
  }

  PetscCall(PetscMalloc2(m, &rows, N, &cols));
  for (i = 0; i < N; i++) cols[i] = i;
  for (i = 0; i < m; i++) rows[i] = rstart + i;

  /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
  PetscCall(MatDenseGetArray(A, &v));
  PetscCall(MatDenseGetLDA(a->A, &lda));
  if (lda == m) PetscCall(MatSetValues(mat_elemental, m, rows, N, cols, v, ADD_VALUES));
  else {
    /* strided storage: insert one column at a time, skipping the lda padding */
    for (i = 0; i < N; i++) PetscCall(MatSetValues(mat_elemental, m, rows, 1, &i, v + lda * i, ADD_VALUES));
  }
  PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY));
  PetscCall(MatDenseRestoreArray(A, &v));
  PetscCall(PetscFree2(rows, cols));

  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A,
&mat_elemental));
  } else {
    *newmat = mat_elemental;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif

/* Give direct access to global column col of the local block */
static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A, PetscInt col, PetscScalar **vals)
{
  Mat_MPIDense *mat = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseGetColumn(mat->A, col, vals));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Release access obtained with MatDenseGetColumn_MPIDense() */
static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A, PetscScalar **vals)
{
  Mat_MPIDense *mat = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseRestoreColumn(mat->A, vals));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Stack per-rank sequential matrices into one parallel dense matrix: the symbolic phase
   (MAT_INITIAL_MATRIX) validates the column split and creates the result, the numeric
   phase copies inmat into the local block */
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
{
  Mat_MPIDense *mat;
  PetscInt      m, nloc, N;

  PetscFunctionBegin;
  PetscCall(MatGetSize(inmat, &m, &N));
  PetscCall(MatGetLocalSize(inmat, NULL, &nloc));
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    PetscInt sum;

    if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N));
    /* Check sum(n) = N */
    PetscCall(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm));
    PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N);

    PetscCall(MatCreateDense(comm, m, n, PETSC_DETERMINE, N, NULL, outmat));
    PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  }

  /* numeric phase */
  mat = (Mat_MPIDense *)(*outmat)->data;
  PetscCall(MatCopy(inmat, mat->A, SAME_NONZERO_PATTERN));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Expose global column col as a Vec that aliases the local storage; only one column
   vector may be outstanding at a time (tracked through vecinuse) */
PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscInt      lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
  a->vecinuse = col + 1; /* offset by one so 0 means "not in use" */
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(MatDenseGetArray(a->A, (PetscScalar **)&a->ptrinuse));
  PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
  *v = a->cvec;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Return the column vector obtained with MatDenseGetColumnVec_MPIDense() */
PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector");
  a->vecinuse = 0;
  PetscCall(MatDenseRestoreArray(a->A, (PetscScalar **)&a->ptrinuse));
  PetscCall(VecResetArray(a->cvec));
  if (v) *v = NULL;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Read-only variant: same aliasing as GetColumnVec, plus a read lock on the vector */
PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscInt      lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
  a->vecinuse = col + 1;
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(MatDenseGetArrayRead(a->A, &a->ptrinuse));
  PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
  PetscCall(VecLockReadPush(a->cvec));
  *v = a->cvec;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Release the read lock and the aliased column */
PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
  a->vecinuse = 0;
  PetscCall(MatDenseRestoreArrayRead(a->A, &a->ptrinuse));
  PetscCall(VecLockReadPop(a->cvec));
  PetscCall(VecResetArray(a->cvec));
  if (v) *v = NULL;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Write-only variant: the previous contents of the column need not be preserved */
PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscInt      lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
  a->vecinuse = col + 1;
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(MatDenseGetArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
  PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
  *v = a->cvec;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Release the column obtained with MatDenseGetColumnVecWrite_MPIDense() */
PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
  a->vecinuse = 0;
  PetscCall(MatDenseRestoreArrayWrite(a->A,
(PetscScalar **)&a->ptrinuse));
  PetscCall(VecResetArray(a->cvec));
  if (v) *v = NULL;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Expose the global row range [rbegin,rend) x column range [cbegin,cend) as a matrix
   that aliases this matrix's storage; the cached a->cmat is reused and its layouts are
   rebuilt only when the requested sizes changed. Only one submatrix may be outstanding
   at a time (tracked through matinuse). */
static PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A, PetscInt rbegin, PetscInt rend, PetscInt cbegin, PetscInt cend, Mat *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat_MPIDense *c;
  MPI_Comm      comm;
  PetscInt      pbegin, pend;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCheck(!a->vecinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  /* clip the requested global row range against this rank's ownership range */
  pbegin = PetscMax(0, PetscMin(A->rmap->rend, rbegin) - A->rmap->rstart);
  pend   = PetscMin(A->rmap->n, PetscMax(0, rend - A->rmap->rstart));
  if (!a->cmat) {
    PetscCall(MatCreate(comm, &a->cmat));
    PetscCall(MatSetType(a->cmat, ((PetscObject)A)->type_name));
    /* full row range: share A's row layout instead of building a new one */
    if (rend - rbegin == A->rmap->N) PetscCall(PetscLayoutReference(A->rmap, &a->cmat->rmap));
    else {
      PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin));
      PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin));
      PetscCall(PetscLayoutSetUp(a->cmat->rmap));
    }
    PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin));
    PetscCall(PetscLayoutSetUp(a->cmat->cmap));
  } else {
    /* decide (collectively, if local sizes matter) whether the cached row layout still fits */
    PetscBool same = (PetscBool)(rend - rbegin == a->cmat->rmap->N);
    if (same && a->cmat->rmap->N != A->rmap->N) {
      same = (PetscBool)(pend - pbegin == a->cmat->rmap->n);
      PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &same, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
    }
    if (!same) {
      PetscCall(PetscLayoutDestroy(&a->cmat->rmap));
      PetscCall(PetscLayoutCreate(comm, &a->cmat->rmap));
      PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin));
      PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin));
      PetscCall(PetscLayoutSetUp(a->cmat->rmap));
    }
    if (cend - cbegin != a->cmat->cmap->N) {
      PetscCall(PetscLayoutDestroy(&a->cmat->cmap));
      PetscCall(PetscLayoutCreate(comm, &a->cmat->cmap));
      PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin));
      PetscCall(PetscLayoutSetUp(a->cmat->cmap));
    }
  }
  c = (Mat_MPIDense *)a->cmat->data;
  PetscCheck(!c->A, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  /* the submatrix's local block aliases the corresponding window of A's local block */
  PetscCall(MatDenseGetSubMatrix(a->A, pbegin, pend, cbegin, cend, &c->A));

  a->cmat->preallocated = PETSC_TRUE;
  a->cmat->assembled    = PETSC_TRUE;
#if defined(PETSC_HAVE_DEVICE)
  a->cmat->offloadmask = c->A->offloadmask;
#endif
  a->matinuse = cbegin + 1; /* offset by one so 0 means "not in use" */
  *v          = a->cmat;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Return the submatrix obtained with MatDenseGetSubMatrix_MPIDense() and release the alias */
static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A, Mat *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat_MPIDense *c;

  PetscFunctionBegin;
  PetscCheck(a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetSubMatrix() first");
  PetscCheck(a->cmat, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal matrix");
  PetscCheck(*v == a->cmat, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Not the matrix obtained from MatDenseGetSubMatrix()");
  a->matinuse = 0;
  c = (Mat_MPIDense *)a->cmat->data;
  PetscCall(MatDenseRestoreSubMatrix(a->A, &c->A));
  if (v) *v = NULL;
#if defined(PETSC_HAVE_DEVICE)
  A->offloadmask = a->A->offloadmask;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*MC
   MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.

   Options Database Key:
.
-mat_type mpidense - sets the matrix type to `MATMPIDENSE` during a call to `MatSetFromOptions()`

   Level: beginner

.seealso: [](ch_matrices), `Mat`, `MatCreateDense()`, `MATSEQDENSE`, `MATDENSE`
M*/
/* Constructor for MATMPIDENSE: installs the operations table, creates the value stash,
   and registers the type-specific method and conversion/product hooks queried by name elsewhere. */
PetscErrorCode MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense *a;

  PetscFunctionBegin;
  PetscCall(PetscNew(&a));
  mat->data   = (void *)a;
  mat->ops[0] = MatOps_Values;

  mat->insertmode = NOT_SET_VALUES;

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;

  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)mat), 1, &mat->stash));

  /* stuff used for matrix vector multiply */
  a->lvec        = NULL;
  a->Mvctx       = NULL;
  a->roworiented = PETSC_TRUE;

  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", MatDenseGetLDA_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", MatDenseSetLDA_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", MatDenseGetArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", MatDenseRestoreArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", MatDenseGetArrayRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", MatDenseRestoreArrayRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", MatDenseGetArrayWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", MatDenseRestoreArrayWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", MatDensePlaceArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", MatDenseResetArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", MatDenseReplaceArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", MatDenseGetSubMatrix_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", MatDenseRestoreSubMatrix_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", MatConvert_MPIDense_MPIAIJ));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", MatConvert_MPIDense_Elemental));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", MatConvert_Dense_ScaLAPACK));
#endif
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", MatConvert_MPIDense_MPIDenseCUDA));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", MatMPIDenseSetPreallocation_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
#endif
#if defined(PETSC_HAVE_HIP)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", MatConvert_MPIDense_MPIDenseHIP));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", MatDenseGetColumn_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", MatDenseRestoreColumn_MPIDense));
  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, MATMPIDENSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*MC
   MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.

   This matrix type is identical to `MATSEQDENSE` when constructed with a single process communicator,
   and `MATMPIDENSE` otherwise.

   Options Database Key:
.
-mat_type dense - sets the matrix type to `MATDENSE` during a call to `MatSetFromOptions()`

   Level: beginner

.seealso: [](ch_matrices), `Mat`, `MATSEQDENSE`, `MATMPIDENSE`, `MATDENSECUDA`, `MATDENSEHIP`
M*/

/*@C
  MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

  Collective

  Input Parameters:
+ B    - the matrix
- data - optional location of matrix data. Set to `NULL` for PETSc
         to control all matrix memory allocation.

  Level: intermediate

  Notes:
  The dense format is fully compatible with standard Fortran
  storage by columns.

  The data input variable is intended primarily for Fortran programmers
  who wish to allocate their own matrix memory space. Most users should
  set `data` to `NULL`.

.seealso: [](ch_matrices), `Mat`, `MATMPIDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
@*/
PetscErrorCode MatMPIDenseSetPreallocation(Mat B, PetscScalar *data)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  /* dispatch to the implementation registered for B's type; silently a no-op for other types */
  PetscTryMethod(B, "MatMPIDenseSetPreallocation_C", (Mat, PetscScalar *), (B, data));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatDensePlaceArray - Allows one to replace the array in a `MATDENSE` matrix with an
  array provided by the user. This is useful to avoid copying an array
  into a matrix

  Not Collective

  Input Parameters:
+ mat   - the matrix
- array - the array in column major order

  Level: developer

  Note:
  You can return to the original array with a call to `MatDenseResetArray()`. The user is responsible for freeing this array; it will not be
  freed when the matrix is destroyed.

.seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDenseResetArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`,
          `MatDenseReplaceArray()`
@*/
PetscErrorCode MatDensePlaceArray(Mat mat, const PetscScalar *array)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  /* errors (rather than no-ops) if mat's type has no MatDensePlaceArray implementation */
  PetscUseMethod(mat, "MatDensePlaceArray_C", (Mat, const PetscScalar *), (mat, array));
  /* values visible through mat changed; bump state so cached norms etc. are invalidated */
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
  /* the placed host array is now the valid copy */
  mat->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatDenseResetArray - Resets the matrix array to that it previously had before the call to `MatDensePlaceArray()`

  Not Collective

  Input Parameter:
. mat - the matrix

  Level: developer

  Note:
  You can only call this after a call to `MatDensePlaceArray()`

.seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDensePlaceArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`
@*/
PetscErrorCode MatDenseResetArray(Mat mat)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  PetscUseMethod(mat, "MatDenseResetArray_C", (Mat), (mat));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an
  array provided by the user.
This is useful to avoid copying an array
  into a matrix

  Not Collective

  Input Parameters:
+ mat   - the matrix
- array - the array in column major order

  Level: developer

  Note:
  The memory passed in MUST be obtained with `PetscMalloc()` and CANNOT be
  freed by the user. It will be freed when the matrix is destroyed.

.seealso: [](ch_matrices), `Mat`, `MatDensePlaceArray()`, `MatDenseGetArray()`, `VecReplaceArray()`
@*/
PetscErrorCode MatDenseReplaceArray(Mat mat, const PetscScalar *array)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  /* unlike MatDensePlaceArray(), ownership of array transfers to the matrix */
  PetscUseMethod(mat, "MatDenseReplaceArray_C", (Mat, const PetscScalar *), (mat, array));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
  /* the replacement host array is now the valid copy */
  mat->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatCreateDense - Creates a matrix in `MATDENSE` format.

  Collective

  Input Parameters:
+ comm - MPI communicator
. m    - number of local rows (or `PETSC_DECIDE` to have calculated if `M` is given)
. n    - number of local columns (or `PETSC_DECIDE` to have calculated if `N` is given)
. M    - number of global rows (or `PETSC_DECIDE` to have calculated if `m` is given)
. N    - number of global columns (or `PETSC_DECIDE` to have calculated if `n` is given)
- data - optional location of matrix data. Set data to `NULL` (`PETSC_NULL_SCALAR` for Fortran users) for PETSc
         to control all matrix memory allocation.

  Output Parameter:
. A - the matrix

  Level: intermediate

  Notes:
  The dense format is fully compatible with standard Fortran
  storage by columns.

  Although local portions of the matrix are stored in column-major
  order, the matrix is partitioned across MPI ranks by row.

  The data input variable is intended primarily for Fortran programmers
  who wish to allocate their own matrix memory space. Most users should
  set `data` to `NULL` (`PETSC_NULL_SCALAR` for Fortran users).

  The user MUST specify either the local or global matrix dimensions
  (possibly both).

.seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
@*/
PetscErrorCode MatCreateDense(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar *data, Mat *A)
{
  PetscFunctionBegin;
  PetscCall(MatCreate(comm, A));
  PetscCall(MatSetSizes(*A, m, n, M, N));
  PetscCall(MatSetType(*A, MATDENSE));
  /* exactly one of these takes effect, depending on whether *A became MATSEQDENSE or MATMPIDENSE */
  PetscCall(MatSeqDenseSetPreallocation(*A, data));
  PetscCall(MatMPIDenseSetPreallocation(*A, data));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Duplicates A: same type, same row/column layouts (referenced, not copied), and the local
   dense block duplicated according to cpvalues. */
static PetscErrorCode MatDuplicate_MPIDense(Mat A, MatDuplicateOption cpvalues, Mat *newmat)
{
  Mat           mat;
  Mat_MPIDense *a, *oldmat = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  *newmat = NULL;
  PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat));
  PetscCall(MatSetSizes(mat, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
  PetscCall(MatSetType(mat, ((PetscObject)A)->type_name));
  a = (Mat_MPIDense *)mat->data;

  mat->factortype   = A->factortype;
  mat->assembled    = PETSC_TRUE;
  mat->preallocated = PETSC_TRUE;

  mat->insertmode = NOT_SET_VALUES;
  a->donotstash   = oldmat->donotstash;

  /* share (reference-count) the layouts instead of rebuilding them */
  PetscCall(PetscLayoutReference(A->rmap, &mat->rmap));
  PetscCall(PetscLayoutReference(A->cmap, &mat->cmap));

  PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));

  *newmat = mat;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Loads a MATMPIDENSE matrix from a binary or (when available) HDF5 viewer. */
static PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
{
  PetscBool isbinary;
#if defined(PETSC_HAVE_HDF5)
  PetscBool ishdf5;
#endif

  PetscFunctionBegin;
  PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
  PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
  /* force binary viewer to load .info file if it has not yet done so */
  PetscCall(PetscViewerSetUp(viewer));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
#if defined(PETSC_HAVE_HDF5)
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
#endif
  if (isbinary) {
    PetscCall(MatLoad_Dense_Binary(newMat, viewer));
#if defined(PETSC_HAVE_HDF5)
  } else if (ishdf5) {
    PetscCall(MatLoad_Dense_HDF5(newMat, viewer));
#endif
  } else SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Compares the local dense blocks, then ANDs the result over the communicator so all
   ranks return the same flag. */
static PetscErrorCode MatEqual_MPIDense(Mat A, Mat B, PetscBool *flag)
{
  Mat_MPIDense *matB = (Mat_MPIDense *)B->data, *matA = (Mat_MPIDense *)A->data;
  Mat           a, b;

  PetscFunctionBegin;
  a = matA->A;
  b = matB->A;
  PetscCall(MatEqual(a, b, flag));
  PetscCall(MPIU_Allreduce(MPI_IN_PLACE, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Product-destroy callback for C = A^T*B: frees the communication buffers and the
   intermediate sequential product. */
static PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data)
{
  Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data;

  PetscFunctionBegin;
  PetscCall(PetscFree2(atb->sendbuf, atb->recvcounts));
  PetscCall(MatDestroy(&atb->atb));
  PetscCall(PetscFree(atb));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Product-destroy callback for C = A*B^T: frees the double buffers and the
   allgatherv count/displacement arrays. */
static PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data)
{
  Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data;

  PetscFunctionBegin;
  PetscCall(PetscFree2(abt->buf[0],
abt->buf[1]));
  PetscCall(PetscFree2(abt->recvcounts, abt->recvdispls));
  PetscCall(PetscFree(abt));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Numeric phase of C = A^T*B for MPIDense: each rank computes its local block product
   aseq^T*bseq, then an MPI_Reduce_scatter sums the per-rank contributions and delivers
   to each rank the rows of C it owns. */
static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
{
  Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
  Mat_TransMatMultDense *atb;
  MPI_Comm               comm;
  PetscMPIInt            size, *recvcounts;
  PetscScalar           *carray, *sendbuf;
  const PetscScalar     *atbarray;
  PetscInt               i, cN = C->cmap->N, proc, k, j, lda;
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C, 3);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
  atb        = (Mat_TransMatMultDense *)C->product->data;
  recvcounts = atb->recvcounts;
  sendbuf    = atb->sendbuf;

  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));

  /* compute atbarray = aseq^T * bseq; reuse the intermediate from a previous call if present */
  PetscCall(MatTransposeMatMult(a->A, b->A, atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX, PETSC_DEFAULT, &atb->atb));

  PetscCall(MatGetOwnershipRanges(C, &ranges));

  /* arrange atbarray into sendbuf: destination-rank blocks, column-major within each block */
  PetscCall(MatDenseGetArrayRead(atb->atb, &atbarray));
  PetscCall(MatDenseGetLDA(atb->atb, &lda));
  for (proc = 0, k = 0; proc < size; proc++) {
    for (j = 0; j < cN; j++) {
      for (i = ranges[proc]; i < ranges[proc + 1]; i++) sendbuf[k++] = atbarray[i + j * lda];
    }
  }
  PetscCall(MatDenseRestoreArrayRead(atb->atb, &atbarray));

  /* sum all atbarray to local values of C */
  PetscCall(MatDenseGetArrayWrite(c->A, &carray));
  PetscCallMPI(MPI_Reduce_scatter(sendbuf, carray, recvcounts, MPIU_SCALAR, MPIU_SUM, comm));
  PetscCall(MatDenseRestoreArrayWrite(c->A, &carray));
  PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Symbolic phase of C = A^T*B for MPIDense: sizes C, and preallocates the send buffer and
   reduce-scatter counts reused by every numeric call. */
static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
{
  MPI_Comm               comm;
  PetscMPIInt            size;
  PetscInt               cm = A->cmap->n, cM, cN = B->cmap->N;
  Mat_TransMatMultDense *atb;
  PetscBool              cisdense = PETSC_FALSE;
  PetscInt               i;
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C, 4);
  PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  /* A^T*B requires A and B to share the same row distribution */
  if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
    SETERRQ(comm, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
  }

  /* create matrix product C */
  PetscCall(MatSetSizes(C, cm, B->cmap->n,
A->cmap->N, B->cmap->N)); 2003 #if defined(PETSC_HAVE_CUDA) 2004 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, "")); 2005 #endif 2006 #if defined(PETSC_HAVE_HIP) 2007 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSEHIP, "")); 2008 #endif 2009 if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 2010 PetscCall(MatSetUp(C)); 2011 2012 /* create data structure for reuse C */ 2013 PetscCallMPI(MPI_Comm_size(comm, &size)); 2014 PetscCall(PetscNew(&atb)); 2015 cM = C->rmap->N; 2016 PetscCall(PetscMalloc2(cM * cN, &atb->sendbuf, size, &atb->recvcounts)); 2017 PetscCall(MatGetOwnershipRanges(C, &ranges)); 2018 for (i = 0; i < size; i++) atb->recvcounts[i] = (ranges[i + 1] - ranges[i]) * cN; 2019 2020 C->product->data = atb; 2021 C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense; 2022 PetscFunctionReturn(PETSC_SUCCESS); 2023 } 2024 2025 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C) 2026 { 2027 MPI_Comm comm; 2028 PetscMPIInt i, size; 2029 PetscInt maxRows, bufsiz; 2030 PetscMPIInt tag; 2031 PetscInt alg; 2032 Mat_MatTransMultDense *abt; 2033 Mat_Product *product = C->product; 2034 PetscBool flg; 2035 2036 PetscFunctionBegin; 2037 MatCheckProduct(C, 4); 2038 PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 2039 /* check local size of A and B */ 2040 PetscCheck(A->cmap->n == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local column dimensions are incompatible, A (%" PetscInt_FMT ") != B (%" PetscInt_FMT ")", A->cmap->n, B->cmap->n); 2041 2042 PetscCall(PetscStrcmp(product->alg, "allgatherv", &flg)); 2043 alg = flg ? 
0 : 1; /* 0 = allgatherv, 1 = cyclic */

  /* setup matrix product C */
  PetscCall(MatSetSizes(C, A->rmap->n, B->rmap->n, A->rmap->N, B->rmap->N));
  PetscCall(MatSetType(C, MATMPIDENSE));
  PetscCall(MatSetUp(C));
  PetscCall(PetscObjectGetNewTag((PetscObject)C, &tag));

  /* create data structure for reuse C */
  PetscCall(PetscObjectGetComm((PetscObject)C, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCall(PetscNew(&abt));
  abt->tag = tag;
  abt->alg = alg;
  switch (alg) {
  case 1: /* alg: "cyclic" — two ping-pong buffers sized for the largest row block of B */
    for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i]));
    bufsiz = A->cmap->N * maxRows;
    PetscCall(PetscMalloc2(bufsiz, &(abt->buf[0]), bufsiz, &(abt->buf[1])));
    break;
  default: /* alg: "allgatherv" — buf[0] holds the local B^T, buf[1] the gathered global B^T */
    PetscCall(PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1])));
    PetscCall(PetscMalloc2(size, &(abt->recvcounts), size + 1, &(abt->recvdispls)));
    for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N;
    for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i];
    break;
  }

  C->product->data                = abt;
  C->product->destroy             = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
  C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Numeric phase of C = A*B^T, cyclic variant: each rank's packed block of B^T is passed
   around the ring (send to rank-1, receive from rank+1) while the local GEMM against the
   currently-held block fills the matching column panel of C; communication is overlapped
   with computation via nonblocking Isend/Irecv. */
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
{
  Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
  Mat_MatTransMultDense *abt;
  MPI_Comm               comm;
  PetscMPIInt            rank, size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom;
  PetscScalar           *sendbuf, *recvbuf = NULL, *cv;
  PetscInt               i, cK = A->cmap->N, k, j, bn;
  PetscScalar            _DOne = 1.0, _DZero = 0.0;
  const PetscScalar     *av, *bv;
  PetscBLASInt           cm, cn, ck, alda, blda = 0, clda;
  MPI_Request            reqs[2];
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C, 3);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
  abt = (Mat_MatTransMultDense *)C->product->data;
  PetscCall(PetscObjectGetComm((PetscObject)C, &comm));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCall(MatDenseGetArrayRead(a->A, &av));
  PetscCall(MatDenseGetArrayRead(b->A, &bv));
  PetscCall(MatDenseGetArrayWrite(c->A, &cv));
  PetscCall(MatDenseGetLDA(a->A, &i));
  PetscCall(PetscBLASIntCast(i, &alda));
  PetscCall(MatDenseGetLDA(b->A, &i));
  PetscCall(PetscBLASIntCast(i, &blda));
  PetscCall(MatDenseGetLDA(c->A, &i));
  PetscCall(PetscBLASIntCast(i, &clda));
  PetscCall(MatGetOwnershipRanges(B, &ranges));
  bn = B->rmap->n;
  /* if the local block of B is contiguous (lda == row count), send it in place; otherwise pack */
  if (blda == bn) {
    sendbuf = (PetscScalar *)bv;
  } else {
    sendbuf = abt->buf[0];
    for (k = 0, i = 0; i < cK; i++) {
      for (j = 0; j < bn; j++, k++) sendbuf[k] = bv[i * blda + j];
    }
  }
  if (size > 1) {
    sendto   = (rank + size - 1) % size;
    recvfrom = (rank + size + 1) % size;
  } else {
    sendto = recvfrom = 0;
  }
  PetscCall(PetscBLASIntCast(cK, &ck));
  PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm));
  recvisfrom = rank;
  for (i = 0; i < size; i++) {
    /* we have finished receiving in sending, bufs can be read/modified */
    PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */
    PetscInt nextbn         = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];

    if (nextrecvisfrom != rank) {
      /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */
      sendsiz = cK * bn;
      recvsiz = cK * nextbn;
      recvbuf = (i & 1) ?
abt->buf[0] : abt->buf[1]; 2136 PetscCallMPI(MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0])); 2137 PetscCallMPI(MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1])); 2138 } 2139 2140 /* local aseq * sendbuf^T */ 2141 PetscCall(PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn)); 2142 if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &cm, &cn, &ck, &_DOne, av, &alda, sendbuf, &cn, &_DZero, cv + clda * ranges[recvisfrom], &clda)); 2143 2144 if (nextrecvisfrom != rank) { 2145 /* wait for the sends and receives to complete, swap sendbuf and recvbuf */ 2146 PetscCallMPI(MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE)); 2147 } 2148 bn = nextbn; 2149 recvisfrom = nextrecvisfrom; 2150 sendbuf = recvbuf; 2151 } 2152 PetscCall(MatDenseRestoreArrayRead(a->A, &av)); 2153 PetscCall(MatDenseRestoreArrayRead(b->A, &bv)); 2154 PetscCall(MatDenseRestoreArrayWrite(c->A, &cv)); 2155 PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 2156 PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 2157 PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 2158 PetscFunctionReturn(PETSC_SUCCESS); 2159 } 2160 2161 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C) 2162 { 2163 Mat_MPIDense *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data; 2164 Mat_MatTransMultDense *abt; 2165 MPI_Comm comm; 2166 PetscMPIInt size; 2167 PetscScalar *cv, *sendbuf, *recvbuf; 2168 const PetscScalar *av, *bv; 2169 PetscInt blda, i, cK = A->cmap->N, k, j, bn; 2170 PetscScalar _DOne = 1.0, _DZero = 0.0; 2171 PetscBLASInt cm, cn, ck, alda, clda; 2172 2173 PetscFunctionBegin; 2174 MatCheckProduct(C, 3); 2175 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 2176 abt = (Mat_MatTransMultDense *)C->product->data; 2177 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 2178 
PetscCallMPI(MPI_Comm_size(comm, &size)); 2179 PetscCall(MatDenseGetArrayRead(a->A, &av)); 2180 PetscCall(MatDenseGetArrayRead(b->A, &bv)); 2181 PetscCall(MatDenseGetArrayWrite(c->A, &cv)); 2182 PetscCall(MatDenseGetLDA(a->A, &i)); 2183 PetscCall(PetscBLASIntCast(i, &alda)); 2184 PetscCall(MatDenseGetLDA(b->A, &blda)); 2185 PetscCall(MatDenseGetLDA(c->A, &i)); 2186 PetscCall(PetscBLASIntCast(i, &clda)); 2187 /* copy transpose of B into buf[0] */ 2188 bn = B->rmap->n; 2189 sendbuf = abt->buf[0]; 2190 recvbuf = abt->buf[1]; 2191 for (k = 0, j = 0; j < bn; j++) { 2192 for (i = 0; i < cK; i++, k++) sendbuf[k] = bv[i * blda + j]; 2193 } 2194 PetscCall(MatDenseRestoreArrayRead(b->A, &bv)); 2195 PetscCallMPI(MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm)); 2196 PetscCall(PetscBLASIntCast(cK, &ck)); 2197 PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm)); 2198 PetscCall(PetscBLASIntCast(c->A->cmap->n, &cn)); 2199 if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &cm, &cn, &ck, &_DOne, av, &alda, recvbuf, &ck, &_DZero, cv, &clda)); 2200 PetscCall(MatDenseRestoreArrayRead(a->A, &av)); 2201 PetscCall(MatDenseRestoreArrayRead(b->A, &bv)); 2202 PetscCall(MatDenseRestoreArrayWrite(c->A, &cv)); 2203 PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 2204 PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 2205 PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 2206 PetscFunctionReturn(PETSC_SUCCESS); 2207 } 2208 2209 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) 2210 { 2211 Mat_MatTransMultDense *abt; 2212 2213 PetscFunctionBegin; 2214 MatCheckProduct(C, 3); 2215 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 2216 abt = (Mat_MatTransMultDense *)C->product->data; 2217 switch (abt->alg) { 2218 case 1: 2219 PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C)); 2220 break; 2221 
  default:
    PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C));
    break;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Product-destroy callback for C = A*B: releases the (possibly NULL) Elemental mirrors
   and the gathered copy of B. */
static PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data)
{
  Mat_MatMultDense *ab = (Mat_MatMultDense *)data;

  PetscFunctionBegin;
  PetscCall(MatDestroy(&ab->Ce));
  PetscCall(MatDestroy(&ab->Ae));
  PetscCall(MatDestroy(&ab->Be));
  PetscCall(PetscFree(ab));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Numeric phase of C = A*B: either via Elemental mirrors (when the symbolic phase created
   Ae/Ce), or by broadcasting each column of B with the Mvctx star forest and multiplying
   locally with the sequential dense kernel. */
static PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
{
  Mat_MatMultDense *ab;
  Mat_MPIDense     *mdn = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  MatCheckProduct(C, 3);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Missing product data");
  ab = (Mat_MatMultDense *)C->product->data;
  if (ab->Ae && ab->Ce) {
#if PetscDefined(HAVE_ELEMENTAL)
    PetscCall(MatConvert_MPIDense_Elemental(A, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Ae));
    PetscCall(MatConvert_MPIDense_Elemental(B, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Be));
    PetscCall(MatMatMultNumeric_Elemental(ab->Ae, ab->Be, ab->Ce));
    PetscCall(MatConvert(ab->Ce, MATMPIDENSE, MAT_REUSE_MATRIX, &C));
#else
    SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "PETSC_HAVE_ELEMENTAL not defined");
#endif
  } else {
    const PetscScalar *read;
    PetscScalar       *write;
    PetscInt           lda;

    PetscCall(MatDenseGetLDA(B, &lda));
    PetscCall(MatDenseGetArrayRead(B, &read));
    PetscCall(MatDenseGetArrayWrite(ab->Be, &write));
    if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); /* cannot be done during the symbolic phase because of possible calls to MatProductReplaceMats() */
    for (PetscInt i = 0; i < C->cmap->N; ++i) {
      /* gather column i of B on every rank into the corresponding column of Be */
      PetscCall(PetscSFBcastBegin(mdn->Mvctx, MPIU_SCALAR, read + i * lda, write + i * ab->Be->rmap->n, MPI_REPLACE));
      PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, read + i * lda, write + i * ab->Be->rmap->n, MPI_REPLACE));
    }
    PetscCall(MatDenseRestoreArrayWrite(ab->Be, &write));
    PetscCall(MatDenseRestoreArrayRead(B, &read));
    PetscCall(MatMatMultNumeric_SeqDense_SeqDense(((Mat_MPIDense *)(A->data))->A, ab->Be, ((Mat_MPIDense *)(C->data))->A));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Symbolic phase of C = A*B: sizes C and sets up either the Elemental mirrors or the
   sequential gathered copy of B, according to the selected algorithm. */
static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
{
  Mat_Product      *product = C->product;
  PetscInt          alg;
  Mat_MatMultDense *ab;
  PetscBool         flg;

  PetscFunctionBegin;
  MatCheckProduct(C, 4);
  PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
  /* check local size of A and B: A's column layout must match B's row layout */
  PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ", %" PetscInt_FMT ")",
             A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);

  PetscCall(PetscStrcmp(product->alg, "petsc", &flg));
  alg = flg ?
0 : 1; /* 0 = petsc, 1 = elemental */

  /* setup C */
  PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N));
  PetscCall(MatSetType(C, MATMPIDENSE));
  PetscCall(MatSetUp(C));

  /* create data structure for reuse Cdense */
  PetscCall(PetscNew(&ab));

  switch (alg) {
  case 1: /* alg: "elemental" */
#if PetscDefined(HAVE_ELEMENTAL)
    /* create elemental matrices Ae and Be */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &ab->Ae));
    PetscCall(MatSetSizes(ab->Ae, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N));
    PetscCall(MatSetType(ab->Ae, MATELEMENTAL));
    PetscCall(MatSetUp(ab->Ae));
    PetscCall(MatSetOption(ab->Ae, MAT_ROW_ORIENTED, PETSC_FALSE));

    PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &ab->Be));
    PetscCall(MatSetSizes(ab->Be, PETSC_DECIDE, PETSC_DECIDE, B->rmap->N, B->cmap->N));
    PetscCall(MatSetType(ab->Be, MATELEMENTAL));
    PetscCall(MatSetUp(ab->Be));
    PetscCall(MatSetOption(ab->Be, MAT_ROW_ORIENTED, PETSC_FALSE));

    /* compute symbolic Ce = Ae*Be */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)C), &ab->Ce));
    PetscCall(MatMatMultSymbolic_Elemental(ab->Ae, ab->Be, fill, ab->Ce));
#else
    SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "PETSC_HAVE_ELEMENTAL not defined");
#endif
    break;
  default: /* alg: "petsc" — Be holds a full (sequential) copy of B gathered at numeric time */
    ab->Ae = NULL;
    PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, A->cmap->N, B->cmap->N, NULL, &ab->Be));
    ab->Ce = NULL;
    break;
  }

  C->product->data       = ab;
  C->product->destroy    = MatDestroy_MatMatMult_MPIDense_MPIDense;
  C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Selects the C = A*B algorithm ("petsc" or, when available, "elemental") and installs
   the symbolic hooks. */
static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C)
{
  Mat_Product *product     = C->product;
  const char  *algTypes[2] = {"petsc", "elemental"};
  PetscInt     alg, nalg = PetscDefined(HAVE_ELEMENTAL) ?
2 : 1; /* "elemental" is only offered when PETSc was configured with it */
  PetscBool    flg = PETSC_FALSE;

  PetscFunctionBegin;
  /* Set default algorithm */
  alg = 0; /* default is petsc */
  PetscCall(PetscStrcmp(product->alg, "default", &flg));
  if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));

  /* Get runtime option */
  PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat");
  PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_AB", algTypes, nalg, algTypes[alg], &alg, &flg));
  PetscOptionsEnd();
  if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));

  C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense;
  C->ops->productsymbolic = MatProductSymbolic_AB;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Installs the symbolic hooks for C = A^T*B after verifying the shared row layout. */
static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C)
{
  Mat_Product *product = C->product;
  Mat          A = product->A, B = product->B;

  PetscFunctionBegin;
  PetscCheck(A->rmap->rstart == B->rmap->rstart && A->rmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
             A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
  C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense;
  C->ops->productsymbolic          = MatProductSymbolic_AtB;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Selects the C = A*B^T algorithm ("allgatherv" or "cyclic") and installs the symbolic hooks.
   The option name depends on whether the product was requested via the old MatMatTransposeMult()
   API or the MatProduct API. */
static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C)
{
  Mat_Product *product     = C->product;
  const char  *algTypes[2] = {"allgatherv", "cyclic"};
  PetscInt     alg, nalg = 2;
  PetscBool    flg = PETSC_FALSE;

  PetscFunctionBegin;
  /* Set default algorithm */
  alg = 0; /* default is allgatherv */
  PetscCall(PetscStrcmp(product->alg, "default", &flg));
  if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));

  /* Get runtime option */
  if (product->api_user) {
    PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatTransposeMult", "Mat");
    PetscCall(PetscOptionsEList("-matmattransmult_mpidense_mpidense_via", "Algorithmic approach", "MatMatTransposeMult", algTypes, nalg, algTypes[alg], &alg, &flg));
    PetscOptionsEnd();
  } else {
    PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_ABt", "Mat");
    PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_ABt", algTypes, nalg, algTypes[alg], &alg, &flg));
    PetscOptionsEnd();
  }
  if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));

  C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense;
  C->ops->productsymbolic          = MatProductSymbolic_ABt;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Entry point for MatProductSetFromOptions() on MPIDense products: dispatches on the
   product type; unsupported types are silently left untouched. */
static PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C)
{
  Mat_Product *product = C->product;

  PetscFunctionBegin;
  switch (product->type) {
  case MATPRODUCT_AB:
    PetscCall(MatProductSetFromOptions_MPIDense_AB(C));
    break;
  case MATPRODUCT_AtB:
    PetscCall(MatProductSetFromOptions_MPIDense_AtB(C));
    break;
  case MATPRODUCT_ABt:
    PetscCall(MatProductSetFromOptions_MPIDense_ABt(C));
    break;
  default:
    break;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}