/*
   Basic functions for basic parallel dense matrices.
   Portions of this code are under:
   Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
*/

#include <../src/mat/impls/dense/mpi/mpidense.h> /*I "petscmat.h" I*/
#include <../src/mat/impls/aij/mpi/mpiaij.h>
#include <petscblaslapack.h>
#include <petsc/private/vecimpl.h>
#include <petscdevice.h>
#include <petsc/private/deviceimpl.h>

/*@
  MatDenseGetLocalMatrix - For a `MATMPIDENSE` or `MATSEQDENSE` matrix returns the sequential
  matrix that represents the operator. For sequential matrices it returns itself.

  Input Parameter:
. A - the sequential or MPI `MATDENSE` matrix

  Output Parameter:
. B - the inner matrix

  Level: intermediate

.seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATMPIDENSE`, `MATSEQDENSE`
@*/
PetscErrorCode MatDenseGetLocalMatrix(Mat A, Mat *B)
{
  Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
  PetscBool     flg;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscAssertPointer(B, 2);
  /* MPI case: hand back the sequential block held in the implementation data */
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &flg));
  if (flg) *B = mat->A;
  else {
    /* Otherwise only a sequential dense matrix is acceptable; it represents itself */
    PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQDENSE, &flg));
    PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)A)->type_name);
    *B = A;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Copy values of A into B (same layout assumed) by delegating to the local sequential blocks */
static PetscErrorCode MatCopy_MPIDense(Mat A, Mat B, MatStructure s)
{
  Mat_MPIDense *Amat = (Mat_MPIDense *)A->data;
  Mat_MPIDense *Bmat = (Mat_MPIDense *)B->data;

  PetscFunctionBegin;
  PetscCall(MatCopy(Amat->A, Bmat->A, s));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Add alpha to the diagonal entries whose (global) rows are owned by this process */
PetscErrorCode MatShift_MPIDense(Mat A, PetscScalar alpha)
{
  Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
  PetscInt      j, lda, rstart = A->rmap->rstart, rend = A->rmap->rend, rend2;
  PetscScalar  *v;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArray(mat->A, &v));
  PetscCall(MatDenseGetLDA(mat->A, &lda));
  /* For rectangular matrices the diagonal stops at the global column count */
  rend2 = PetscMin(rend, A->cmap->N);
  if (rend2 > rstart) {
    /* Local block is column-major with leading dimension lda: local row j-rstart, global column j */
    for (j = rstart; j < rend2; j++) v[j - rstart + j * lda] += alpha;
    PetscCall(PetscLogFlops(rend2 - rstart));
  }
  PetscCall(MatDenseRestoreArray(mat->A, &v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Access one locally owned row; the global row index is translated to the local block's numbering */
static PetscErrorCode MatGetRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
  PetscInt      lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;

  PetscFunctionBegin;
  PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows");
  lrow = row - rstart;
  PetscCall(MatGetRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Release a row obtained with MatGetRow_MPIDense() */
static PetscErrorCode MatRestoreRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
  PetscInt      lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;

  PetscFunctionBegin;
  PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows");
  lrow = row - rstart;
  PetscCall(MatRestoreRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Return a sequential matrix for the locally owned diagonal block; the result is cached on A
   under the key "DiagonalBlock" and aliases the local storage (no copy is made) */
static PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A, Mat *a)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)A->data;
  PetscInt      m = A->rmap->n, rstart = A->rmap->rstart;
  PetscScalar  *array;
  MPI_Comm      comm;
  PetscBool     flg;
  Mat           B;

  PetscFunctionBegin;
  PetscCall(MatHasCongruentLayouts(A, &flg));
  PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only square matrices supported.");
  PetscCall(PetscObjectQuery((PetscObject)A, "DiagonalBlock", (PetscObject *)&B));
  if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like
               MatRestoreDiagonalBlock */
#if PetscDefined(HAVE_CUDA)
    PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSECUDA, &flg));
    PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSECUDA);
#elif PetscDefined(HAVE_HIP)
    PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSEHIP, &flg));
    PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSEHIP);
#endif
    PetscCall(PetscObjectGetComm((PetscObject)mdn->A, &comm));
    PetscCall(MatCreate(comm, &B));
    PetscCall(MatSetSizes(B, m, m, m, m));
    PetscCall(MatSetType(B, ((PetscObject)mdn->A)->type_name));
    /* NOTE(review): the offset m*rstart locates column rstart only when the local leading
       dimension equals m — confirm lda == m holds for the local block here */
    PetscCall(MatDenseGetArrayRead(mdn->A, (const PetscScalar **)&array));
    PetscCall(MatSeqDenseSetPreallocation(B, array + m * rstart));
    PetscCall(MatDenseRestoreArrayRead(mdn->A, (const PetscScalar **)&array));
    /* Composing takes a reference; drop the local one so A owns the cached block */
    PetscCall(PetscObjectCompose((PetscObject)A, "DiagonalBlock", (PetscObject)B));
    *a = B;
    PetscCall(MatDestroy(&B));
  } else *a = B;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Insert/add values; locally owned rows go straight into the local block, off-process
   rows are queued in the stash for communication during assembly */
static PetscErrorCode MatSetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIDense *A = (Mat_MPIDense *)mat->data;
  PetscInt      i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;
  PetscBool     roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row indices are silently skipped */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        /* row-oriented input: row i of v is contiguous */
        PetscCall(MatSetValues(A->A, 1, &row, n, idxn, PetscSafePointerPlusOffset(v, i * n), addv));
      } else {
        for (j = 0; j < n; j++) {
          if (idxn[j] < 0) continue;
          PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large");
          /* column-oriented input: entry (i,j) lives at v[i + j*m] */
          PetscCall(MatSetValues(A->A, 1, &row, 1, &idxn[j], PetscSafePointerPlusOffset(v, i + j * m), addv));
        }
      }
    } else if (!A->donotstash) {
      /* off-process entry: stash it and mark the matrix unassembled */
      mat->assembled = PETSC_FALSE;
      if (roworiented) {
        PetscCall(MatStashValuesRow_Private(&mat->stash, idxm[i], n, idxn, PetscSafePointerPlusOffset(v, i * n), PETSC_FALSE));
      } else {
        PetscCall(MatStashValuesCol_Private(&mat->stash, idxm[i], n, idxn, PetscSafePointerPlusOffset(v, i), m, PETSC_FALSE));
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Retrieve values; only locally owned rows are supported, negative indices are skipped */
static PetscErrorCode MatGetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
  PetscInt      i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large");
    PetscCheck(idxm[i] >= rstart && idxm[i] < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
    row = idxm[i] - rstart;
    for (j = 0; j < n; j++) {
      if (idxn[j] < 0) continue; /* negative column */
      PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large");
      /* output v is row-oriented: entry (i,j) goes to v[i*n + j] */
      PetscCall(MatGetValues(mdn->A, 1, &row, 1, &idxn[j], v + i * n + j));
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Leading dimension of the local dense block */
static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A, PetscInt *lda)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseGetLDA(a->A, lda));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Set the leading dimension of the local block, creating the block first if needed */
static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A, PetscInt lda)
{
  Mat_MPIDense *a     = (Mat_MPIDense *)A->data;
  MatType       mtype = MATSEQDENSE;

  PetscFunctionBegin;
  if (!a->A) {
    /* No local block yet: set up the layouts and create one of the matching sequential type */
    PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
    PetscCall(PetscLayoutSetUp(A->rmap));
    PetscCall(PetscLayoutSetUp(A->cmap));
    PetscCall(MatCreate(PETSC_COMM_SELF, &a->A));
    /* Local block stores all global columns for the locally owned rows */
    PetscCall(MatSetSizes(a->A, A->rmap->n, A->cmap->N, A->rmap->n, A->cmap->N));
#if PetscDefined(HAVE_CUDA)
    PetscBool iscuda;
    PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSECUDA, &iscuda));
    if (iscuda) mtype = MATSEQDENSECUDA;
#elif PetscDefined(HAVE_HIP)
    PetscBool iship;
    PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSEHIP, &iship));
    if (iship) mtype = MATSEQDENSEHIP;
#endif
    PetscCall(MatSetType(a->A, mtype));
  }
  PetscCall(MatDenseSetLDA(a->A, lda));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Read/write access to the local array; forbidden while a submatrix view is outstanding */
static PetscErrorCode MatDenseGetArray_MPIDense(Mat A, PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseGetArray(a->A, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Read-only access to the local array */
static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A, PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseGetArrayRead(a->A, (const PetscScalar **)array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Write-only access to the local array (previous contents need not be preserved) */
static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A, PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseGetArrayWrite(a->A, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Temporarily replace the local storage with a user-provided array (see MatDensePlaceArray) */
static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A, const PetscScalar *array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDensePlaceArray(a->A, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Undo MatDensePlaceArray(), restoring the original storage */
static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseResetArray(a->A));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Permanently replace the local storage with a user-provided array */
static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A, const PetscScalar *array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseReplaceArray(a->A, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Extract a submatrix; the row partitioning of the result must match that of A
   (no parallel redistribution is performed) */
static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A, IS isrow, IS iscol, MatReuse scall, Mat *B)
{
  Mat_MPIDense      *mat = (Mat_MPIDense *)A->data, *newmatd;
  PetscInt           lda, i, j, rstart, rend, nrows, ncols, Ncols, nlrows, nlcols;
  const PetscInt    *irow, *icol;
  const PetscScalar *v;
  PetscScalar       *bv;
  Mat                newmat;
  IS                 iscol_local;
  MPI_Comm           comm_is, comm_mat;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm_mat));
  PetscCall(PetscObjectGetComm((PetscObject)iscol, &comm_is));
  PetscCheck(comm_mat == comm_is, PETSC_COMM_SELF, PETSC_ERR_ARG_NOTSAMECOMM, "IS communicator must match matrix communicator");

  /* Every process needs all selected columns since the local block stores full rows */
  PetscCall(ISAllGather(iscol, &iscol_local));
  PetscCall(ISGetIndices(isrow, &irow));
  PetscCall(ISGetIndices(iscol_local, &icol));
  PetscCall(ISGetLocalSize(isrow, &nrows));
  PetscCall(ISGetLocalSize(iscol, &ncols));
  PetscCall(ISGetSize(iscol, &Ncols)); /* global number of columns, size of iscol_local */

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK. ... Currently supports only submatrix same partitioning as
     original matrix! */

  PetscCall(MatGetLocalSize(A, &nlrows, &nlcols));
  PetscCall(MatGetOwnershipRange(A, &rstart, &rend));

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &newmat));
    PetscCall(MatSetSizes(newmat, nrows, ncols, PETSC_DECIDE, Ncols));
    PetscCall(MatSetType(newmat, ((PetscObject)A)->type_name));
    PetscCall(MatMPIDenseSetPreallocation(newmat, NULL));
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense *)newmat->data;
  PetscCall(MatDenseGetArray(newmatd->A, &bv));
  PetscCall(MatDenseGetArrayRead(mat->A, &v));
  PetscCall(MatDenseGetLDA(mat->A, &lda));
  for (i = 0; i < Ncols; i++) {
    /* av points to the start of source column icol[i]; gather the selected rows */
    const PetscScalar *av = v + lda * icol[i];
    for (j = 0; j < nrows; j++) *bv++ = av[irow[j] - rstart];
  }
  PetscCall(MatDenseRestoreArrayRead(mat->A, &v));
  PetscCall(MatDenseRestoreArray(newmatd->A, &bv));

  /* Assemble the matrices so that the correct flags are set */
  PetscCall(MatAssemblyBegin(newmat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(newmat, MAT_FINAL_ASSEMBLY));

  /* Free work space */
  PetscCall(ISRestoreIndices(isrow, &irow));
  PetscCall(ISRestoreIndices(iscol_local, &icol));
  PetscCall(ISDestroy(&iscol_local));
  *B = newmat;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Release array access obtained with MatDenseGetArray_MPIDense() */
static PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A, PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseRestoreArray(a->A, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Release read-only array access */
static PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A, PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseRestoreArrayRead(a->A, (const PetscScalar **)array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Release write-only array access */
static PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A, PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseRestoreArrayWrite(a->A, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Start communicating stashed off-process entries to their owning ranks */
static PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat, MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
  PetscInt      nstash, reallocs;

  PetscFunctionBegin;
  /* Nothing to communicate if stashing is disabled or no off-process entries were set */
  if (mdn->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
  PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
  PetscCall(PetscInfo(mdn->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Receive stashed entries, insert them locally, then assemble the local block */
static PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat, MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
  PetscInt      i, *row, *col, flg, j, rstart, ncols;
  PetscMPIInt   n;
  PetscScalar  *val;

  PetscFunctionBegin;
  if (!mdn->donotstash && !mat->nooffprocentries) {
    /* wait on receives */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIDense(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
  }

  PetscCall(MatAssemblyBegin(mdn->A, mode));
  PetscCall(MatAssemblyEnd(mdn->A, mode));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Zero all entries of the local block */
static PetscErrorCode MatZeroEntries_MPIDense(Mat A)
{
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatZeroEntries(l->A));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Zero the listed (global) rows; optionally set the diagonal to diag and fix up b = diag*x */
static PetscErrorCode MatZeroRows_MPIDense(Mat A, PetscInt n, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;
  PetscInt      i, len, *lrows;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(PetscLayoutMapLocal(A->rmap, n, rows, &len, &lrows, NULL));
  /* fix right-hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArrayWrite(b, &bb));
    for (i = 0; i < len; ++i) bb[lrows[i]] = diag * xx[lrows[i]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArrayWrite(b, &bb));
  }
  /* zero the rows in the local block, then set the diagonal in a second pass */
  PetscCall(MatZeroRows(l->A, len, lrows, 0.0, NULL, NULL));
  if (diag != 0.0) {
    Vec d;

    PetscCall(MatCreateVecs(A, NULL, &d));
    PetscCall(VecSet(d, diag));
    PetscCall(MatDiagonalSet(A, d, INSERT_VALUES));
    PetscCall(VecDestroy(&d));
  }
  PetscCall(PetscFree(lrows));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat, Vec, Vec);
PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat, Vec, Vec, Vec);
PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat, Vec, Vec);
PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat, Vec, Vec, Vec);

/* yy = A[:, c_start:c_end) * xx: broadcast xx into the local work vector via the SF,
   then delegate the restricted product to the local block */
static PetscErrorCode MatMultColumnRange_MPIDense(Mat mat, Vec xx, Vec yy, PetscInt c_start, PetscInt c_end)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
  PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
  PetscCall(VecGetArrayWriteAndMemType(mdn->lvec, &ay, &aymtype));
  PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
  PetscCall(VecRestoreArrayWriteAndMemType(mdn->lvec, &ay));
  PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
  PetscUseMethod(mdn->A, "MatMultColumnRange_C", (Mat, Vec, Vec, PetscInt, PetscInt), (mdn->A, mdn->lvec, yy, c_start, c_end));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* yy = A * xx: gather the full xx into the local work vector, then local mat-vec */
static PetscErrorCode MatMult_MPIDense(Mat mat, Vec xx, Vec yy)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
  PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
  PetscCall(VecGetArrayWriteAndMemType(mdn->lvec, &ay, &aymtype));
  PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
  PetscCall(VecRestoreArrayWriteAndMemType(mdn->lvec, &ay));
  PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
  PetscCall((*mdn->A->ops->mult)(mdn->A, mdn->lvec, yy));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* zz = yy + A[:, c_start:c_end) * xx: same gather pattern as above */
static PetscErrorCode MatMultAddColumnRange_MPIDense(Mat mat, Vec xx, Vec yy, Vec zz, PetscInt c_start, PetscInt c_end)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
  PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
  PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
  PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
  PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
  PetscUseMethod(mdn->A, "MatMultAddColumnRange_C", (Mat, Vec, Vec, Vec, PetscInt, PetscInt), (mdn->A, mdn->lvec, yy, zz, c_start, c_end));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* zz = yy + A * xx: gather xx, then delegate to the local multadd */
static PetscErrorCode MatMultAdd_MPIDense(Mat mat, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
  PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
  PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
  PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
  PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
  PetscCall((*mdn->A->ops->multadd)(mdn->A, mdn->lvec, yy, zz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* yy[c_start:c_end) = A^H * xx restricted to the given column range; the local partial
   results are summed across ranks with an SF reduce */
static PetscErrorCode MatMultHermitianTransposeColumnRange_MPIDense(Mat A, Vec xx, Vec yy, PetscInt c_start, PetscInt c_end)
{
  Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;
  PetscInt           r_start, r_end;
  PetscInt           c_start_local, c_end_local;

  PetscFunctionBegin;
  if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
  PetscCall(VecZeroEntries(a->lvec));
  PetscCall(VecGetOwnershipRange(yy, &r_start, &r_end));
  /* intersection of the requested column range with the locally owned part of yy */
  c_start_local = PetscMax(c_start, r_start);
  c_end_local   = PetscMin(c_end, r_end);
  PetscCall(VecGetArrayAndMemType(yy, &ay, &aymtype));
  if (c_end_local > c_start_local) {
    /* NOTE(review): ay is the local array of yy yet is indexed with the global offsets
       c_start_local/c_end_local — confirm whether a shift by r_start is intended here */
    if (PetscMemTypeHost(aymtype)) {
      PetscCall(PetscArrayzero(&ay[c_start_local], (size_t)(c_end_local - c_start_local)));
    } else {
      PetscCall(PetscDeviceRegisterMemory(ay, aymtype, sizeof(*ay) * ((size_t)(r_end - r_start))));
      PetscCall(PetscDeviceArrayZero(NULL, &ay[c_start_local], (size_t)(c_end_local - c_start_local)));
    }
  }
  PetscUseMethod(a->A, "MatMultHermitianTransposeColumnRange_C", (Mat, Vec, Vec, PetscInt, PetscInt), (a->A, xx, a->lvec, c_start, c_end));
  PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
  PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
  PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
  PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
  PetscCall(VecRestoreArrayAndMemType(yy, &ay));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Shared body for (Hermitian) transpose multiply: local transpose product into lvec,
   then sum the partial results into yy across ranks */
static PetscErrorCode MatMultTransposeKernel_MPIDense(Mat A, Vec xx, Vec yy, PetscBool herm)
{
  Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
  PetscCall(VecSet(yy, 0.0));
  if (herm) PetscCall((*a->A->ops->multhermitiantranspose)(a->A, xx, a->lvec));
  else PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec));
  PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(yy, &ay, &aymtype));
  PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
  PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
  PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
  PetscCall(VecRestoreArrayAndMemType(yy, &ay));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* zz = yy + A^H * xx restricted to the given column range */
static PetscErrorCode MatMultHermitianTransposeAddColumnRange_MPIDense(Mat A, Vec xx, Vec yy, Vec zz, PetscInt c_start, PetscInt c_end)
{
  Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
  PetscCall(VecCopy(yy, zz));
  PetscCall(VecZeroEntries(a->lvec));
  PetscUseMethod(a->A, "MatMultHermitianTransposeColumnRange_C", (Mat, Vec, Vec, PetscInt, PetscInt), (a->A, xx, a->lvec, c_start, c_end));
  PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(zz, &ay, &aymtype));
  PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
  PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
  PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
  PetscCall(VecRestoreArrayAndMemType(zz, &ay));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Shared body for zz = yy + A^T/A^H * xx */
static PetscErrorCode MatMultTransposeAddKernel_MPIDense(Mat A, Vec xx, Vec yy, Vec zz, PetscBool herm)
{
  Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
  PetscCall(VecCopy(yy, zz));
  if (herm) PetscCall((*a->A->ops->multhermitiantranspose)(a->A, xx, a->lvec));
  else PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec));
  PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(zz, &ay, &aymtype));
  PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
  PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
  PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
  PetscCall(VecRestoreArrayAndMemType(zz, &ay));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* yy = A^T * xx */
static PetscErrorCode MatMultTranspose_MPIDense(Mat A, Vec xx, Vec yy)
{
  PetscFunctionBegin;
  PetscCall(MatMultTransposeKernel_MPIDense(A, xx, yy, PETSC_FALSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* zz = yy + A^T * xx */
static PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A, Vec xx, Vec yy, Vec zz)
{
  PetscFunctionBegin;
  PetscCall(MatMultTransposeAddKernel_MPIDense(A, xx, yy, zz, PETSC_FALSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* yy = A^H * xx */
static PetscErrorCode MatMultHermitianTranspose_MPIDense(Mat A, Vec xx, Vec yy)
{
  PetscFunctionBegin;
  PetscCall(MatMultTransposeKernel_MPIDense(A, xx, yy, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* zz = yy + A^H * xx */
static PetscErrorCode MatMultHermitianTransposeAdd_MPIDense(Mat A, Vec xx, Vec yy, Vec zz)
{
  PetscFunctionBegin;
  PetscCall(MatMultTransposeAddKernel_MPIDense(A, xx, yy, zz, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Extract the locally owned part of the diagonal into v; trailing local entries of v
   beyond the diagonal length are zeroed */
PetscErrorCode MatGetDiagonal_MPIDense(Mat A, Vec v)
{
  Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
  PetscInt           lda, len, i, nl, ng, m = A->rmap->n, radd;
  PetscScalar       *x;
  const PetscScalar *av;

  PetscFunctionBegin;
  PetscCall(VecGetArray(v, &x));
  PetscCall(VecGetSize(v, &ng));
  PetscCheck(ng == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming mat and vec");
  PetscCall(VecGetLocalSize(v, &nl));
  len = PetscMin(a->A->rmap->n, a->A->cmap->n);
  /* NOTE(review): radd is computed with m (local rows) rather than lda; the column offset
     is only correct when lda == m — confirm for matrices with a modified LDA */
  radd = A->rmap->rstart * m;
  PetscCall(MatDenseGetArrayRead(a->A, &av));
  PetscCall(MatDenseGetLDA(a->A, &lda));
  for (i = 0; i < len; i++) x[i] = av[radd + i * lda + i];
  PetscCall(MatDenseRestoreArrayRead(a->A, &av));
  if (nl - i > 0) PetscCall(PetscArrayzero(x + i, nl - i));
  PetscCall(VecRestoreArray(v, &x));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Destroy the parallel dense matrix: free the local block, work vectors, the SF,
   and detach every composed method */
static PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;

  PetscFunctionBegin;
  PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
  PetscCall(MatStashDestroy_Private(&mat->stash));
  PetscCheck(!mdn->vecinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!mdn->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDestroy(&mdn->A));
  PetscCall(VecDestroy(&mdn->lvec));
  PetscCall(PetscSFDestroy(&mdn->Mvctx));
  PetscCall(VecDestroy(&mdn->cvec));
  PetscCall(MatDestroy(&mdn->cmat));

  PetscCall(PetscFree(mat->data));
  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));

  /* Detach every composed method so a later MatSetType() starts from a clean object */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", NULL));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", NULL));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", NULL));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensecuda_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAPlaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAResetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAReplaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDASetPreallocation_C", NULL));
#endif
#if defined(PETSC_HAVE_HIP)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensehip_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensehip_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidensehip_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaijhipsparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPPlaceArray_C", NULL));
PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPResetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPReplaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPSetPreallocation_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultColumnRange_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultAddColumnRange_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeColumnRange_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeAddColumnRange_C", NULL));

  /* Drop the cached diagonal block composed in MatGetDiagonalBlock_MPIDense */
  PetscCall(PetscObjectCompose((PetscObject)mat, "DiagonalBlock", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

#include <petscdraw.h>
/* Viewer backend for ASCII, draw and socket viewers. INFO formats print per-rank
   summaries only; all other formats gather the whole matrix onto rank 0 and view
   it there as a sequential dense matrix. Collective on the matrix's communicator. */
static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
{
  Mat_MPIDense     *mdn = (Mat_MPIDense *)mat->data;
  PetscMPIInt       rank;
  PetscViewerType   vtype;
  PetscBool         isascii, isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  if (isascii) {
    PetscCall(PetscViewerGetType(viewer, &vtype));
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %" PetscInt_FMT " \n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                   (PetscInt)info.memory));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      if (mdn->Mvctx) PetscCall(PetscSFView(mdn->Mvctx, viewer));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(PETSC_SUCCESS);
    }
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;

    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawIsNull(draw, &isnull));
    if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat          A;
    PetscInt     M = mat->rmap->N, N = mat->cmap->N, m, row, i, nz;
    PetscInt    *cols;
    PetscScalar *vals;

    PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
    if (rank == 0) {
      PetscCall(MatSetSizes(A, M, N, M, N)); /* rank 0 owns everything */
    } else {
      PetscCall(MatSetSizes(A, 0, 0, M, N));
    }
    /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
    PetscCall(MatSetType(A, MATMPIDENSE));
    PetscCall(MatMPIDenseSetPreallocation(A, NULL));

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    A->insertmode = INSERT_VALUES;

    row = mat->rmap->rstart;
    m   = mdn->A->rmap->n;
    for (i = 0; i < m; i++) {
      PetscCall(MatGetRow_MPIDense(mat, row, &nz, &cols, &vals));
      PetscCall(MatSetValues_MPIDense(A, 1, &row, nz, cols, vals, INSERT_VALUES));
      PetscCall(MatRestoreRow_MPIDense(mat, row, &nz, &cols, &vals));
      row++;
    }

    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    if (rank == 0) {
      PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIDense *)A->data)->A, ((PetscObject)mat)->name));
      PetscCall(MatView_SeqDense(((Mat_MPIDense *)A->data)->A, sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Dispatch viewing to the appropriate backend by viewer type */
static PetscErrorCode MatView_MPIDense(Mat mat, PetscViewer viewer)
{
  PetscBool isascii, isbinary, isdraw, issocket;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));

  if (isascii || issocket || isdraw) PetscCall(MatView_MPIDense_ASCIIorDraworSocket(mat, viewer));
  else if (isbinary) PetscCall(MatView_Dense_Binary(mat, viewer));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Collect matrix statistics; MAT_GLOBAL_MAX/MAT_GLOBAL_SUM reduce over all ranks */
static PetscErrorCode MatGetInfo_MPIDense(Mat A, MatInfoType flag, MatInfo *info)
{
  Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
  Mat           mdn =
mat->A;
  PetscLogDouble isend[5], irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;

  PetscCall(MatGetInfo(mdn, MAT_LOCAL, info));

  /* Stage local statistics for the (possible) reduction */
  isend[0] = info->nz_used;
  isend[1] = info->nz_allocated;
  isend[2] = info->nz_unneeded;
  isend[3] = info->memory;
  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)A)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)A)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Forward recognized options to the local sequential matrix; symmetry-type
   options are forwarded only when the local diagonal block is square */
static PetscErrorCode MatSetOption_MPIDense(Mat A, MatOption op, PetscBool flg)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A, 1);
    a->roworiented = flg;
    PetscCall(MatSetOption(a->A, op, flg));
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
  case MAT_SPD:
  case MAT_SPD_ETERNAL:
    /* if the diagonal matrix is square it inherits some of the properties above */
    if (a->A && A->rmap->n == A->cmap->n) PetscCall(MatSetOption(a->A, op, flg));
    break;
  default:
    break;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* A <- diag(ll) A diag(rr); rr is scattered to all ranks via the column SF */
static PetscErrorCode MatDiagonalScale_MPIDense(Mat A, Vec ll, Vec rr)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)A->data;
  const PetscScalar *l;
  PetscScalar        x, *v, *vv, *r;
  PetscInt           i, j, s2a, s3a, s2, s3, m = mdn->A->rmap->n, n = mdn->A->cmap->n, lda;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArray(mdn->A, &vv));
  PetscCall(MatDenseGetLDA(mdn->A, &lda));
  PetscCall(MatGetLocalSize(A, &s2, &s3));
  if (ll) {
    PetscCall(VecGetLocalSize(ll, &s2a));
    PetscCheck(s2a == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left scaling vector non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT, s2a, s2);
    PetscCall(VecGetArrayRead(ll, &l));
    for (i = 0; i < m; i++) {
      x = l[i];
      v = vv + i; /* walk row i across the lda-strided column-major storage */
      for (j = 0; j < n; j++) {
        (*v) *= x;
        v += lda;
      }
    }
    PetscCall(VecRestoreArrayRead(ll, &l));
    PetscCall(PetscLogFlops(1.0 * n * m));
  }
  if (rr) {
    const PetscScalar *ar;

    PetscCall(VecGetLocalSize(rr, &s3a));
    PetscCheck(s3a == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Right scaling vec non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT ".", s3a, s3);
    PetscCall(VecGetArrayRead(rr, &ar));
    if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
    PetscCall(VecGetArray(mdn->lvec, &r));
    /* broadcast the distributed right-scaling entries into the local work vector */
    PetscCall(PetscSFBcastBegin(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE));
    PetscCall(VecRestoreArrayRead(rr, &ar));
    for (i = 0; i < n; i++) {
      x = r[i];
      v = vv + i * lda; /* column i is contiguous */
      for (j = 0; j < m; j++) (*v++) *= x;
    }
    PetscCall(VecRestoreArray(mdn->lvec, &r));
    PetscCall(PetscLogFlops(1.0 * n * m));
  }
  PetscCall(MatDenseRestoreArray(mdn->A, &vv));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Frobenius, 1- and infinity norms; the 2-norm is not supported in parallel.
   NOTE(review): iterating cmap->n * rmap->n over the raw array assumes the
   local block is stored contiguously (lda == m) — confirm against MatDenseGetLDA use elsewhere */
static PetscErrorCode MatNorm_MPIDense(Mat A, NormType type, PetscReal *nrm)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)A->data;
  PetscInt           i, j;
  PetscMPIInt        size;
  PetscReal          sum = 0.0;
  const PetscScalar *av, *v;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArrayRead(mdn->A, &av));
  v = av;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    PetscCall(MatNorm(mdn->A, type, nrm));
  } else {
    if (type == NORM_FROBENIUS) {
      for (i = 0; i < mdn->A->cmap->n * mdn->A->rmap->n; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      PetscCallMPI(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
      *nrm = PetscSqrtReal(*nrm);
      PetscCall(PetscLogFlops(2.0 * mdn->A->cmap->n * mdn->A->rmap->n));
    } else if (type == NORM_1) {
      /* max over columns of the global column sums of absolute values */
      PetscReal *tmp;

      PetscCall(PetscCalloc1(A->cmap->N, &tmp));
      *nrm = 0.0;
      v    = av;
      for (j = 0; j < mdn->A->cmap->n; j++) {
        for (i = 0; i < mdn->A->rmap->n; i++) {
          tmp[j] += PetscAbsScalar(*v);
          v++;
        }
      }
      PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, tmp, A->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
      for (j = 0; j < A->cmap->N; j++) {
        if (tmp[j] > *nrm) *nrm = tmp[j];
      }
      PetscCall(PetscFree(tmp));
      PetscCall(PetscLogFlops(A->cmap->n * A->rmap->n));
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscCall(MatNorm(mdn->A, type, nrm));
      PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
    } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for two norm");
  }
PetscCall(MatDenseRestoreArrayRead(mdn->A, &av)); 1069 PetscFunctionReturn(PETSC_SUCCESS); 1070 } 1071 1072 static PetscErrorCode MatTranspose_MPIDense(Mat A, MatReuse reuse, Mat *matout) 1073 { 1074 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1075 Mat B; 1076 PetscInt M = A->rmap->N, N = A->cmap->N, m, n, *rwork, rstart = A->rmap->rstart; 1077 PetscInt j, i, lda; 1078 PetscScalar *v; 1079 1080 PetscFunctionBegin; 1081 if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout)); 1082 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 1083 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 1084 PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M)); 1085 PetscCall(MatSetType(B, ((PetscObject)A)->type_name)); 1086 PetscCall(MatMPIDenseSetPreallocation(B, NULL)); 1087 } else B = *matout; 1088 1089 m = a->A->rmap->n; 1090 n = a->A->cmap->n; 1091 PetscCall(MatDenseGetArrayRead(a->A, (const PetscScalar **)&v)); 1092 PetscCall(MatDenseGetLDA(a->A, &lda)); 1093 PetscCall(PetscMalloc1(m, &rwork)); 1094 for (i = 0; i < m; i++) rwork[i] = rstart + i; 1095 for (j = 0; j < n; j++) { 1096 PetscCall(MatSetValues(B, 1, &j, m, rwork, v, INSERT_VALUES)); 1097 v = PetscSafePointerPlusOffset(v, lda); 1098 } 1099 PetscCall(MatDenseRestoreArrayRead(a->A, (const PetscScalar **)&v)); 1100 PetscCall(PetscFree(rwork)); 1101 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 1102 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 1103 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 1104 *matout = B; 1105 } else { 1106 PetscCall(MatHeaderMerge(A, &B)); 1107 } 1108 PetscFunctionReturn(PETSC_SUCCESS); 1109 } 1110 1111 static PetscErrorCode MatDuplicate_MPIDense(Mat, MatDuplicateOption, Mat *); 1112 PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat, PetscScalar); 1113 1114 static PetscErrorCode MatSetUp_MPIDense(Mat A) 1115 { 1116 PetscFunctionBegin; 1117 PetscCall(PetscLayoutSetUp(A->rmap)); 1118 
PetscCall(PetscLayoutSetUp(A->cmap)); 1119 if (!A->preallocated) PetscCall(MatMPIDenseSetPreallocation(A, NULL)); 1120 PetscFunctionReturn(PETSC_SUCCESS); 1121 } 1122 1123 static PetscErrorCode MatAXPY_MPIDense(Mat Y, PetscScalar alpha, Mat X, MatStructure str) 1124 { 1125 Mat_MPIDense *A = (Mat_MPIDense *)Y->data, *B = (Mat_MPIDense *)X->data; 1126 1127 PetscFunctionBegin; 1128 PetscCall(MatAXPY(A->A, alpha, B->A, str)); 1129 PetscFunctionReturn(PETSC_SUCCESS); 1130 } 1131 1132 static PetscErrorCode MatConjugate_MPIDense(Mat mat) 1133 { 1134 Mat_MPIDense *a = (Mat_MPIDense *)mat->data; 1135 1136 PetscFunctionBegin; 1137 PetscCall(MatConjugate(a->A)); 1138 PetscFunctionReturn(PETSC_SUCCESS); 1139 } 1140 1141 static PetscErrorCode MatRealPart_MPIDense(Mat A) 1142 { 1143 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1144 1145 PetscFunctionBegin; 1146 PetscCall(MatRealPart(a->A)); 1147 PetscFunctionReturn(PETSC_SUCCESS); 1148 } 1149 1150 static PetscErrorCode MatImaginaryPart_MPIDense(Mat A) 1151 { 1152 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1153 1154 PetscFunctionBegin; 1155 PetscCall(MatImaginaryPart(a->A)); 1156 PetscFunctionReturn(PETSC_SUCCESS); 1157 } 1158 1159 static PetscErrorCode MatGetColumnVector_MPIDense(Mat A, Vec v, PetscInt col) 1160 { 1161 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1162 1163 PetscFunctionBegin; 1164 PetscCheck(a->A, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing local matrix"); 1165 PetscCheck(a->A->ops->getcolumnvector, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing get column operation"); 1166 PetscCall((*a->A->ops->getcolumnvector)(a->A, v, col)); 1167 PetscFunctionReturn(PETSC_SUCCESS); 1168 } 1169 1170 PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat, PetscInt, PetscReal *); 1171 1172 static PetscErrorCode MatGetColumnReductions_MPIDense(Mat A, PetscInt type, PetscReal *reductions) 1173 { 1174 PetscInt i, m, n; 1175 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1176 1177 PetscFunctionBegin; 1178 
PetscCall(MatGetSize(A, &m, &n)); 1179 if (type == REDUCTION_MEAN_REALPART) { 1180 PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_REALPART, reductions)); 1181 } else if (type == REDUCTION_MEAN_IMAGINARYPART) { 1182 PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_IMAGINARYPART, reductions)); 1183 } else { 1184 PetscCall(MatGetColumnReductions_SeqDense(a->A, type, reductions)); 1185 } 1186 if (type == NORM_2) { 1187 for (i = 0; i < n; i++) reductions[i] *= reductions[i]; 1188 } 1189 if (type == NORM_INFINITY) { 1190 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, reductions, n, MPIU_REAL, MPIU_MAX, A->hdr.comm)); 1191 } else { 1192 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, reductions, n, MPIU_REAL, MPIU_SUM, A->hdr.comm)); 1193 } 1194 if (type == NORM_2) { 1195 for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]); 1196 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 1197 for (i = 0; i < n; i++) reductions[i] /= m; 1198 } 1199 PetscFunctionReturn(PETSC_SUCCESS); 1200 } 1201 1202 static PetscErrorCode MatSetRandom_MPIDense(Mat x, PetscRandom rctx) 1203 { 1204 Mat_MPIDense *d = (Mat_MPIDense *)x->data; 1205 1206 PetscFunctionBegin; 1207 PetscCall(MatSetRandom(d->A, rctx)); 1208 #if defined(PETSC_HAVE_DEVICE) 1209 x->offloadmask = d->A->offloadmask; 1210 #endif 1211 PetscFunctionReturn(PETSC_SUCCESS); 1212 } 1213 1214 static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A, PetscBool *missing, PetscInt *d) 1215 { 1216 PetscFunctionBegin; 1217 *missing = PETSC_FALSE; 1218 PetscFunctionReturn(PETSC_SUCCESS); 1219 } 1220 1221 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat); 1222 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat); 1223 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat); 1224 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat, 
Mat, Mat);
static PetscErrorCode MatEqual_MPIDense(Mat, Mat, PetscBool *);
static PetscErrorCode MatLoad_MPIDense(Mat, PetscViewer);
static PetscErrorCode MatProductSetFromOptions_MPIDense(Mat);

/* Function dispatch table for MATMPIDENSE; slot positions are fixed by the
   _MatOps layout, so entries must never be reordered (index comments below
   mark every fifth slot). NULL means the operation is unsupported. */
static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
                                       MatGetRow_MPIDense,
                                       MatRestoreRow_MPIDense,
                                       MatMult_MPIDense,
                                       /* 4*/ MatMultAdd_MPIDense,
                                       MatMultTranspose_MPIDense,
                                       MatMultTransposeAdd_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 10*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatTranspose_MPIDense,
                                       /* 15*/ MatGetInfo_MPIDense,
                                       MatEqual_MPIDense,
                                       MatGetDiagonal_MPIDense,
                                       MatDiagonalScale_MPIDense,
                                       MatNorm_MPIDense,
                                       /* 20*/ MatAssemblyBegin_MPIDense,
                                       MatAssemblyEnd_MPIDense,
                                       MatSetOption_MPIDense,
                                       MatZeroEntries_MPIDense,
                                       /* 24*/ MatZeroRows_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 29*/ MatSetUp_MPIDense,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPIDense,
                                       NULL,
                                       /* 34*/ MatDuplicate_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 39*/ MatAXPY_MPIDense,
                                       MatCreateSubMatrices_MPIDense,
                                       NULL,
                                       MatGetValues_MPIDense,
                                       MatCopy_MPIDense,
                                       /* 44*/ NULL,
                                       MatScale_MPIDense,
                                       MatShift_MPIDense,
                                       NULL,
                                       NULL,
                                       /* 49*/ MatSetRandom_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 54*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 59*/ MatCreateSubMatrix_MPIDense,
                                       MatDestroy_MPIDense,
                                       MatView_MPIDense,
                                       NULL,
                                       NULL,
                                       /* 64*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 69*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 74*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatLoad_MPIDense,
                                       /* 79*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 83*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatMatTransposeMultSymbolic_MPIDense_MPIDense,
                                       MatMatTransposeMultNumeric_MPIDense_MPIDense,
                                       /* 89*/ NULL,
                                       MatProductSetFromOptions_MPIDense,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPIDense,
                                       /* 94*/ NULL,
                                       NULL,
                                       MatRealPart_MPIDense,
                                       MatImaginaryPart_MPIDense,
                                       NULL,
                                       /*99*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatGetColumnVector_MPIDense,
                                       /*104*/ MatMissingDiagonal_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*109*/ NULL,
                                       NULL,
                                       NULL,
                                       MatMultHermitianTranspose_MPIDense,
                                       MatMultHermitianTransposeAdd_MPIDense,
                                       /*114*/ NULL,
                                       NULL,
                                       MatGetColumnReductions_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*120*/ NULL,
                                       MatTransposeMatMultSymbolic_MPIDense_MPIDense,
                                       MatTransposeMatMultNumeric_MPIDense_MPIDense,
                                       NULL,
                                       /*124*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*129*/ NULL,
                                       NULL,
                                       MatCreateMPIMatConcatenateSeqMat_MPIDense,
                                       NULL,
                                       NULL,
                                       /*134*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*139*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL};

/* Allocate the local sequential dense block (optionally wrapping user data);
   the local block is rmap->n x cmap->N, i.e. each rank stores ALL columns of
   its row slice. Picks the device sequential type on CUDA/HIP builds. */
static PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat, PetscScalar *data)
{
  Mat_MPIDense *a     = (Mat_MPIDense *)mat->data;
  MatType       mtype = MATSEQDENSE;

  PetscFunctionBegin;
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));
  if (!a->A) {
    PetscCall(MatCreate(PETSC_COMM_SELF, &a->A));
    PetscCall(MatSetSizes(a->A, mat->rmap->n, mat->cmap->N, mat->rmap->n, mat->cmap->N));
  }
#if defined(PETSC_HAVE_CUDA)
  PetscBool iscuda;
  PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSECUDA, &iscuda));
  if (iscuda) mtype = MATSEQDENSECUDA;
#endif
#if defined(PETSC_HAVE_HIP)
  PetscBool iship;
  PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSEHIP, &iship));
  if (iship) mtype =
MATSEQDENSEHIP;
#endif
  PetscCall(MatSetType(a->A, mtype));
  PetscCall(MatSeqDenseSetPreallocation(a->A, data));
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
  mat->offloadmask = a->A->offloadmask;
#endif
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Convert a parallel AIJ matrix to parallel dense: densify the merged local
   part, then concatenate the sequential pieces across the communicator */
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIDense(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat B, C;

  PetscFunctionBegin;
  PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &C));
  PetscCall(MatConvert_SeqAIJ_SeqDense(C, MATSEQDENSE, MAT_INITIAL_MATRIX, &B));
  PetscCall(MatDestroy(&C));
  if (reuse == MAT_REUSE_MATRIX) {
    C = *newmat;
  } else C = NULL;
  PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C));
  PetscCall(MatDestroy(&B));
  if (reuse == MAT_INPLACE_MATRIX) PetscCall(MatHeaderReplace(A, &C));
  else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Convert a parallel dense matrix to parallel AIJ via the local block */
static PetscErrorCode MatConvert_MPIDense_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat B, C;

  PetscFunctionBegin;
  PetscCall(MatDenseGetLocalMatrix(A, &C));
  PetscCall(MatConvert_SeqDense_SeqAIJ(C, MATSEQAIJ, MAT_INITIAL_MATRIX, &B));
  if (reuse == MAT_REUSE_MATRIX) {
    C = *newmat;
  } else C = NULL;
  PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C));
  PetscCall(MatDestroy(&B));
  if (reuse == MAT_INPLACE_MATRIX) PetscCall(MatHeaderReplace(A, &C));
  else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
  PetscFunctionReturn(PETSC_SUCCESS);
}

#if defined(PETSC_HAVE_ELEMENTAL)
/* Convert to Elemental's 2D block-cyclic distribution by inserting the local
   row slice with MatSetValues (ADD_VALUES is required by the interface) */
PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat           mat_elemental;
  PetscScalar  *v;
  PetscInt      m = A->rmap->n, N = A->cmap->N, rstart = A->rmap->rstart, i, *rows, *cols, lda;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    mat_elemental = *newmat;
    PetscCall(MatZeroEntries(*newmat));
  } else {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental));
    PetscCall(MatSetSizes(mat_elemental, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N));
    PetscCall(MatSetType(mat_elemental, MATELEMENTAL));
    PetscCall(MatSetUp(mat_elemental));
    PetscCall(MatSetOption(mat_elemental, MAT_ROW_ORIENTED, PETSC_FALSE));
  }

  PetscCall(PetscMalloc2(m, &rows, N, &cols));
  for (i = 0; i < N; i++) cols[i] = i;
  for (i = 0; i < m; i++) rows[i] = rstart + i;

  /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
  PetscCall(MatDenseGetArray(A, &v));
  PetscCall(MatDenseGetLDA(a->A, &lda));
  if (lda == m) PetscCall(MatSetValues(mat_elemental, m, rows, N, cols, v, ADD_VALUES));
  else {
    /* non-contiguous storage: insert one column at a time honoring lda */
    for (i = 0; i < N; i++) PetscCall(MatSetValues(mat_elemental, m, rows, 1, &i, v + lda * i, ADD_VALUES));
  }
  PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY));
  PetscCall(MatDenseRestoreArray(A, &v));
  PetscCall(PetscFree2(rows, cols));

  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A, &mat_elemental));
  } else {
    *newmat = mat_elemental;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif

/* Borrow a raw pointer to global column col of the local block */
static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A, PetscInt col, PetscScalar **vals)
{
  Mat_MPIDense *mat = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseGetColumn(mat->A, col, vals));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Return the pointer obtained with MatDenseGetColumn_MPIDense */
static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A, PetscScalar **vals)
{
  Mat_MPIDense *mat = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseRestoreColumn(mat->A, vals));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Stack per-rank sequential dense matrices into one parallel dense matrix;
   MAT_INITIAL_MATRIX creates the result, otherwise only the values are copied */
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
{
  Mat_MPIDense *mat;
  PetscInt      m, nloc, N;

  PetscFunctionBegin;
  PetscCall(MatGetSize(inmat, &m, &N));
  PetscCall(MatGetLocalSize(inmat, NULL, &nloc));
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    PetscInt sum;

    if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N));
    /* Check sum(n) = N */
    PetscCallMPI(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm));
    PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N);

    PetscCall(MatCreateDense(comm, m, n, PETSC_DETERMINE, N, NULL, outmat));
    PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  }

  /* numeric phase */
  mat = (Mat_MPIDense *)(*outmat)->data;
  PetscCall(MatCopy(inmat, mat->A, SAME_NONZERO_PATTERN));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Expose global column col as a Vec that aliases the local storage; must be
   paired with MatDenseRestoreColumnVec_MPIDense (vecinuse guards re-entry) */
PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscInt      lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse,
PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  /* lazily create the cached column vector wrapper the first time a column is requested */
  if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
  a->vecinuse = col + 1; /* nonzero marks a column as checked out; col+1 so column 0 still registers as "in use" */
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(MatDenseGetArray(a->A, (PetscScalar **)&a->ptrinuse));
  /* point the cached vector at column `col` of the local column-major storage */
  PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)lda)));
  *v = a->cvec;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Returns the column vector previously obtained with MatDenseGetColumnVec_MPIDense():
   restores the local dense array and detaches the cached vector from it. */
PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector");
  VecCheckAssembled(a->cvec);
  a->vecinuse = 0;
  PetscCall(MatDenseRestoreArray(a->A, (PetscScalar **)&a->ptrinuse));
  PetscCall(VecResetArray(a->cvec));
  if (v) *v = NULL;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Read-only access to column `col` of the local storage as a Vec; the vector is
   additionally read-locked so callers cannot modify it. */
PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscInt      lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
  a->vecinuse = col + 1;
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(MatDenseGetArrayRead(a->A, &a->ptrinuse));
  PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)lda)));
  PetscCall(VecLockReadPush(a->cvec)); /* forbid writes through the returned vector */
  *v = a->cvec;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Companion to MatDenseGetColumnVecRead_MPIDense(): pops the read lock and
   restores the (read-only) local array. */
PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
  VecCheckAssembled(a->cvec);
  a->vecinuse = 0;
  PetscCall(MatDenseRestoreArrayRead(a->A, &a->ptrinuse));
  PetscCall(VecLockReadPop(a->cvec));
  PetscCall(VecResetArray(a->cvec));
  if (v) *v = NULL;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Write-only access to column `col` of the local storage as a Vec;
   uses MatDenseGetArrayWrite() so prior values need not be valid. */
PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscInt      lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
  a->vecinuse = col + 1;
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(MatDenseGetArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
  PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)lda)));
  *v = a->cvec;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Companion to MatDenseGetColumnVecWrite_MPIDense(). */
PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
  VecCheckAssembled(a->cvec);
  a->vecinuse = 0;
  PetscCall(MatDenseRestoreArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
  PetscCall(VecResetArray(a->cvec));
  if (v) *v = NULL;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Returns (in *v) a cached matrix `a->cmat` viewing rows [rbegin,rend) and columns
   [cbegin,cend) of A. The cached matrix and its layouts are created lazily and
   rebuilt only when the requested row/column window changes. */
static PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A, PetscInt rbegin, PetscInt rend, PetscInt cbegin, PetscInt cend, Mat *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat_MPIDense *c;
  MPI_Comm      comm;
  PetscInt      prbegin, prend, pcbegin, pcend;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCheck(!a->vecinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  /* clip the requested global window to this rank's owned rows/columns */
  prbegin = PetscMax(0, PetscMin(A->rmap->rend, rbegin) - A->rmap->rstart);
  prend   = PetscMin(A->rmap->n, PetscMax(0, rend - A->rmap->rstart));
  pcbegin = PetscMax(0, PetscMin(A->cmap->rend, cbegin) - A->cmap->rstart);
  pcend   = PetscMin(A->cmap->n, PetscMax(0, cend - A->cmap->rstart));
  if (!a->cmat) {
    PetscCall(MatCreate(comm, &a->cmat));
    PetscCall(MatSetType(a->cmat, ((PetscObject)A)->type_name));
    /* full row range: share A's layout; otherwise build a layout for the window */
    if (rend - rbegin == A->rmap->N) PetscCall(PetscLayoutReference(A->rmap, &a->cmat->rmap));
    else {
      PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, prend - prbegin));
      PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin));
      PetscCall(PetscLayoutSetUp(a->cmat->rmap));
    }
    if (cend - cbegin == A->cmap->N) PetscCall(PetscLayoutReference(A->cmap, &a->cmat->cmap));
    else {
      PetscCall(PetscLayoutSetLocalSize(a->cmat->cmap, pcend - pcbegin));
      PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin));
      PetscCall(PetscLayoutSetUp(a->cmat->cmap));
    }
    c = (Mat_MPIDense *)a->cmat->data;
c->sub_rbegin = rbegin; /* remember the window the cached matrix currently represents */
    c->sub_rend   = rend;
    c->sub_cbegin = cbegin;
    c->sub_cend   = cend;
  }
  c = (Mat_MPIDense *)a->cmat->data;
  /* requested row window differs from the cached one: rebuild the row layout */
  if (c->sub_rbegin != rbegin || c->sub_rend != rend) {
    PetscCall(PetscLayoutDestroy(&a->cmat->rmap));
    PetscCall(PetscLayoutCreate(comm, &a->cmat->rmap));
    PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, prend - prbegin));
    PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin));
    PetscCall(PetscLayoutSetUp(a->cmat->rmap));
    c->sub_rbegin = rbegin;
    c->sub_rend   = rend;
  }
  /* requested column window differs: rebuild the column layout and drop the
     cached scatter objects, which depend on it */
  if (c->sub_cbegin != cbegin || c->sub_cend != cend) {
    // special optimization: check if all columns are owned by rank 0, in which case no communication is necessary
    if ((cend - cbegin != a->cmat->cmap->N) || (A->cmap->range[1] != A->cmap->N)) {
      PetscCall(PetscLayoutDestroy(&a->cmat->cmap));
      PetscCall(PetscLayoutCreate(comm, &a->cmat->cmap));
      PetscCall(PetscLayoutSetLocalSize(a->cmat->cmap, pcend - pcbegin));
      PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin));
      PetscCall(PetscLayoutSetUp(a->cmat->cmap));
      PetscCall(VecDestroy(&c->lvec));
      PetscCall(PetscSFDestroy(&c->Mvctx));
    }
    c->sub_cbegin = cbegin;
    c->sub_cend   = cend;
  }
  PetscCheck(!c->A, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  /* view the local rows of the window; the local piece stores all columns, so the
     full [cbegin,cend) column range is passed through */
  PetscCall(MatDenseGetSubMatrix(a->A, prbegin, prend, cbegin, cend, &c->A));

  a->cmat->preallocated = PETSC_TRUE;
  a->cmat->assembled    = PETSC_TRUE;
#if defined(PETSC_HAVE_DEVICE)
  a->cmat->offloadmask = c->A->offloadmask;
#endif
  a->matinuse = cbegin + 1; /* nonzero marks the submatrix as checked out */
  *v          = a->cmat;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Returns the submatrix obtained with MatDenseGetSubMatrix_MPIDense() and
   releases the inner sequential view. */
static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A, Mat *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat_MPIDense *c;

  PetscFunctionBegin;
  PetscCheck(a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetSubMatrix() first");
  PetscCheck(a->cmat, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal matrix");
  PetscCheck(*v == a->cmat, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Not the matrix obtained from MatDenseGetSubMatrix()");
  a->matinuse = 0;
  c           = (Mat_MPIDense *)a->cmat->data;
  PetscCall(MatDenseRestoreSubMatrix(a->A, &c->A));
  if (v) *v = NULL;
#if defined(PETSC_HAVE_DEVICE)
  A->offloadmask = a->A->offloadmask;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*MC
   MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.

   Options Database Key:
. -mat_type mpidense - sets the matrix type to `MATMPIDENSE` during a call to `MatSetFromOptions()`

  Level: beginner

.seealso: [](ch_matrices), `Mat`, `MatCreateDense()`, `MATSEQDENSE`, `MATDENSE`
M*/
/* Constructor for MATMPIDENSE: allocates the per-object data, installs the
   function table, and composes the type-specific method implementations. */
PetscErrorCode MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense *a;

  PetscFunctionBegin;
  PetscCall(PetscNew(&a));
  mat->data   = (void *)a;
  mat->ops[0] = MatOps_Values; /* MatOps_Values is the file-level static ops table */

  mat->insertmode = NOT_SET_VALUES;

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;

  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)mat), 1, &mat->stash));

  /* stuff used for matrix vector multiply */
  a->lvec        = NULL;
  a->Mvctx       = NULL;
  a->roworiented = PETSC_TRUE;

  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", MatDenseGetLDA_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", MatDenseSetLDA_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", MatDenseGetArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", MatDenseRestoreArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", MatDenseGetArrayRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", MatDenseRestoreArrayRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", MatDenseGetArrayWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", MatDenseRestoreArrayWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", MatDensePlaceArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", MatDenseResetArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", MatDenseReplaceArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", MatDenseGetSubMatrix_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", MatDenseRestoreSubMatrix_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", MatConvert_MPIDense_MPIAIJ));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", MatConvert_MPIDense_Elemental));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", MatConvert_Dense_ScaLAPACK));
#endif
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", MatConvert_MPIDense_MPIDenseCUDA));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", MatMPIDenseSetPreallocation_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
#endif
#if defined(PETSC_HAVE_HIP)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", MatConvert_MPIDense_MPIDenseHIP));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", MatDenseGetColumn_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", MatDenseRestoreColumn_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultColumnRange_C", MatMultColumnRange_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultAddColumnRange_C", MatMultAddColumnRange_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeColumnRange_C", MatMultHermitianTransposeColumnRange_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeAddColumnRange_C", MatMultHermitianTransposeAddColumnRange_MPIDense));
  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, MATMPIDENSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*MC
   MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.

   This matrix type is identical to `MATSEQDENSE` when constructed with a single process communicator,
   and `MATMPIDENSE` otherwise.

   Options Database Key:
. -mat_type dense - sets the matrix type to `MATDENSE` during a call to `MatSetFromOptions()`

  Level: beginner

.seealso: [](ch_matrices), `Mat`, `MATSEQDENSE`, `MATMPIDENSE`, `MATDENSECUDA`, `MATDENSEHIP`
M*/

/*@
   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

   Collective

   Input Parameters:
+  B - the matrix
-  data - optional location of matrix data.  Set to `NULL` for PETSc
         to control all matrix memory allocation.

   Level: intermediate

   Notes:
   The dense format is fully compatible with standard Fortran
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set `data` to `NULL`.

.seealso: [](ch_matrices), `Mat`, `MATMPIDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
@*/
PetscErrorCode MatMPIDenseSetPreallocation(Mat B, PetscScalar *data)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  /* PetscTryMethod: silently a no-op if B is not MATMPIDENSE */
  PetscTryMethod(B, "MatMPIDenseSetPreallocation_C", (Mat, PetscScalar *), (B, data));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
   MatDensePlaceArray - Allows one to replace the array in a `MATDENSE` matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Level: developer

   Note:
   Adding `const` to `array` was an oversight, see notes in `VecPlaceArray()`.

   You can return to the original array with a call to `MatDenseResetArray()`. The user is responsible for freeing this array; it will not be
   freed when the matrix is destroyed.

.seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDenseResetArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`,
          `MatDenseReplaceArray()`
@*/
PetscErrorCode MatDensePlaceArray(Mat mat, const PetscScalar *array)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  PetscUseMethod(mat, "MatDensePlaceArray_C", (Mat, const PetscScalar *), (mat, array));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
  /* the user array lives on the host, so the valid copy is now the CPU one */
  mat->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
   MatDenseResetArray - Resets the matrix array to the array it previously had before the call to `MatDensePlaceArray()`

   Not Collective

   Input Parameter:
.  mat - the matrix

   Level: developer

   Note:
   You can only call this after a call to `MatDensePlaceArray()`

.seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDensePlaceArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`
@*/
PetscErrorCode MatDenseResetArray(Mat mat)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  PetscUseMethod(mat, "MatDenseResetArray_C", (Mat), (mat));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
   MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Level: developer

   Note:
   Adding `const` to `array` was an oversight, see notes in `VecPlaceArray()`.

   The memory passed in MUST be obtained with `PetscMalloc()` and CANNOT be
   freed by the user. It will be freed when the matrix is destroyed.

.seealso: [](ch_matrices), `Mat`, `MatDensePlaceArray()`, `MatDenseGetArray()`, `VecReplaceArray()`
@*/
PetscErrorCode MatDenseReplaceArray(Mat mat, const PetscScalar *array)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  PetscUseMethod(mat, "MatDenseReplaceArray_C", (Mat, const PetscScalar *), (mat, array));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
  mat->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
   MatCreateDense - Creates a matrix in `MATDENSE` format.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or `PETSC_DECIDE` to have calculated if `M` is given)
.  n - number of local columns (or `PETSC_DECIDE` to have calculated if `N` is given)
.  M - number of global rows (or `PETSC_DECIDE` to have calculated if `m` is given)
.  N - number of global columns (or `PETSC_DECIDE` to have calculated if `n` is given)
-  data - optional location of matrix data.  Set data to `NULL` (`PETSC_NULL_SCALAR_ARRAY` for Fortran users) for PETSc
   to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Level: intermediate

   Notes:
   The dense format is fully compatible with standard Fortran
   storage by columns.

   Although local portions of the matrix are stored in column-major
   order, the matrix is partitioned across MPI ranks by row.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set `data` to `NULL` (`PETSC_NULL_SCALAR_ARRAY` for Fortran users).

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

.seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
@*/
PetscErrorCode MatCreateDense(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar data[], Mat *A)
{
  PetscFunctionBegin;
  PetscCall(MatCreate(comm, A));
  PetscCall(MatSetSizes(*A, m, n, M, N));
  PetscCall(MatSetType(*A, MATDENSE));
  /* both are PetscTryMethod-based; only the one matching the actual type binds */
  PetscCall(MatSeqDenseSetPreallocation(*A, data));
  PetscCall(MatMPIDenseSetPreallocation(*A, data));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Duplicates an MPI dense matrix: shares A's layouts and duplicates the local
   sequential piece (copying values iff `cpvalues` requests it). */
static PetscErrorCode MatDuplicate_MPIDense(Mat A, MatDuplicateOption cpvalues, Mat *newmat)
{
  Mat           mat;
  Mat_MPIDense *a, *oldmat = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  *newmat = NULL;
  PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat));
  PetscCall(MatSetSizes(mat, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
  PetscCall(MatSetType(mat, ((PetscObject)A)->type_name));
  a = (Mat_MPIDense *)mat->data;

  mat->factortype   = A->factortype;
  mat->assembled    = PETSC_TRUE;
  mat->preallocated = PETSC_TRUE;

  mat->insertmode = NOT_SET_VALUES;
  a->donotstash   = oldmat->donotstash;

  PetscCall(PetscLayoutReference(A->rmap, &mat->rmap));
  PetscCall(PetscLayoutReference(A->cmap, &mat->cmap));

  PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));

  *newmat = mat;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Loads an MPI dense matrix from a binary or (optionally) HDF5 viewer. */
static PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
{
  PetscBool isbinary;
#if defined(PETSC_HAVE_HDF5)
  PetscBool ishdf5;
#endif

  PetscFunctionBegin;
  PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
  PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
  /* force binary viewer to load .info file if it has not yet done so */
  PetscCall(PetscViewerSetUp(viewer));
PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 2027 #if defined(PETSC_HAVE_HDF5) 2028 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5)); 2029 #endif 2030 if (isbinary) { 2031 PetscCall(MatLoad_Dense_Binary(newMat, viewer)); 2032 #if defined(PETSC_HAVE_HDF5) 2033 } else if (ishdf5) { 2034 PetscCall(MatLoad_Dense_HDF5(newMat, viewer)); 2035 #endif 2036 } else SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name); 2037 PetscFunctionReturn(PETSC_SUCCESS); 2038 } 2039 2040 static PetscErrorCode MatEqual_MPIDense(Mat A, Mat B, PetscBool *flag) 2041 { 2042 Mat_MPIDense *matB = (Mat_MPIDense *)B->data, *matA = (Mat_MPIDense *)A->data; 2043 Mat a, b; 2044 2045 PetscFunctionBegin; 2046 a = matA->A; 2047 b = matB->A; 2048 PetscCall(MatEqual(a, b, flag)); 2049 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, flag, 1, MPI_C_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A))); 2050 PetscFunctionReturn(PETSC_SUCCESS); 2051 } 2052 2053 static PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data) 2054 { 2055 Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data; 2056 2057 PetscFunctionBegin; 2058 PetscCall(PetscFree2(atb->sendbuf, atb->recvcounts)); 2059 PetscCall(MatDestroy(&atb->atb)); 2060 PetscCall(PetscFree(atb)); 2061 PetscFunctionReturn(PETSC_SUCCESS); 2062 } 2063 2064 static PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data) 2065 { 2066 Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data; 2067 2068 PetscFunctionBegin; 2069 PetscCall(PetscFree2(abt->buf[0], abt->buf[1])); 2070 PetscCall(PetscFree2(abt->recvcounts, abt->recvdispls)); 2071 PetscCall(PetscFree(abt)); 2072 PetscFunctionReturn(PETSC_SUCCESS); 2073 } 2074 2075 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) 2076 { 2077 Mat_MPIDense 
*a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data; 2078 Mat_TransMatMultDense *atb; 2079 MPI_Comm comm; 2080 PetscMPIInt size, *recvcounts; 2081 PetscScalar *carray, *sendbuf; 2082 const PetscScalar *atbarray; 2083 PetscInt i, cN = C->cmap->N, proc, k, j, lda; 2084 const PetscInt *ranges; 2085 2086 PetscFunctionBegin; 2087 MatCheckProduct(C, 3); 2088 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 2089 atb = (Mat_TransMatMultDense *)C->product->data; 2090 recvcounts = atb->recvcounts; 2091 sendbuf = atb->sendbuf; 2092 2093 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 2094 PetscCallMPI(MPI_Comm_size(comm, &size)); 2095 2096 /* compute atbarray = aseq^T * bseq */ 2097 PetscCall(MatTransposeMatMult(a->A, b->A, atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX, PETSC_DETERMINE, &atb->atb)); 2098 2099 PetscCall(MatGetOwnershipRanges(C, &ranges)); 2100 2101 if (ranges[1] == C->rmap->N) { 2102 /* all of the values are being reduced to rank 0: optimize this case to use MPI_Reduce and GPU aware MPI if available */ 2103 PetscInt atb_lda, c_lda; 2104 Mat atb_local = atb->atb; 2105 Mat atb_alloc = NULL; 2106 Mat c_local = c->A; 2107 Mat c_alloc = NULL; 2108 PetscMemType atb_memtype, c_memtype; 2109 const PetscScalar *atb_array = NULL; 2110 MPI_Datatype vector_type; 2111 PetscScalar *c_array = NULL; 2112 PetscMPIInt rank; 2113 2114 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 2115 2116 PetscCall(MatDenseGetLDA(atb_local, &atb_lda)); 2117 if (atb_lda != C->rmap->N) { 2118 // copy atb to a matrix that will have lda == the number of rows 2119 PetscCall(MatDuplicate(atb_local, MAT_DO_NOT_COPY_VALUES, &atb_alloc)); 2120 PetscCall(MatCopy(atb_local, atb_alloc, DIFFERENT_NONZERO_PATTERN)); 2121 atb_local = atb_alloc; 2122 } 2123 2124 if (rank == 0) { 2125 PetscCall(MatDenseGetLDA(c_local, &c_lda)); 2126 if (c_lda != C->rmap->N) { 2127 // copy c to a matrix that will have lda == the number 
of rows 2128 PetscCall(MatDuplicate(c_local, MAT_DO_NOT_COPY_VALUES, &c_alloc)); 2129 c_local = c_alloc; 2130 } 2131 PetscCall(MatZeroEntries(c_local)); 2132 } 2133 /* atb_local and c_local have nrows = lda = A->cmap->N and ncols = 2134 * B->cmap->N: use the a->Mvctx to use the best reduction method */ 2135 if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); 2136 vector_type = MPIU_SCALAR; 2137 if (B->cmap->N > 1) { 2138 PetscMPIInt mpi_N; 2139 2140 PetscCall(PetscMPIIntCast(B->cmap->N, &mpi_N)); 2141 PetscCallMPI(MPI_Type_contiguous(mpi_N, MPIU_SCALAR, &vector_type)); 2142 PetscCallMPI(MPI_Type_commit(&vector_type)); 2143 } 2144 PetscCall(MatDenseGetArrayReadAndMemType(atb_local, &atb_array, &atb_memtype)); 2145 PetscCall(MatDenseGetArrayWriteAndMemType(c_local, &c_array, &c_memtype)); 2146 PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, vector_type, atb_memtype, atb_array, c_memtype, c_array, MPIU_SUM)); 2147 PetscCall(PetscSFReduceEnd(a->Mvctx, vector_type, atb_array, c_array, MPIU_SUM)); 2148 PetscCall(MatDenseRestoreArrayWriteAndMemType(c_local, &c_array)); 2149 PetscCall(MatDenseRestoreArrayReadAndMemType(atb_local, &atb_array)); 2150 if (rank == 0 && c_local != c->A) PetscCall(MatCopy(c_local, c->A, DIFFERENT_NONZERO_PATTERN)); 2151 if (B->cmap->N > 1) PetscCallMPI(MPI_Type_free(&vector_type)); 2152 PetscCall(MatDestroy(&atb_alloc)); 2153 PetscCall(MatDestroy(&c_alloc)); 2154 PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 2155 PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 2156 PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 2157 PetscFunctionReturn(PETSC_SUCCESS); 2158 } 2159 2160 /* arrange atbarray into sendbuf */ 2161 PetscCall(MatDenseGetArrayRead(atb->atb, &atbarray)); 2162 PetscCall(MatDenseGetLDA(atb->atb, &lda)); 2163 for (proc = 0, k = 0; proc < size; proc++) { 2164 for (j = 0; j < cN; j++) { 2165 for (i = ranges[proc]; i < ranges[proc + 1]; i++) sendbuf[k++] = atbarray[i + j * lda]; 2166 } 2167 } 2168 
PetscCall(MatDenseRestoreArrayRead(atb->atb, &atbarray));

  /* sum all atbarray to local values of C */
  PetscCall(MatDenseGetArrayWrite(c->A, &carray));
  PetscCallMPI(MPI_Reduce_scatter(sendbuf, carray, recvcounts, MPIU_SCALAR, MPIU_SUM, comm));
  PetscCall(MatDenseRestoreArrayWrite(c->A, &carray));
  PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Symbolic phase of C = A^T*B: sets C's sizes/type and allocates the reuse
   data (send buffer and per-rank receive counts) consumed by the numeric phase. */
static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
{
  MPI_Comm               comm;
  PetscMPIInt            size;
  PetscInt               cm = A->cmap->n, cM, cN = B->cmap->N;
  Mat_TransMatMultDense *atb;
  PetscBool              cisdense = PETSC_FALSE;
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C, 4);
  PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  /* A and B must have identical row partitionings for the local products to line up */
  PetscCheck(A->rmap->rstart == B->rmap->rstart && A->rmap->rend == B->rmap->rend, comm, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart,
             A->rmap->rend, B->rmap->rstart, B->rmap->rend);

  /* create matrix product C */
  PetscCall(MatSetSizes(C, cm, B->cmap->n, A->cmap->N, B->cmap->N));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, ""));
#endif
#if defined(PETSC_HAVE_HIP)
  /* NOTE(review): with both CUDA and HIP enabled this second check overwrites
     the CUDA result — confirm intended behavior when C is MATMPIDENSECUDA */
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSEHIP, ""));
#endif
  if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
  PetscCall(MatSetUp(C));

  /* create data structure for reuse C */
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCall(PetscNew(&atb));
  cM = C->rmap->N;
  PetscCall(PetscMalloc2(cM * cN, &atb->sendbuf, size, &atb->recvcounts));
  PetscCall(MatGetOwnershipRanges(C, &ranges));
  /* each rank receives its owned rows of C times the full column count */
  for (PetscMPIInt i = 0; i < size; i++) PetscCall(PetscMPIIntCast((ranges[i + 1] - ranges[i]) * cN, &atb->recvcounts[i]));
  C->product->data    = atb;
  C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Symbolic phase of C = A*B^T: picks the "allgatherv" or "cyclic" algorithm from
   the product options and allocates the matching communication buffers. */
static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
{
  MPI_Comm               comm;
  PetscMPIInt            i, size;
  PetscInt               maxRows, bufsiz;
  PetscMPIInt            tag;
  PetscInt               alg;
  Mat_MatTransMultDense *abt;
  Mat_Product           *product = C->product;
  PetscBool              flg;

  PetscFunctionBegin;
  MatCheckProduct(C, 4);
  PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
  /* check local size of A and B */
  PetscCheck(A->cmap->n == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local column dimensions are incompatible, A (%" PetscInt_FMT ") != B (%" PetscInt_FMT ")", A->cmap->n, B->cmap->n);

  PetscCall(PetscStrcmp(product->alg, "allgatherv", &flg));
  alg = flg ? 0 : 1; /* 0 = allgatherv, 1 = cyclic */

  /* setup matrix product C */
  PetscCall(MatSetSizes(C, A->rmap->n, B->rmap->n, A->rmap->N, B->rmap->N));
  PetscCall(MatSetType(C, MATMPIDENSE));
  PetscCall(MatSetUp(C));
  PetscCall(PetscObjectGetNewTag((PetscObject)C, &tag));

  /* create data structure for reuse C */
  PetscCall(PetscObjectGetComm((PetscObject)C, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCall(PetscNew(&abt));
  abt->tag = tag;
  abt->alg = alg;
  switch (alg) {
  case 1: /* alg: "cyclic" — double buffer sized for the largest row block of B */
    for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, B->rmap->range[i + 1] - B->rmap->range[i]);
    bufsiz = A->cmap->N * maxRows;
    PetscCall(PetscMalloc2(bufsiz, &abt->buf[0], bufsiz, &abt->buf[1]));
    break;
  default: /* alg: "allgatherv" — pack buffer plus gather buffer for all of B */
    PetscCall(PetscMalloc2(B->rmap->n * B->cmap->N, &abt->buf[0], B->rmap->N * B->cmap->N, &abt->buf[1]));
    PetscCall(PetscMalloc2(size, &abt->recvcounts, size + 1, &abt->recvdispls));
    for (i = 0; i <= size; i++) PetscCall(PetscMPIIntCast(B->rmap->range[i] * A->cmap->N, &abt->recvdispls[i]));
    for (i = 0; i < size; i++) PetscCall(PetscMPIIntCast(abt->recvdispls[i + 1] - abt->recvdispls[i], &abt->recvcounts[i]));
    break;
  }

  C->product->data                = abt;
  C->product->destroy             = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
  C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Numeric phase of C = A*B^T, "cyclic" algorithm: each rank's packed block of B
   travels around the ring; in each step the local GEMM with the current block is
   overlapped with the nonblocking send/receive of the next one. */
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
{
  Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
  Mat_MatTransMultDense *abt;
  MPI_Comm               comm;
  PetscMPIInt            rank, size, sendto, recvfrom, recvisfrom;
  PetscScalar           *sendbuf, *recvbuf = NULL, *cv;
  PetscInt               i, cK = A->cmap->N, sendsiz, recvsiz, k, j, bn;
  PetscScalar            _DOne = 1.0, _DZero = 0.0;
  const PetscScalar     *av, *bv;
  PetscBLASInt           cm, cn, ck, alda, blda = 0, clda;
  MPI_Request            reqs[2];
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C, 3);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
  abt = (Mat_MatTransMultDense *)C->product->data;
  PetscCall(PetscObjectGetComm((PetscObject)C, &comm));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCall(MatDenseGetArrayRead(a->A, &av));
  PetscCall(MatDenseGetArrayRead(b->A, &bv));
  PetscCall(MatDenseGetArrayWrite(c->A, &cv));
  PetscCall(MatDenseGetLDA(a->A, &i)); /* i reused as a scratch PetscInt for the three LDA queries */
  PetscCall(PetscBLASIntCast(i, &alda));
  PetscCall(MatDenseGetLDA(b->A, &i));
  PetscCall(PetscBLASIntCast(i, &blda));
  PetscCall(MatDenseGetLDA(c->A, &i));
  PetscCall(PetscBLASIntCast(i, &clda));
  PetscCall(MatGetOwnershipRanges(B, &ranges));
  bn = B->rmap->n;
  if (blda == bn) {
    /* local B is already contiguous; send it directly */
    sendbuf = (PetscScalar *)bv;
  } else {
    /* compact local B (stride blda) into buf[0] with leading dimension bn */
    sendbuf = abt->buf[0];
    for (k = 0, i = 0; i < cK; i++) {
      for (j = 0; j < bn; j++, k++) sendbuf[k] = bv[i * blda + j];
    }
  }
  if (size > 1) {
    sendto   = (rank + size - 1) % size;
    recvfrom = (rank + size + 1) % size;
  } else {
    sendto = recvfrom = 0;
  }
  PetscCall(PetscBLASIntCast(cK, &ck));
  PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm));
  recvisfrom = rank;
  for (i = 0; i < size; i++) {
    /* we have finished receiving in sending, bufs can be read/modified */
    PetscMPIInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */
    PetscInt    nextbn         = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];

    if (nextrecvisfrom != rank) {
      /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */
      sendsiz = cK * bn;
      recvsiz = cK * nextbn;
      recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1];
      PetscCallMPI(MPIU_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]));
      PetscCallMPI(MPIU_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]));
    }

    /* local aseq * sendbuf^T */
    PetscCall(PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn));
    if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &cm, &cn, &ck, &_DOne, av, &alda, sendbuf, &cn, &_DZero, cv + clda * ranges[recvisfrom], &clda));

    if (nextrecvisfrom != rank) {
      /* wait for the sends and receives to complete, swap sendbuf and recvbuf */
      PetscCallMPI(MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE));
    }
    bn         = nextbn;
    recvisfrom = nextrecvisfrom;
    sendbuf    = recvbuf;
  }
  PetscCall(MatDenseRestoreArrayRead(a->A, &av));
  PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
  PetscCall(MatDenseRestoreArrayWrite(c->A, &cv));
  PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Numeric phase of C = A*B^T, "allgatherv" algorithm: every rank gathers all of
   B, then performs a single local GEMM. */
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C)
{
  Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
  Mat_MatTransMultDense *abt;
  MPI_Comm               comm;
  PetscMPIInt            size, ibn;
  PetscScalar           *cv, *sendbuf, *recvbuf;
  const PetscScalar     *av, *bv;
  PetscInt               blda, i, cK = A->cmap->N, k, j, bn;
  PetscScalar            _DOne = 1.0, _DZero = 0.0;
  PetscBLASInt           cm, cn, ck, alda, clda;

  PetscFunctionBegin;
  MatCheckProduct(C, 3);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
  abt = (Mat_MatTransMultDense *)C->product->data;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
PetscCallMPI(MPI_Comm_size(comm, &size)); 2373 PetscCall(MatDenseGetArrayRead(a->A, &av)); 2374 PetscCall(MatDenseGetArrayRead(b->A, &bv)); 2375 PetscCall(MatDenseGetArrayWrite(c->A, &cv)); 2376 PetscCall(MatDenseGetLDA(a->A, &i)); 2377 PetscCall(PetscBLASIntCast(i, &alda)); 2378 PetscCall(MatDenseGetLDA(b->A, &blda)); 2379 PetscCall(MatDenseGetLDA(c->A, &i)); 2380 PetscCall(PetscBLASIntCast(i, &clda)); 2381 /* copy transpose of B into buf[0] */ 2382 bn = B->rmap->n; 2383 sendbuf = abt->buf[0]; 2384 recvbuf = abt->buf[1]; 2385 for (k = 0, j = 0; j < bn; j++) { 2386 for (i = 0; i < cK; i++, k++) sendbuf[k] = bv[i * blda + j]; 2387 } 2388 PetscCall(MatDenseRestoreArrayRead(b->A, &bv)); 2389 PetscCall(PetscMPIIntCast(bn * cK, &ibn)); 2390 PetscCallMPI(MPI_Allgatherv(sendbuf, ibn, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm)); 2391 PetscCall(PetscBLASIntCast(cK, &ck)); 2392 PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm)); 2393 PetscCall(PetscBLASIntCast(c->A->cmap->n, &cn)); 2394 if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &cm, &cn, &ck, &_DOne, av, &alda, recvbuf, &ck, &_DZero, cv, &clda)); 2395 PetscCall(MatDenseRestoreArrayRead(a->A, &av)); 2396 PetscCall(MatDenseRestoreArrayRead(b->A, &bv)); 2397 PetscCall(MatDenseRestoreArrayWrite(c->A, &cv)); 2398 PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 2399 PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 2400 PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 2401 PetscFunctionReturn(PETSC_SUCCESS); 2402 } 2403 2404 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) 2405 { 2406 Mat_MatTransMultDense *abt; 2407 2408 PetscFunctionBegin; 2409 MatCheckProduct(C, 3); 2410 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 2411 abt = (Mat_MatTransMultDense *)C->product->data; 2412 switch (abt->alg) { 2413 case 1: 2414 
PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C)); 2415 break; 2416 default: 2417 PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C)); 2418 break; 2419 } 2420 PetscFunctionReturn(PETSC_SUCCESS); 2421 } 2422 2423 static PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data) 2424 { 2425 Mat_MatMultDense *ab = (Mat_MatMultDense *)data; 2426 2427 PetscFunctionBegin; 2428 PetscCall(MatDestroy(&ab->Ce)); 2429 PetscCall(MatDestroy(&ab->Ae)); 2430 PetscCall(MatDestroy(&ab->Be)); 2431 PetscCall(PetscFree(ab)); 2432 PetscFunctionReturn(PETSC_SUCCESS); 2433 } 2434 2435 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) 2436 { 2437 Mat_MatMultDense *ab; 2438 Mat_MPIDense *mdn = (Mat_MPIDense *)A->data; 2439 Mat_MPIDense *b = (Mat_MPIDense *)B->data; 2440 2441 PetscFunctionBegin; 2442 MatCheckProduct(C, 3); 2443 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Missing product data"); 2444 ab = (Mat_MatMultDense *)C->product->data; 2445 if (ab->Ae && ab->Ce) { 2446 #if PetscDefined(HAVE_ELEMENTAL) 2447 PetscCall(MatConvert_MPIDense_Elemental(A, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Ae)); 2448 PetscCall(MatConvert_MPIDense_Elemental(B, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Be)); 2449 PetscCall(MatMatMultNumeric_Elemental(ab->Ae, ab->Be, ab->Ce)); 2450 PetscCall(MatConvert(ab->Ce, MATMPIDENSE, MAT_REUSE_MATRIX, &C)); 2451 #else 2452 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "PETSC_HAVE_ELEMENTAL not defined"); 2453 #endif 2454 } else { 2455 MPI_Comm comm; 2456 const PetscScalar *read; 2457 PetscScalar *write; 2458 PetscInt lda; 2459 const PetscInt *ranges; 2460 PetscMPIInt size; 2461 2462 if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); /* cannot be done during the symbolic phase because of possible calls to MatProductReplaceMats() */ 2463 comm = PetscObjectComm((PetscObject)B); 2464 PetscCallMPI(MPI_Comm_size(comm, &size)); 2465 
    PetscCall(PetscLayoutGetRanges(B->rmap, &ranges));
    if (ranges[1] == ranges[size]) {
      // optimize for the case where the B matrix is broadcast from rank 0
      // (ranks 1..size-1 own no rows of B, so one contiguous broadcast suffices)
      PetscInt           b_lda, be_lda;
      Mat                b_local  = b->A;
      Mat                b_alloc  = NULL; /* contiguous copy of B's local block, allocated only if needed */
      Mat                be_local = ab->Be;
      Mat                be_alloc = NULL; /* contiguous receive buffer, allocated only if needed */
      PetscMemType       b_memtype, be_memtype;
      const PetscScalar *b_array = NULL;
      MPI_Datatype       vector_type;
      PetscScalar       *be_array = NULL;
      PetscMPIInt        rank;

      PetscCallMPI(MPI_Comm_rank(comm, &rank));
      /* the SF broadcast below assumes column-contiguous storage (lda == global row count);
         fall back to scratch duplicates when the leading dimension does not match */
      PetscCall(MatDenseGetLDA(be_local, &be_lda));
      if (be_lda != B->rmap->N) {
        PetscCall(MatDuplicate(be_local, MAT_DO_NOT_COPY_VALUES, &be_alloc));
        be_local = be_alloc;
      }

      if (rank == 0) {
        /* only rank 0 holds data to send, so only it may need the packed copy */
        PetscCall(MatDenseGetLDA(b_local, &b_lda));
        if (b_lda != B->rmap->N) {
          PetscCall(MatDuplicate(b_local, MAT_DO_NOT_COPY_VALUES, &b_alloc));
          PetscCall(MatCopy(b_local, b_alloc, DIFFERENT_NONZERO_PATTERN));
          b_local = b_alloc;
        }
      }
      /* broadcast whole rows of B at once: one unit = N columns worth of scalars */
      vector_type = MPIU_SCALAR;
      if (B->cmap->N > 1) {
        PetscMPIInt mpi_N;

        PetscCall(PetscMPIIntCast(B->cmap->N, &mpi_N));
        PetscCallMPI(MPI_Type_contiguous(mpi_N, MPIU_SCALAR, &vector_type));
        PetscCallMPI(MPI_Type_commit(&vector_type));
      }
      /* memtype-aware SF broadcast so device (GPU) arrays are handled without explicit copies */
      PetscCall(MatDenseGetArrayReadAndMemType(b_local, &b_array, &b_memtype));
      PetscCall(MatDenseGetArrayWriteAndMemType(be_local, &be_array, &be_memtype));
      PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, vector_type, b_memtype, b_array, be_memtype, be_array, MPI_REPLACE));
      PetscCall(PetscSFBcastEnd(mdn->Mvctx, vector_type, b_array, be_array, MPI_REPLACE));
      PetscCall(MatDenseRestoreArrayWriteAndMemType(be_local, &be_array));
      PetscCall(MatDenseRestoreArrayReadAndMemType(b_local, &b_array));
      if (be_local != ab->Be) PetscCall(MatCopy(be_local, ab->Be, DIFFERENT_NONZERO_PATTERN));
      if (B->cmap->N > 1) PetscCallMPI(MPI_Type_free(&vector_type));
      PetscCall(MatDestroy(&be_alloc));
      PetscCall(MatDestroy(&b_alloc));
    } else {
      /* general case: broadcast B column by column through the scatter context */
      PetscCall(MatDenseGetLDA(B, &lda));
      PetscCall(MatDenseGetArrayRead(B, &read));
      PetscCall(MatDenseGetArrayWrite(ab->Be, &write));
      for (PetscInt i = 0; i < C->cmap->N; ++i) {
        PetscCall(PetscSFBcastBegin(mdn->Mvctx, MPIU_SCALAR, read + i * lda, write + i * ab->Be->rmap->n, MPI_REPLACE));
        PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, read + i * lda, write + i * ab->Be->rmap->n, MPI_REPLACE));
      }
      PetscCall(MatDenseRestoreArrayWrite(ab->Be, &write));
      PetscCall(MatDenseRestoreArrayRead(B, &read));
    }
    /* local rows of C = (local rows of A) * (replicated B) */
    PetscCall(MatMatMultNumeric_SeqDense_SeqDense(((Mat_MPIDense *)A->data)->A, ab->Be, ((Mat_MPIDense *)C->data)->A));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Symbolic phase of C = A*B for MPIDENSE matrices: sizes C, selects the algorithm
   ("petsc" replicates B locally; "elemental" delegates to Elemental), and allocates
   the reusable work matrices stored in the product data. */
static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
{
  Mat_Product      *product = C->product;
  PetscInt          alg;
  Mat_MatMultDense *ab;
  PetscBool         flg;

  PetscFunctionBegin;
  MatCheckProduct(C, 4);
  PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
  /* check local size of A and B: A's column layout must match B's row layout */
  PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ", %" PetscInt_FMT ")",
             A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);

  PetscCall(PetscStrcmp(product->alg, "petsc", &flg));
  alg = flg ?
0 : 1; /* algorithm id: 0 = "petsc", 1 = "elemental" (continuation of the ternary started above) */

  /* setup C */
  PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N));
  PetscCall(MatSetType(C, MATMPIDENSE));
  PetscCall(MatSetUp(C));

  /* create data structure for reuse Cdense */
  PetscCall(PetscNew(&ab));

  switch (alg) {
  case 1: /* alg: "elemental" */
#if PetscDefined(HAVE_ELEMENTAL)
    /* create elemental matrices Ae and Be */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &ab->Ae));
    PetscCall(MatSetSizes(ab->Ae, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N));
    PetscCall(MatSetType(ab->Ae, MATELEMENTAL));
    PetscCall(MatSetUp(ab->Ae));
    PetscCall(MatSetOption(ab->Ae, MAT_ROW_ORIENTED, PETSC_FALSE));

    PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &ab->Be));
    PetscCall(MatSetSizes(ab->Be, PETSC_DECIDE, PETSC_DECIDE, B->rmap->N, B->cmap->N));
    PetscCall(MatSetType(ab->Be, MATELEMENTAL));
    PetscCall(MatSetUp(ab->Be));
    PetscCall(MatSetOption(ab->Be, MAT_ROW_ORIENTED, PETSC_FALSE));

    /* compute symbolic Ce = Ae*Be */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)C), &ab->Ce));
    PetscCall(MatMatMultSymbolic_Elemental(ab->Ae, ab->Be, fill, ab->Ce));
#else
    SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "PETSC_HAVE_ELEMENTAL not defined");
#endif
    break;
  default: /* alg: "petsc" — Be holds B replicated on every rank; Ae/Ce stay NULL (flags the petsc path) */
    ab->Ae = NULL;
    PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, A->cmap->N, B->cmap->N, NULL, &ab->Be));
    ab->Ce = NULL;
    break;
  }

  C->product->data      = ab;
  C->product->destroy   = MatDestroy_MatMatMult_MPIDense_MPIDense;
  C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Configure the C = A*B product for MPIDENSE: pick the algorithm ("petsc" default,
   "elemental" optional at runtime) and install the symbolic callbacks. */
static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C)
{
  Mat_Product *product     = C->product;
  const char  *algTypes[2] = {"petsc", "elemental"};
  PetscInt     alg, nalg = PetscDefined(HAVE_ELEMENTAL) ? 2 : 1; /* "elemental" offered only when built with it */
  PetscBool    flg = PETSC_FALSE;

  PetscFunctionBegin;
  /* Set default algorithm */
  alg = 0; /* default is PETSc */
  PetscCall(PetscStrcmp(product->alg, "default", &flg));
  if (flg) PetscCall(MatProductSetAlgorithm(C, algTypes[alg]));

  /* Get runtime option */
  PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat");
  PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_AB", algTypes, nalg, algTypes[alg], &alg, &flg));
  PetscOptionsEnd();
  if (flg) PetscCall(MatProductSetAlgorithm(C, algTypes[alg]));

  C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense;
  C->ops->productsymbolic = MatProductSymbolic_AB;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Configure the C = A^T*B product for MPIDENSE: A and B must share a row layout;
   installs the symbolic callbacks (no algorithm choice for this product). */
static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C)
{
  Mat_Product *product = C->product;
  Mat          A = product->A, B = product->B;

  PetscFunctionBegin;
  PetscCheck(A->rmap->rstart == B->rmap->rstart && A->rmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
             A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
  C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense;
  C->ops->productsymbolic          = MatProductSymbolic_AtB;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Configure the C = A*B^T product for MPIDENSE: pick between the "allgatherv"
   (default) and "cyclic" algorithms, honoring both the legacy and the generic
   option names, then install the symbolic callbacks. */
static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C)
{
  Mat_Product *product     = C->product;
  const char  *algTypes[2] = {"allgatherv", "cyclic"};
  PetscInt     alg, nalg = 2;
  PetscBool    flg = PETSC_FALSE;

  PetscFunctionBegin;
  /* Set default algorithm */
  alg = 0; /* default is allgatherv */
  PetscCall(PetscStrcmp(product->alg, "default", &flg));
  if (flg) PetscCall(MatProductSetAlgorithm(C, algTypes[alg]));
2638 2639 /* Get runtime option */ 2640 if (product->api_user) { 2641 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatTransposeMult", "Mat"); 2642 PetscCall(PetscOptionsEList("-matmattransmult_mpidense_mpidense_via", "Algorithmic approach", "MatMatTransposeMult", algTypes, nalg, algTypes[alg], &alg, &flg)); 2643 PetscOptionsEnd(); 2644 } else { 2645 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_ABt", "Mat"); 2646 PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_ABt", algTypes, nalg, algTypes[alg], &alg, &flg)); 2647 PetscOptionsEnd(); 2648 } 2649 if (flg) PetscCall(MatProductSetAlgorithm(C, algTypes[alg])); 2650 2651 C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense; 2652 C->ops->productsymbolic = MatProductSymbolic_ABt; 2653 PetscFunctionReturn(PETSC_SUCCESS); 2654 } 2655 2656 static PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C) 2657 { 2658 Mat_Product *product = C->product; 2659 2660 PetscFunctionBegin; 2661 switch (product->type) { 2662 case MATPRODUCT_AB: 2663 PetscCall(MatProductSetFromOptions_MPIDense_AB(C)); 2664 break; 2665 case MATPRODUCT_AtB: 2666 PetscCall(MatProductSetFromOptions_MPIDense_AtB(C)); 2667 break; 2668 case MATPRODUCT_ABt: 2669 PetscCall(MatProductSetFromOptions_MPIDense_ABt(C)); 2670 break; 2671 default: 2672 break; 2673 } 2674 PetscFunctionReturn(PETSC_SUCCESS); 2675 } 2676