1 /* 2 Basic functions for basic parallel dense matrices. 3 Portions of this code are under: 4 Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. 5 */ 6 7 #include <../src/mat/impls/dense/mpi/mpidense.h> /*I "petscmat.h" I*/ 8 #include <../src/mat/impls/aij/mpi/mpiaij.h> 9 #include <petscblaslapack.h> 10 #include <petsc/private/vecimpl.h> 11 #include <petscdevice.h> 12 #include <petsc/private/deviceimpl.h> 13 14 /*@ 15 MatDenseGetLocalMatrix - For a `MATMPIDENSE` or `MATSEQDENSE` matrix returns the sequential 16 matrix that represents the operator. For sequential matrices it returns itself. 17 18 Input Parameter: 19 . A - the sequential or MPI `MATDENSE` matrix 20 21 Output Parameter: 22 . B - the inner matrix 23 24 Level: intermediate 25 26 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATMPIDENSE`, `MATSEQDENSE` 27 @*/ 28 PetscErrorCode MatDenseGetLocalMatrix(Mat A, Mat *B) 29 { 30 Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 31 PetscBool flg; 32 33 PetscFunctionBegin; 34 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 35 PetscAssertPointer(B, 2); 36 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &flg)); 37 if (flg) *B = mat->A; 38 else { 39 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQDENSE, &flg)); 40 PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)A)->type_name); 41 *B = A; 42 } 43 PetscFunctionReturn(PETSC_SUCCESS); 44 } 45 46 static PetscErrorCode MatCopy_MPIDense(Mat A, Mat B, MatStructure s) 47 { 48 Mat_MPIDense *Amat = (Mat_MPIDense *)A->data; 49 Mat_MPIDense *Bmat = (Mat_MPIDense *)B->data; 50 51 PetscFunctionBegin; 52 PetscCall(MatCopy(Amat->A, Bmat->A, s)); 53 PetscFunctionReturn(PETSC_SUCCESS); 54 } 55 56 PetscErrorCode MatShift_MPIDense(Mat A, PetscScalar alpha) 57 { 58 Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 59 PetscInt j, lda, rstart = A->rmap->rstart, rend = A->rmap->rend, rend2; 60 PetscScalar *v; 61 62 PetscFunctionBegin; 63 PetscCall(MatDenseGetArray(mat->A, &v)); 64 PetscCall(MatDenseGetLDA(mat->A, &lda)); 65 rend2 = PetscMin(rend, A->cmap->N); 66 if (rend2 > rstart) { 67 for (j = rstart; j < rend2; j++) v[j - rstart + j * lda] += alpha; 68 PetscCall(PetscLogFlops(rend2 - rstart)); 69 } 70 PetscCall(MatDenseRestoreArray(mat->A, &v)); 71 PetscFunctionReturn(PETSC_SUCCESS); 72 } 73 74 static PetscErrorCode MatGetRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 75 { 76 Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 77 PetscInt lrow, rstart = A->rmap->rstart, rend = A->rmap->rend; 78 79 PetscFunctionBegin; 80 PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows"); 81 lrow = row - rstart; 82 PetscCall(MatGetRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v)); 83 PetscFunctionReturn(PETSC_SUCCESS); 84 } 85 86 static PetscErrorCode MatRestoreRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 87 { 88 Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 89 PetscInt lrow, rstart = A->rmap->rstart, rend = A->rmap->rend; 90 91 PetscFunctionBegin; 92 PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows"); 93 lrow = row - rstart; 94 PetscCall(MatRestoreRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v)); 95 PetscFunctionReturn(PETSC_SUCCESS); 96 } 97 98 static PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A, Mat *a) 99 { 100 Mat_MPIDense *mdn = (Mat_MPIDense *)A->data; 101 PetscInt m = A->rmap->n, rstart = A->rmap->rstart; 102 PetscScalar *array; 103 MPI_Comm comm; 104 PetscBool flg; 105 Mat B; 106 107 PetscFunctionBegin; 108 PetscCall(MatHasCongruentLayouts(A, &flg)); 109 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only square matrices supported."); 110 PetscCall(PetscObjectQuery((PetscObject)A, "DiagonalBlock", (PetscObject *)&B)); 111 if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */ 112 #if PetscDefined(HAVE_CUDA) 113 PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSECUDA, &flg)); 114 PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSECUDA); 115 #elif PetscDefined(HAVE_HIP) 116 PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSEHIP, &flg)); 117 PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSEHIP); 118 #endif 119 PetscCall(PetscObjectGetComm((PetscObject)mdn->A, &comm)); 120 PetscCall(MatCreate(comm, &B)); 121 PetscCall(MatSetSizes(B, m, m, m, m)); 122 PetscCall(MatSetType(B, ((PetscObject)mdn->A)->type_name)); 123 PetscCall(MatDenseGetArrayRead(mdn->A, (const PetscScalar **)&array)); 124 PetscCall(MatSeqDenseSetPreallocation(B, array + m * rstart)); 125 PetscCall(MatDenseRestoreArrayRead(mdn->A, (const PetscScalar **)&array)); 126 PetscCall(PetscObjectCompose((PetscObject)A, "DiagonalBlock", (PetscObject)B)); 127 *a = B; 128 PetscCall(MatDestroy(&B)); 129 } else *a = B; 130 PetscFunctionReturn(PETSC_SUCCESS); 131 } 132 133 static PetscErrorCode MatSetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], const PetscScalar v[], InsertMode addv) 134 { 135 Mat_MPIDense *A = (Mat_MPIDense *)mat->data; 136 PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row; 137 PetscBool roworiented = A->roworiented; 138 139 PetscFunctionBegin; 140 for (i = 0; i < m; i++) { 141 if (idxm[i] < 0) continue; 142 PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large"); 143 if (idxm[i] >= rstart && idxm[i] < rend) { 144 row = idxm[i] - rstart; 145 if (roworiented) { 146 PetscCall(MatSetValues(A->A, 1, &row, n, idxn, PetscSafePointerPlusOffset(v, i * n), addv)); 147 } else { 148 for (j = 0; j < n; j++) { 149 if (idxn[j] < 0) continue; 150 PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large"); 151 PetscCall(MatSetValues(A->A, 1, &row, 1, &idxn[j], PetscSafePointerPlusOffset(v, i + j * m), addv)); 152 } 153 } 154 } else if (!A->donotstash) { 155 mat->assembled = PETSC_FALSE; 156 if (roworiented) { 157 PetscCall(MatStashValuesRow_Private(&mat->stash, idxm[i], n, idxn, PetscSafePointerPlusOffset(v, i * n), PETSC_FALSE)); 158 } else { 159 PetscCall(MatStashValuesCol_Private(&mat->stash, idxm[i], n, idxn, PetscSafePointerPlusOffset(v, i), m, PETSC_FALSE)); 160 } 161 } 162 } 163 PetscFunctionReturn(PETSC_SUCCESS); 164 } 165 166 static PetscErrorCode MatGetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[]) 167 { 168 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 169 PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row; 170 171 PetscFunctionBegin; 172 for (i = 0; i < m; i++) { 173 if (idxm[i] < 0) continue; /* negative row */ 174 PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large"); 175 PetscCheck(idxm[i] >= rstart && idxm[i] < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported"); 176 row = idxm[i] - rstart; 177 for (j = 0; j < n; j++) { 178 if (idxn[j] < 0) continue; /* negative column */ 179 PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large"); 180 PetscCall(MatGetValues(mdn->A, 1, &row, 1, &idxn[j], v + i * n + j)); 181 } 182 } 183 PetscFunctionReturn(PETSC_SUCCESS); 184 } 185 186 static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A, PetscInt *lda) 187 { 188 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 189 190 PetscFunctionBegin; 191 PetscCall(MatDenseGetLDA(a->A, lda)); 192 PetscFunctionReturn(PETSC_SUCCESS); 193 } 194 195 static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A, PetscInt lda) 196 { 197 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 198 MatType mtype = MATSEQDENSE; 199 200 PetscFunctionBegin; 201 if (!a->A) { 202 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 203 PetscCall(PetscLayoutSetUp(A->rmap)); 204 PetscCall(PetscLayoutSetUp(A->cmap)); 205 PetscCall(MatCreate(PETSC_COMM_SELF, &a->A)); 206 PetscCall(MatSetSizes(a->A, A->rmap->n, A->cmap->N, A->rmap->n, A->cmap->N)); 207 #if PetscDefined(HAVE_CUDA) 208 PetscBool iscuda; 209 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSECUDA, &iscuda)); 210 if (iscuda) mtype = MATSEQDENSECUDA; 211 #elif PetscDefined(HAVE_HIP) 212 PetscBool iship; 213 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSEHIP, &iship)); 214 if (iship) mtype = MATSEQDENSEHIP; 215 #endif 216 PetscCall(MatSetType(a->A, mtype)); 217 } 218 PetscCall(MatDenseSetLDA(a->A, lda)); 219 PetscFunctionReturn(PETSC_SUCCESS); 220 } 221 222 static PetscErrorCode MatDenseGetArray_MPIDense(Mat A, PetscScalar **array) 223 { 224 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 225 226 PetscFunctionBegin; 227 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 228 PetscCall(MatDenseGetArray(a->A, array)); 229 PetscFunctionReturn(PETSC_SUCCESS); 230 } 231 232 static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A, PetscScalar **array) 233 { 234 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 235 236 PetscFunctionBegin; 237 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 238 PetscCall(MatDenseGetArrayRead(a->A, (const PetscScalar **)array)); 239 PetscFunctionReturn(PETSC_SUCCESS); 240 } 241 242 static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A, PetscScalar **array) 243 { 244 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 245 246 PetscFunctionBegin; 247 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 248 PetscCall(MatDenseGetArrayWrite(a->A, array)); 249 PetscFunctionReturn(PETSC_SUCCESS); 250 } 251 252 static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A, const PetscScalar *array) 253 { 254 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 255 256 PetscFunctionBegin; 257 PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 258 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 259 PetscCall(MatDensePlaceArray(a->A, array)); 260 PetscFunctionReturn(PETSC_SUCCESS); 261 } 262 263 static PetscErrorCode MatDenseResetArray_MPIDense(Mat A) 264 { 265 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 266 267 PetscFunctionBegin; 268 PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 269 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 270 PetscCall(MatDenseResetArray(a->A)); 271 PetscFunctionReturn(PETSC_SUCCESS); 272 } 273 274 static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A, const PetscScalar *array) 275 { 276 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 277 278 PetscFunctionBegin; 279 PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 280 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 281 PetscCall(MatDenseReplaceArray(a->A, array)); 282 PetscFunctionReturn(PETSC_SUCCESS); 283 } 284 285 static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A, IS isrow, IS iscol, MatReuse scall, Mat *B) 286 { 287 Mat_MPIDense *mat = (Mat_MPIDense *)A->data, *newmatd; 288 PetscInt lda, i, j, rstart, rend, nrows, ncols, Ncols, nlrows, nlcols; 289 const PetscInt *irow, *icol; 290 const PetscScalar *v; 291 PetscScalar *bv; 292 Mat newmat; 293 IS iscol_local; 294 MPI_Comm comm_is, comm_mat; 295 296 PetscFunctionBegin; 297 PetscCall(PetscObjectGetComm((PetscObject)A, &comm_mat)); 298 PetscCall(PetscObjectGetComm((PetscObject)iscol, &comm_is)); 299 PetscCheck(comm_mat == comm_is, PETSC_COMM_SELF, PETSC_ERR_ARG_NOTSAMECOMM, "IS communicator must match matrix communicator"); 300 301 PetscCall(ISAllGather(iscol, &iscol_local)); 302 PetscCall(ISGetIndices(isrow, &irow)); 303 PetscCall(ISGetIndices(iscol_local, &icol)); 304 PetscCall(ISGetLocalSize(isrow, &nrows)); 305 PetscCall(ISGetLocalSize(iscol, &ncols)); 306 PetscCall(ISGetSize(iscol, &Ncols)); /* global number of columns, size of iscol_local */ 307 308 /* No parallel redistribution currently supported! Should really check each index set 309 to confirm that it is OK. ... Currently supports only submatrix same partitioning as 310 original matrix! */ 311 312 PetscCall(MatGetLocalSize(A, &nlrows, &nlcols)); 313 PetscCall(MatGetOwnershipRange(A, &rstart, &rend)); 314 315 /* Check submatrix call */ 316 if (scall == MAT_REUSE_MATRIX) { 317 /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */ 318 /* Really need to test rows and column sizes! */ 319 newmat = *B; 320 } else { 321 /* Create and fill new matrix */ 322 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &newmat)); 323 PetscCall(MatSetSizes(newmat, nrows, ncols, PETSC_DECIDE, Ncols)); 324 PetscCall(MatSetType(newmat, ((PetscObject)A)->type_name)); 325 PetscCall(MatMPIDenseSetPreallocation(newmat, NULL)); 326 } 327 328 /* Now extract the data pointers and do the copy, column at a time */ 329 newmatd = (Mat_MPIDense *)newmat->data; 330 PetscCall(MatDenseGetArray(newmatd->A, &bv)); 331 PetscCall(MatDenseGetArrayRead(mat->A, &v)); 332 PetscCall(MatDenseGetLDA(mat->A, &lda)); 333 for (i = 0; i < Ncols; i++) { 334 const PetscScalar *av = v + lda * icol[i]; 335 for (j = 0; j < nrows; j++) *bv++ = av[irow[j] - rstart]; 336 } 337 PetscCall(MatDenseRestoreArrayRead(mat->A, &v)); 338 PetscCall(MatDenseRestoreArray(newmatd->A, &bv)); 339 340 /* Assemble the matrices so that the correct flags are set */ 341 PetscCall(MatAssemblyBegin(newmat, MAT_FINAL_ASSEMBLY)); 342 PetscCall(MatAssemblyEnd(newmat, MAT_FINAL_ASSEMBLY)); 343 344 /* Free work space */ 345 PetscCall(ISRestoreIndices(isrow, &irow)); 346 PetscCall(ISRestoreIndices(iscol_local, &icol)); 347 PetscCall(ISDestroy(&iscol_local)); 348 *B = newmat; 349 PetscFunctionReturn(PETSC_SUCCESS); 350 } 351 352 static PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A, PetscScalar **array) 353 { 354 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 355 356 PetscFunctionBegin; 357 PetscCall(MatDenseRestoreArray(a->A, array)); 358 PetscFunctionReturn(PETSC_SUCCESS); 359 } 360 361 static PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A, PetscScalar **array) 362 { 363 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 364 365 PetscFunctionBegin; 366 PetscCall(MatDenseRestoreArrayRead(a->A, (const PetscScalar **)array)); 367 PetscFunctionReturn(PETSC_SUCCESS); 368 } 369 370 static PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A, PetscScalar **array) 371 { 372 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 373 374 PetscFunctionBegin; 375 PetscCall(MatDenseRestoreArrayWrite(a->A, array)); 376 PetscFunctionReturn(PETSC_SUCCESS); 377 } 378 379 static PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat, MatAssemblyType mode) 380 { 381 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 382 PetscInt nstash, reallocs; 383 384 PetscFunctionBegin; 385 if (mdn->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS); 386 387 PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range)); 388 PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs)); 389 PetscCall(PetscInfo(mdn->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs)); 390 PetscFunctionReturn(PETSC_SUCCESS); 391 } 392 393 static PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat, MatAssemblyType mode) 394 { 395 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 396 PetscInt i, *row, *col, flg, j, rstart, ncols; 397 PetscMPIInt n; 398 PetscScalar *val; 399 400 PetscFunctionBegin; 401 if (!mdn->donotstash && !mat->nooffprocentries) { 402 /* wait on receives */ 403 while (1) { 404 PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg)); 405 if (!flg) break; 406 407 for (i = 0; i < n;) { 408 /* Now identify the consecutive vals belonging to the same row */ 409 for (j = i, rstart = row[j]; j < n; j++) { 410 if (row[j] != rstart) break; 411 } 412 if (j < n) ncols = j - i; 413 else ncols = n - i; 414 /* Now assemble all these values with a single function call */ 415 PetscCall(MatSetValues_MPIDense(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode)); 416 i = j; 417 } 418 } 419 PetscCall(MatStashScatterEnd_Private(&mat->stash)); 420 } 421 422 PetscCall(MatAssemblyBegin(mdn->A, mode)); 423 PetscCall(MatAssemblyEnd(mdn->A, mode)); 424 PetscFunctionReturn(PETSC_SUCCESS); 425 } 426 427 static PetscErrorCode MatZeroEntries_MPIDense(Mat A) 428 { 429 Mat_MPIDense *l = (Mat_MPIDense *)A->data; 430 431 PetscFunctionBegin; 432 PetscCall(MatZeroEntries(l->A)); 433 PetscFunctionReturn(PETSC_SUCCESS); 434 } 435 436 static PetscErrorCode MatZeroRows_MPIDense(Mat A, PetscInt n, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) 437 { 438 Mat_MPIDense *l = (Mat_MPIDense *)A->data; 439 PetscInt i, len, *lrows; 440 441 PetscFunctionBegin; 442 /* get locally owned rows */ 443 PetscCall(PetscLayoutMapLocal(A->rmap, n, rows, &len, &lrows, NULL)); 444 /* fix right-hand side if needed */ 445 if (x && b) { 446 const PetscScalar *xx; 447 PetscScalar *bb; 448 449 PetscCall(VecGetArrayRead(x, &xx)); 450 PetscCall(VecGetArrayWrite(b, &bb)); 451 for (i = 0; i < len; ++i) bb[lrows[i]] = diag * xx[lrows[i]]; 452 PetscCall(VecRestoreArrayRead(x, &xx)); 453 PetscCall(VecRestoreArrayWrite(b, &bb)); 454 } 455 PetscCall(MatZeroRows(l->A, len, lrows, 0.0, NULL, NULL)); 456 if (diag != 0.0) { 457 Vec d; 458 459 PetscCall(MatCreateVecs(A, NULL, &d)); 460 PetscCall(VecSet(d, diag)); 461 PetscCall(MatDiagonalSet(A, d, INSERT_VALUES)); 462 PetscCall(VecDestroy(&d)); 463 } 464 PetscCall(PetscFree(lrows)); 465 PetscFunctionReturn(PETSC_SUCCESS); 466 } 467 468 PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat, Vec, Vec); 469 PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat, Vec, Vec, Vec); 470 PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat, Vec, Vec); 471 PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat, Vec, Vec, Vec); 472 473 static PetscErrorCode MatMultColumnRange_MPIDense(Mat mat, Vec xx, Vec yy, PetscInt c_start, PetscInt c_end) 474 { 475 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 476 const PetscScalar *ax; 477 PetscScalar *ay; 478 PetscMemType axmtype, aymtype; 479 480 PetscFunctionBegin; 481 if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat)); 482 PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype)); 483 PetscCall(VecGetArrayWriteAndMemType(mdn->lvec, &ay, &aymtype)); 484 PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE)); 485 PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE)); 486 PetscCall(VecRestoreArrayWriteAndMemType(mdn->lvec, &ay)); 487 PetscCall(VecRestoreArrayReadAndMemType(xx, &ax)); 488 PetscUseMethod(mdn->A, "MatMultColumnRange_C", (Mat, Vec, Vec, PetscInt, PetscInt), (mdn->A, mdn->lvec, yy, c_start, c_end)); 489 PetscFunctionReturn(PETSC_SUCCESS); 490 } 491 492 static PetscErrorCode MatMult_MPIDense(Mat mat, Vec xx, Vec yy) 493 { 494 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 495 const PetscScalar *ax; 496 PetscScalar *ay; 497 PetscMemType axmtype, aymtype; 498 499 PetscFunctionBegin; 500 if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat)); 501 PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype)); 502 PetscCall(VecGetArrayWriteAndMemType(mdn->lvec, &ay, &aymtype)); 503 PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE)); 504 PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE)); 505 PetscCall(VecRestoreArrayWriteAndMemType(mdn->lvec, &ay)); 506 PetscCall(VecRestoreArrayReadAndMemType(xx, &ax)); 507 PetscCall((*mdn->A->ops->mult)(mdn->A, mdn->lvec, yy)); 508 PetscFunctionReturn(PETSC_SUCCESS); 509 } 510 511 static PetscErrorCode MatMultAddColumnRange_MPIDense(Mat mat, Vec xx, Vec yy, Vec zz, PetscInt c_start, PetscInt c_end) 512 { 513 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 514 const PetscScalar *ax; 515 PetscScalar *ay; 516 PetscMemType axmtype, aymtype; 517 518 PetscFunctionBegin; 519 if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat)); 520 PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype)); 521 PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype)); 522 PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE)); 523 PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE)); 524 PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay)); 525 PetscCall(VecRestoreArrayReadAndMemType(xx, &ax)); 526 PetscUseMethod(mdn->A, "MatMultAddColumnRange_C", (Mat, Vec, Vec, Vec, PetscInt, PetscInt), (mdn->A, mdn->lvec, yy, zz, c_start, c_end)); 527 PetscFunctionReturn(PETSC_SUCCESS); 528 } 529 530 static PetscErrorCode MatMultAdd_MPIDense(Mat mat, Vec xx, Vec yy, Vec zz) 531 { 532 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 533 const PetscScalar *ax; 534 PetscScalar *ay; 535 PetscMemType axmtype, aymtype; 536 537 PetscFunctionBegin; 538 if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat)); 539 PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype)); 540 PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype)); 541 PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE)); 542 PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE)); 543 PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay)); 544 PetscCall(VecRestoreArrayReadAndMemType(xx, &ax)); 545 PetscCall((*mdn->A->ops->multadd)(mdn->A, mdn->lvec, yy, zz)); 546 PetscFunctionReturn(PETSC_SUCCESS); 547 } 548 549 static PetscErrorCode MatMultHermitianTransposeColumnRange_MPIDense(Mat A, Vec xx, Vec yy, PetscInt c_start, PetscInt c_end) 550 { 551 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 552 const PetscScalar *ax; 553 PetscScalar *ay; 554 PetscMemType axmtype, aymtype; 555 PetscInt r_start, r_end; 556 PetscInt c_start_local, c_end_local; 557 558 PetscFunctionBegin; 559 if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); 560 PetscCall(VecZeroEntries(a->lvec)); 561 PetscCall(VecGetOwnershipRange(yy, &r_start, &r_end)); 562 c_start_local = PetscMax(c_start, r_start); 563 c_end_local = PetscMin(c_end, r_end); 564 PetscCall(VecGetArrayAndMemType(yy, &ay, &aymtype)); 565 if (c_end_local > c_start_local) { 566 if (PetscMemTypeHost(aymtype)) { 567 PetscCall(PetscArrayzero(&ay[c_start_local], (size_t)(c_end_local - c_start_local))); 568 } else { 569 PetscCall(PetscDeviceRegisterMemory(ay, aymtype, sizeof(*ay) * ((size_t)(r_end - r_start)))); 570 PetscCall(PetscDeviceArrayZero(NULL, &ay[c_start_local], (size_t)(c_end_local - c_start_local))); 571 } 572 } 573 PetscUseMethod(a->A, "MatMultHermitianTransposeColumnRange_C", (Mat, Vec, Vec, PetscInt, PetscInt), (a->A, xx, a->lvec, c_start, c_end)); 574 PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype)); 575 PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM)); 576 PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM)); 577 PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax)); 578 PetscCall(VecRestoreArrayAndMemType(yy, &ay)); 579 PetscFunctionReturn(PETSC_SUCCESS); 580 } 581 582 static PetscErrorCode MatMultTransposeKernel_MPIDense(Mat A, Vec xx, Vec yy, PetscBool herm) 583 { 584 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 585 const PetscScalar *ax; 586 PetscScalar *ay; 587 PetscMemType axmtype, aymtype; 588 589 PetscFunctionBegin; 590 if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); 591 PetscCall(VecSet(yy, 0.0)); 592 if (herm) PetscCall((*a->A->ops->multhermitiantranspose)(a->A, xx, a->lvec)); 593 else PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec)); 594 PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype)); 595 PetscCall(VecGetArrayAndMemType(yy, &ay, &aymtype)); 596 PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM)); 597 PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM)); 598 PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax)); 599 PetscCall(VecRestoreArrayAndMemType(yy, &ay)); 600 PetscFunctionReturn(PETSC_SUCCESS); 601 } 602 603 static PetscErrorCode MatMultHermitianTransposeAddColumnRange_MPIDense(Mat A, Vec xx, Vec yy, Vec zz, PetscInt c_start, PetscInt c_end) 604 { 605 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 606 const PetscScalar *ax; 607 PetscScalar *ay; 608 PetscMemType axmtype, aymtype; 609 610 PetscFunctionBegin; 611 if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); 612 PetscCall(VecCopy(yy, zz)); 613 PetscCall(VecZeroEntries(a->lvec)); 614 PetscUseMethod(a->A, "MatMultHermitianTransposeColumnRange_C", (Mat, Vec, Vec, PetscInt, PetscInt), (a->A, xx, a->lvec, c_start, c_end)); 615 PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype)); 616 PetscCall(VecGetArrayAndMemType(zz, &ay, &aymtype)); 617 PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM)); 618 PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM)); 619 PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax)); 620 PetscCall(VecRestoreArrayAndMemType(zz, &ay)); 621 PetscFunctionReturn(PETSC_SUCCESS); 622 } 623 624 static PetscErrorCode MatMultTransposeAddKernel_MPIDense(Mat A, Vec xx, Vec yy, Vec zz, PetscBool herm) 625 { 626 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 627 const PetscScalar *ax; 628 PetscScalar *ay; 629 PetscMemType axmtype, aymtype; 630 631 PetscFunctionBegin; 632 if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); 633 PetscCall(VecCopy(yy, zz)); 634 if (herm) PetscCall((*a->A->ops->multhermitiantranspose)(a->A, xx, a->lvec)); 635 else PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec)); 636 PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype)); 637 PetscCall(VecGetArrayAndMemType(zz, &ay, &aymtype)); 638 PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM)); 639 PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM)); 640 PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax)); 641 PetscCall(VecRestoreArrayAndMemType(zz, &ay)); 642 PetscFunctionReturn(PETSC_SUCCESS); 643 } 644 645 static PetscErrorCode MatMultTranspose_MPIDense(Mat A, Vec xx, Vec yy) 646 { 647 PetscFunctionBegin; 648 PetscCall(MatMultTransposeKernel_MPIDense(A, xx, yy, PETSC_FALSE)); 649 PetscFunctionReturn(PETSC_SUCCESS); 650 } 651 652 static PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A, Vec xx, Vec yy, Vec zz) 653 { 654 PetscFunctionBegin; 655 PetscCall(MatMultTransposeAddKernel_MPIDense(A, xx, yy, zz, PETSC_FALSE)); 656 PetscFunctionReturn(PETSC_SUCCESS); 657 } 658 659 static PetscErrorCode MatMultHermitianTranspose_MPIDense(Mat A, Vec xx, Vec yy) 660 { 661 PetscFunctionBegin; 662 PetscCall(MatMultTransposeKernel_MPIDense(A, xx, yy, PETSC_TRUE)); 663 PetscFunctionReturn(PETSC_SUCCESS); 664 } 665 666 static PetscErrorCode MatMultHermitianTransposeAdd_MPIDense(Mat A, Vec xx, Vec yy, Vec zz) 667 { 668 PetscFunctionBegin; 669 PetscCall(MatMultTransposeAddKernel_MPIDense(A, xx, yy, zz, PETSC_TRUE)); 670 PetscFunctionReturn(PETSC_SUCCESS); 671 } 672 673 PetscErrorCode MatGetDiagonal_MPIDense(Mat A, Vec v) 674 { 675 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 676 PetscInt lda, len, i, nl, ng, m = A->rmap->n, radd; 677 PetscScalar *x; 678 const PetscScalar *av; 679 680 PetscFunctionBegin; 681 PetscCall(VecGetArray(v, &x)); 682 PetscCall(VecGetSize(v, &ng)); 683 PetscCheck(ng == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming mat and vec"); 684 PetscCall(VecGetLocalSize(v, &nl)); 685 len = PetscMin(a->A->rmap->n, a->A->cmap->n); 686 radd = A->rmap->rstart * m; 687 PetscCall(MatDenseGetArrayRead(a->A, &av)); 688 PetscCall(MatDenseGetLDA(a->A, &lda)); 689 for (i = 0; i < len; i++) x[i] = av[radd + i * lda + i]; 690 PetscCall(MatDenseRestoreArrayRead(a->A, &av)); 691 if (nl - i > 0) PetscCall(PetscArrayzero(x + i, nl - i)); 692 PetscCall(VecRestoreArray(v, &x)); 693 PetscFunctionReturn(PETSC_SUCCESS); 694 } 695 696 static PetscErrorCode MatDestroy_MPIDense(Mat mat) 697 { 698 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 699 700 PetscFunctionBegin; 701 PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N)); 702 PetscCall(MatStashDestroy_Private(&mat->stash)); 703 PetscCheck(!mdn->vecinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 704 PetscCheck(!mdn->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 705 PetscCall(MatDestroy(&mdn->A)); 706 PetscCall(VecDestroy(&mdn->lvec)); 707 PetscCall(PetscSFDestroy(&mdn->Mvctx)); 708 PetscCall(VecDestroy(&mdn->cvec)); 709 PetscCall(MatDestroy(&mdn->cmat)); 710 711 PetscCall(PetscFree(mat->data)); 712 PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL)); 713 714 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", NULL)); 715 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", NULL)); 716 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", NULL)); 717 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", NULL)); 718 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", NULL)); 719 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", NULL)); 720 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", NULL)); 721 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", NULL)); 722 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", NULL)); 723 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", NULL)); 724 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", NULL)); 725 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL)); 726 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", NULL)); 727 #if defined(PETSC_HAVE_ELEMENTAL) 728 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", NULL)); 729 #endif 730 #if defined(PETSC_HAVE_SCALAPACK) 731 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", NULL)); 732 #endif 733 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", NULL)); 734 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", NULL)); 735 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", NULL)); 736 #if defined(PETSC_HAVE_CUDA) 737 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", NULL)); 738 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", NULL)); 739 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", NULL)); 740 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensecuda_mpidense_C", NULL)); 741 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", NULL)); 742 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", NULL)); 743 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", NULL)); 744 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", NULL)); 745 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArray_C", NULL)); 746 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayRead_C", NULL)); 747 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayWrite_C", NULL)); 748 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArray_C", NULL)); 749 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayRead_C", NULL)); 750 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayWrite_C", NULL)); 751 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAPlaceArray_C", NULL)); 752 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAResetArray_C", NULL)); 753 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAReplaceArray_C", NULL)); 754 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDASetPreallocation_C", NULL)); 755 #endif 756 #if defined(PETSC_HAVE_HIP) 757 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", NULL)); 758 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", NULL)); 759 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", NULL)); 760 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensehip_mpidense_C", NULL)); 761 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensehip_C", NULL)); 762 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidensehip_C", NULL)); 763 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaij_C", NULL)); 764 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaijhipsparse_C", NULL)); 765 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArray_C", NULL)); 766 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayRead_C", NULL)); 767 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayWrite_C", NULL)); 768 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArray_C", NULL)); 769 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayRead_C", NULL)); 770 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayWrite_C", NULL)); 771 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPPlaceArray_C", NULL)); 772 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPResetArray_C", NULL)); 773 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPReplaceArray_C", NULL)); 774 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPSetPreallocation_C", NULL)); 775 #endif 776 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", NULL)); 777 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", NULL)); 778 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", NULL)); 779 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", NULL)); 780 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", NULL)); 781 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", NULL)); 782 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", NULL)); 783 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", NULL)); 784 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", NULL)); 785 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", NULL)); 786 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultColumnRange_C", NULL)); 787 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultAddColumnRange_C", NULL)); 788 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeColumnRange_C", NULL)); 789 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeAddColumnRange_C", NULL)); 790 791 PetscCall(PetscObjectCompose((PetscObject)mat, "DiagonalBlock", NULL)); 792 PetscFunctionReturn(PETSC_SUCCESS); 793 } 794 795 #include <petscdraw.h> 796 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat, PetscViewer viewer) 797 { 798 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 799 PetscMPIInt rank; 800 PetscViewerType vtype; 801 PetscBool isascii, isdraw; 802 PetscViewer sviewer; 803 PetscViewerFormat format; 804 805 PetscFunctionBegin; 806 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 807 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii)); 808 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 809 if (isascii) { 810 PetscCall(PetscViewerGetType(viewer, &vtype)); 811 PetscCall(PetscViewerGetFormat(viewer, &format)); 812 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 813 MatInfo info; 814 PetscCall(MatGetInfo(mat, MAT_LOCAL, &info)); 815 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 816 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %" PetscInt_FMT " \n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, 817 (PetscInt)info.memory)); 818 PetscCall(PetscViewerFlush(viewer)); 819 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 820 if (mdn->Mvctx) PetscCall(PetscSFView(mdn->Mvctx, viewer)); 821 PetscFunctionReturn(PETSC_SUCCESS); 822 } else if (format == PETSC_VIEWER_ASCII_INFO) { 823 PetscFunctionReturn(PETSC_SUCCESS); 824 } 825 } else if (isdraw) { 826 PetscDraw draw; 827 PetscBool isnull; 828 829 PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw)); 830 PetscCall(PetscDrawIsNull(draw, &isnull)); 831 if (isnull) PetscFunctionReturn(PETSC_SUCCESS); 832 } 833 834 { 835 /* assemble the entire matrix onto first processor. */ 836 Mat A; 837 PetscInt M = mat->rmap->N, N = mat->cmap->N, m, row, i, nz; 838 PetscInt *cols; 839 PetscScalar *vals; 840 841 PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A)); 842 if (rank == 0) { 843 PetscCall(MatSetSizes(A, M, N, M, N)); 844 } else { 845 PetscCall(MatSetSizes(A, 0, 0, M, N)); 846 } 847 /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */ 848 PetscCall(MatSetType(A, MATMPIDENSE)); 849 PetscCall(MatMPIDenseSetPreallocation(A, NULL)); 850 851 /* Copy the matrix ... This isn't the most efficient means, 852 but it's quick for now */ 853 A->insertmode = INSERT_VALUES; 854 855 row = mat->rmap->rstart; 856 m = mdn->A->rmap->n; 857 for (i = 0; i < m; i++) { 858 PetscCall(MatGetRow_MPIDense(mat, row, &nz, &cols, &vals)); 859 PetscCall(MatSetValues_MPIDense(A, 1, &row, nz, cols, vals, INSERT_VALUES)); 860 PetscCall(MatRestoreRow_MPIDense(mat, row, &nz, &cols, &vals)); 861 row++; 862 } 863 864 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 865 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 866 PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 867 if (rank == 0) { 868 PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIDense *)A->data)->A, ((PetscObject)mat)->name)); 869 PetscCall(MatView_SeqDense(((Mat_MPIDense *)A->data)->A, sviewer)); 870 } 871 PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 872 PetscCall(MatDestroy(&A)); 873 } 874 PetscFunctionReturn(PETSC_SUCCESS); 875 } 876 877 static PetscErrorCode MatView_MPIDense(Mat mat, PetscViewer viewer) 878 { 879 PetscBool isascii, isbinary, isdraw, issocket; 880 881 PetscFunctionBegin; 882 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii)); 883 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 884 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket)); 885 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 886 887 if (isascii || issocket || isdraw) PetscCall(MatView_MPIDense_ASCIIorDraworSocket(mat, viewer)); 888 else if (isbinary) PetscCall(MatView_Dense_Binary(mat, viewer)); 889 PetscFunctionReturn(PETSC_SUCCESS); 890 } 891 892 static PetscErrorCode MatGetInfo_MPIDense(Mat A, MatInfoType flag, MatInfo *info) 893 { 894 Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 895 Mat mdn = mat->A; 896 PetscLogDouble isend[5], irecv[5]; 897 898 PetscFunctionBegin; 899 info->block_size = 1.0; 900 901 PetscCall(MatGetInfo(mdn, MAT_LOCAL, info)); 902 903 isend[0] = info->nz_used; 904 isend[1] = info->nz_allocated; 905 isend[2] = info->nz_unneeded; 906 isend[3] = info->memory; 907 isend[4] = info->mallocs; 908 if (flag == MAT_LOCAL) { 909 info->nz_used = isend[0]; 910 info->nz_allocated = isend[1]; 911 info->nz_unneeded = isend[2]; 912 info->memory = isend[3]; 913 info->mallocs = isend[4]; 914 } else if (flag == MAT_GLOBAL_MAX) { 915 PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)A))); 916 917 info->nz_used = irecv[0]; 918 info->nz_allocated = irecv[1]; 919 info->nz_unneeded = irecv[2]; 920 info->memory = irecv[3]; 921 info->mallocs = irecv[4]; 922 } else if (flag == MAT_GLOBAL_SUM) { 923 PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)A))); 924 925 info->nz_used = irecv[0]; 926 info->nz_allocated = irecv[1]; 927 info->nz_unneeded = irecv[2]; 928 info->memory = irecv[3]; 929 info->mallocs = irecv[4]; 930 } 931 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 932 info->fill_ratio_needed = 0; 933 info->factor_mallocs = 0; 934 PetscFunctionReturn(PETSC_SUCCESS); 935 } 936 937 static PetscErrorCode MatSetOption_MPIDense(Mat A, MatOption op, PetscBool flg) 938 { 939 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 940 941 PetscFunctionBegin; 942 switch (op) { 943 case MAT_NEW_NONZERO_LOCATIONS: 944 case MAT_NEW_NONZERO_LOCATION_ERR: 945 case MAT_NEW_NONZERO_ALLOCATION_ERR: 946 MatCheckPreallocated(A, 1); 947 PetscCall(MatSetOption(a->A, op, flg)); 948 break; 949 case MAT_ROW_ORIENTED: 950 MatCheckPreallocated(A, 1); 951 a->roworiented = flg; 952 PetscCall(MatSetOption(a->A, op, flg)); 953 break; 954 case MAT_IGNORE_OFF_PROC_ENTRIES: 955 a->donotstash = flg; 956 break; 957 case MAT_SYMMETRIC: 958 case MAT_STRUCTURALLY_SYMMETRIC: 959 case MAT_HERMITIAN: 960 case MAT_SYMMETRY_ETERNAL: 961 case MAT_STRUCTURAL_SYMMETRY_ETERNAL: 962 case MAT_SPD: 963 case MAT_SPD_ETERNAL: 964 /* if the diagonal matrix is square it inherits some of the properties above */ 965 if (a->A && A->rmap->n == A->cmap->n) PetscCall(MatSetOption(a->A, op, flg)); 966 break; 967 default: 968 break; 969 } 970 PetscFunctionReturn(PETSC_SUCCESS); 971 } 972 973 static PetscErrorCode MatDiagonalScale_MPIDense(Mat A, Vec ll, Vec rr) 974 { 975 Mat_MPIDense *mdn = (Mat_MPIDense *)A->data; 976 const PetscScalar *l; 977 PetscScalar x, *v, *vv, *r; 978 PetscInt i, j, s2a, s3a, s2, s3, m = mdn->A->rmap->n, n = mdn->A->cmap->n, lda; 979 980 PetscFunctionBegin; 981 PetscCall(MatDenseGetArray(mdn->A, &vv)); 982 PetscCall(MatDenseGetLDA(mdn->A, &lda)); 983 PetscCall(MatGetLocalSize(A, &s2, &s3)); 984 if (ll) { 985 PetscCall(VecGetLocalSize(ll, &s2a)); 986 PetscCheck(s2a == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left scaling vector non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT, s2a, s2); 987 PetscCall(VecGetArrayRead(ll, &l)); 988 for (i = 0; i < m; i++) { 989 x = l[i]; 990 v = vv + i; 991 for (j = 0; j < n; j++) { 992 (*v) *= x; 993 v += lda; 994 } 995 } 996 PetscCall(VecRestoreArrayRead(ll, &l)); 997 PetscCall(PetscLogFlops(1.0 * n * m)); 998 } 999 if (rr) { 1000 const PetscScalar *ar; 1001 1002 PetscCall(VecGetLocalSize(rr, &s3a)); 1003 PetscCheck(s3a == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Right scaling vec non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT ".", s3a, s3); 1004 PetscCall(VecGetArrayRead(rr, &ar)); 1005 if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); 1006 PetscCall(VecGetArray(mdn->lvec, &r)); 1007 PetscCall(PetscSFBcastBegin(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE)); 1008 PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE)); 1009 PetscCall(VecRestoreArrayRead(rr, &ar)); 1010 for (i = 0; i < n; i++) { 1011 x = r[i]; 1012 v = vv + i * lda; 1013 for (j = 0; j < m; j++) (*v++) *= x; 1014 } 1015 PetscCall(VecRestoreArray(mdn->lvec, &r)); 1016 PetscCall(PetscLogFlops(1.0 * n * m)); 1017 } 1018 PetscCall(MatDenseRestoreArray(mdn->A, &vv)); 1019 PetscFunctionReturn(PETSC_SUCCESS); 1020 } 1021 1022 static PetscErrorCode MatNorm_MPIDense(Mat A, NormType type, PetscReal *nrm) 1023 { 1024 Mat_MPIDense *mdn = (Mat_MPIDense *)A->data; 1025 PetscInt i, j; 1026 PetscMPIInt size; 1027 PetscReal sum = 0.0; 1028 const PetscScalar *av, *v; 1029 1030 PetscFunctionBegin; 1031 PetscCall(MatDenseGetArrayRead(mdn->A, &av)); 1032 v = av; 1033 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 1034 if (size == 1) { 1035 PetscCall(MatNorm(mdn->A, type, nrm)); 1036 } else { 1037 if (type == NORM_FROBENIUS) { 1038 for (i = 0; i < mdn->A->cmap->n * mdn->A->rmap->n; i++) { 1039 sum += PetscRealPart(PetscConj(*v) * (*v)); 1040 v++; 1041 } 1042 PetscCallMPI(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A))); 1043 *nrm = PetscSqrtReal(*nrm); 1044 PetscCall(PetscLogFlops(2.0 * mdn->A->cmap->n * mdn->A->rmap->n)); 1045 } else if (type == NORM_1) { 1046 PetscReal *tmp; 1047 1048 PetscCall(PetscCalloc1(A->cmap->N, &tmp)); 1049 *nrm = 0.0; 1050 v = av; 1051 for (j = 0; j < mdn->A->cmap->n; j++) { 1052 for (i = 0; i < mdn->A->rmap->n; i++) { 1053 tmp[j] += PetscAbsScalar(*v); 1054 v++; 1055 } 1056 } 1057 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, tmp, A->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A))); 1058 for (j = 0; j < A->cmap->N; j++) { 1059 if (tmp[j] > *nrm) *nrm = tmp[j]; 1060 } 1061 PetscCall(PetscFree(tmp)); 1062 PetscCall(PetscLogFlops(A->cmap->n * A->rmap->n)); 1063 } else if (type == NORM_INFINITY) { /* max row norm */ 1064 PetscCall(MatNorm(mdn->A, type, nrm)); 1065 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A))); 1066 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for two norm"); 1067 } 1068 PetscCall(MatDenseRestoreArrayRead(mdn->A, &av)); 1069 PetscFunctionReturn(PETSC_SUCCESS); 1070 } 1071 1072 static PetscErrorCode MatTranspose_MPIDense(Mat A, MatReuse reuse, Mat *matout) 1073 { 1074 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1075 Mat B; 1076 PetscInt M = A->rmap->N, N = A->cmap->N, m, n, *rwork, rstart = A->rmap->rstart; 1077 PetscInt j, i, lda; 1078 PetscScalar *v; 1079 1080 PetscFunctionBegin; 1081 if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout)); 1082 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 1083 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 1084 PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M)); 1085 PetscCall(MatSetType(B, ((PetscObject)A)->type_name)); 1086 PetscCall(MatMPIDenseSetPreallocation(B, NULL)); 1087 } else B = *matout; 1088 1089 m = a->A->rmap->n; 1090 n = a->A->cmap->n; 1091 PetscCall(MatDenseGetArrayRead(a->A, (const PetscScalar **)&v)); 1092 PetscCall(MatDenseGetLDA(a->A, &lda)); 1093 PetscCall(PetscMalloc1(m, &rwork)); 1094 for (i = 0; i < m; i++) rwork[i] = rstart + i; 1095 for (j = 0; j < n; j++) { 1096 PetscCall(MatSetValues(B, 1, &j, m, rwork, v, INSERT_VALUES)); 1097 v = PetscSafePointerPlusOffset(v, lda); 1098 } 1099 PetscCall(MatDenseRestoreArrayRead(a->A, (const PetscScalar **)&v)); 1100 PetscCall(PetscFree(rwork)); 1101 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 1102 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 1103 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 1104 *matout = B; 1105 } else { 1106 PetscCall(MatHeaderMerge(A, &B)); 1107 } 1108 PetscFunctionReturn(PETSC_SUCCESS); 1109 } 1110 1111 static PetscErrorCode MatDuplicate_MPIDense(Mat, MatDuplicateOption, Mat *); 1112 PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat, PetscScalar); 1113 1114 static PetscErrorCode MatSetUp_MPIDense(Mat A) 1115 { 1116 PetscFunctionBegin; 1117 PetscCall(PetscLayoutSetUp(A->rmap)); 1118 PetscCall(PetscLayoutSetUp(A->cmap)); 1119 if (!A->preallocated) PetscCall(MatMPIDenseSetPreallocation(A, NULL)); 1120 PetscFunctionReturn(PETSC_SUCCESS); 1121 } 1122 1123 static PetscErrorCode MatAXPY_MPIDense(Mat Y, PetscScalar alpha, Mat X, MatStructure str) 1124 { 1125 Mat_MPIDense *A = (Mat_MPIDense *)Y->data, *B = (Mat_MPIDense *)X->data; 1126 1127 PetscFunctionBegin; 1128 PetscCall(MatAXPY(A->A, alpha, B->A, str)); 1129 PetscFunctionReturn(PETSC_SUCCESS); 1130 } 1131 1132 static PetscErrorCode MatConjugate_MPIDense(Mat mat) 1133 { 1134 Mat_MPIDense *a = (Mat_MPIDense *)mat->data; 1135 1136 PetscFunctionBegin; 1137 PetscCall(MatConjugate(a->A)); 1138 PetscFunctionReturn(PETSC_SUCCESS); 1139 } 1140 1141 static PetscErrorCode MatRealPart_MPIDense(Mat A) 1142 { 1143 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1144 1145 PetscFunctionBegin; 1146 PetscCall(MatRealPart(a->A)); 1147 PetscFunctionReturn(PETSC_SUCCESS); 1148 } 1149 1150 static PetscErrorCode MatImaginaryPart_MPIDense(Mat A) 1151 { 1152 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1153 1154 PetscFunctionBegin; 1155 PetscCall(MatImaginaryPart(a->A)); 1156 PetscFunctionReturn(PETSC_SUCCESS); 1157 } 1158 1159 static PetscErrorCode MatGetColumnVector_MPIDense(Mat A, Vec v, PetscInt col) 1160 { 1161 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1162 1163 PetscFunctionBegin; 1164 PetscCheck(a->A, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing local matrix"); 1165 PetscCheck(a->A->ops->getcolumnvector, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing get column operation"); 1166 PetscCall((*a->A->ops->getcolumnvector)(a->A, v, col)); 1167 PetscFunctionReturn(PETSC_SUCCESS); 1168 } 1169 1170 PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat, PetscInt, PetscReal *); 1171 1172 static PetscErrorCode MatGetColumnReductions_MPIDense(Mat A, PetscInt type, PetscReal *reductions) 1173 { 1174 PetscInt i, m, n; 1175 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1176 1177 PetscFunctionBegin; 1178 PetscCall(MatGetSize(A, &m, &n)); 1179 if (type == REDUCTION_MEAN_REALPART) { 1180 PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_REALPART, reductions)); 1181 } else if (type == REDUCTION_MEAN_IMAGINARYPART) { 1182 PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_IMAGINARYPART, reductions)); 1183 } else { 1184 PetscCall(MatGetColumnReductions_SeqDense(a->A, type, reductions)); 1185 } 1186 if (type == NORM_2) { 1187 for (i = 0; i < n; i++) reductions[i] *= reductions[i]; 1188 } 1189 if (type == NORM_INFINITY) { 1190 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, reductions, n, MPIU_REAL, MPIU_MAX, A->hdr.comm)); 1191 } else { 1192 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, reductions, n, MPIU_REAL, MPIU_SUM, A->hdr.comm)); 1193 } 1194 if (type == NORM_2) { 1195 for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]); 1196 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 1197 for (i = 0; i < n; i++) reductions[i] /= m; 1198 } 1199 PetscFunctionReturn(PETSC_SUCCESS); 1200 } 1201 1202 static PetscErrorCode MatSetRandom_MPIDense(Mat x, PetscRandom rctx) 1203 { 1204 Mat_MPIDense *d = (Mat_MPIDense *)x->data; 1205 1206 PetscFunctionBegin; 1207 PetscCall(MatSetRandom(d->A, rctx)); 1208 #if defined(PETSC_HAVE_DEVICE) 1209 x->offloadmask = d->A->offloadmask; 1210 #endif 1211 PetscFunctionReturn(PETSC_SUCCESS); 1212 } 1213 1214 static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A, PetscBool *missing, PetscInt *d) 1215 { 1216 PetscFunctionBegin; 1217 *missing = PETSC_FALSE; 1218 PetscFunctionReturn(PETSC_SUCCESS); 1219 } 1220 1221 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat); 1222 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat); 1223 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat); 1224 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat); 1225 static PetscErrorCode MatEqual_MPIDense(Mat, Mat, PetscBool *); 1226 static PetscErrorCode MatLoad_MPIDense(Mat, PetscViewer); 1227 static PetscErrorCode MatProductSetFromOptions_MPIDense(Mat); 1228 1229 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense, 1230 MatGetRow_MPIDense, 1231 MatRestoreRow_MPIDense, 1232 MatMult_MPIDense, 1233 /* 4*/ MatMultAdd_MPIDense, 1234 MatMultTranspose_MPIDense, 1235 MatMultTransposeAdd_MPIDense, 1236 NULL, 1237 NULL, 1238 NULL, 1239 /* 10*/ NULL, 1240 NULL, 1241 NULL, 1242 NULL, 1243 MatTranspose_MPIDense, 1244 /* 15*/ MatGetInfo_MPIDense, 1245 MatEqual_MPIDense, 1246 MatGetDiagonal_MPIDense, 1247 MatDiagonalScale_MPIDense, 1248 MatNorm_MPIDense, 1249 /* 20*/ MatAssemblyBegin_MPIDense, 1250 MatAssemblyEnd_MPIDense, 1251 MatSetOption_MPIDense, 1252 MatZeroEntries_MPIDense, 1253 /* 24*/ MatZeroRows_MPIDense, 1254 NULL, 1255 NULL, 1256 NULL, 1257 NULL, 1258 /* 29*/ MatSetUp_MPIDense, 1259 NULL, 1260 NULL, 1261 MatGetDiagonalBlock_MPIDense, 1262 NULL, 1263 /* 34*/ MatDuplicate_MPIDense, 1264 NULL, 1265 NULL, 1266 NULL, 1267 NULL, 1268 /* 39*/ MatAXPY_MPIDense, 1269 MatCreateSubMatrices_MPIDense, 1270 NULL, 1271 MatGetValues_MPIDense, 1272 MatCopy_MPIDense, 1273 /* 44*/ NULL, 1274 MatScale_MPIDense, 1275 MatShift_MPIDense, 1276 NULL, 1277 NULL, 1278 /* 49*/ MatSetRandom_MPIDense, 1279 NULL, 1280 NULL, 1281 NULL, 1282 NULL, 1283 /* 54*/ NULL, 1284 NULL, 1285 NULL, 1286 NULL, 1287 NULL, 1288 /* 59*/ MatCreateSubMatrix_MPIDense, 1289 MatDestroy_MPIDense, 1290 MatView_MPIDense, 1291 NULL, 1292 NULL, 1293 /* 64*/ NULL, 1294 NULL, 1295 NULL, 1296 NULL, 1297 NULL, 1298 /* 69*/ NULL, 1299 NULL, 1300 NULL, 1301 NULL, 1302 NULL, 1303 /* 74*/ NULL, 1304 NULL, 1305 NULL, 1306 NULL, 1307 MatLoad_MPIDense, 1308 /* 79*/ NULL, 1309 NULL, 1310 NULL, 1311 NULL, 1312 /* 83*/ NULL, 1313 NULL, 1314 NULL, 1315 NULL, 1316 MatMatTransposeMultSymbolic_MPIDense_MPIDense, 1317 MatMatTransposeMultNumeric_MPIDense_MPIDense, 1318 /* 89*/ NULL, 1319 MatProductSetFromOptions_MPIDense, 1320 NULL, 1321 NULL, 1322 MatConjugate_MPIDense, 1323 /* 94*/ NULL, 1324 NULL, 1325 MatRealPart_MPIDense, 1326 MatImaginaryPart_MPIDense, 1327 NULL, 1328 /*99*/ NULL, 1329 NULL, 1330 NULL, 1331 NULL, 1332 MatGetColumnVector_MPIDense, 1333 /*104*/ MatMissingDiagonal_MPIDense, 1334 NULL, 1335 NULL, 1336 NULL, 1337 NULL, 1338 /*109*/ NULL, 1339 NULL, 1340 NULL, 1341 MatMultHermitianTranspose_MPIDense, 1342 MatMultHermitianTransposeAdd_MPIDense, 1343 /*114*/ NULL, 1344 NULL, 1345 MatGetColumnReductions_MPIDense, 1346 NULL, 1347 NULL, 1348 NULL, 1349 /*120*/ NULL, 1350 MatTransposeMatMultSymbolic_MPIDense_MPIDense, 1351 MatTransposeMatMultNumeric_MPIDense_MPIDense, 1352 NULL, 1353 /*124*/ NULL, 1354 NULL, 1355 NULL, 1356 NULL, 1357 NULL, 1358 /*129*/ NULL, 1359 NULL, 1360 MatCreateMPIMatConcatenateSeqMat_MPIDense, 1361 NULL, 1362 NULL, 1363 /*134*/ NULL, 1364 NULL, 1365 NULL, 1366 NULL, 1367 NULL, 1368 /*139*/ NULL, 1369 NULL, 1370 NULL, 1371 NULL, 1372 NULL}; 1373 1374 static PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat, PetscScalar *data) 1375 { 1376 Mat_MPIDense *a = (Mat_MPIDense *)mat->data; 1377 MatType mtype = MATSEQDENSE; 1378 1379 PetscFunctionBegin; 1380 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1381 PetscCall(PetscLayoutSetUp(mat->rmap)); 1382 PetscCall(PetscLayoutSetUp(mat->cmap)); 1383 if (!a->A) { 1384 PetscCall(MatCreate(PETSC_COMM_SELF, &a->A)); 1385 PetscCall(MatSetSizes(a->A, mat->rmap->n, mat->cmap->N, mat->rmap->n, mat->cmap->N)); 1386 } 1387 #if defined(PETSC_HAVE_CUDA) 1388 PetscBool iscuda; 1389 PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSECUDA, &iscuda)); 1390 if (iscuda) mtype = MATSEQDENSECUDA; 1391 #endif 1392 #if defined(PETSC_HAVE_HIP) 1393 PetscBool iship; 1394 PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSEHIP, &iship)); 1395 if (iship) mtype = MATSEQDENSEHIP; 1396 #endif 1397 PetscCall(MatSetType(a->A, mtype)); 1398 PetscCall(MatSeqDenseSetPreallocation(a->A, data)); 1399 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 1400 mat->offloadmask = a->A->offloadmask; 1401 #endif 1402 mat->preallocated = PETSC_TRUE; 1403 mat->assembled = PETSC_TRUE; 1404 PetscFunctionReturn(PETSC_SUCCESS); 1405 } 1406 1407 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIDense(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 1408 { 1409 Mat B, C; 1410 1411 PetscFunctionBegin; 1412 PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &C)); 1413 PetscCall(MatConvert_SeqAIJ_SeqDense(C, MATSEQDENSE, MAT_INITIAL_MATRIX, &B)); 1414 PetscCall(MatDestroy(&C)); 1415 if (reuse == MAT_REUSE_MATRIX) { 1416 C = *newmat; 1417 } else C = NULL; 1418 PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C)); 1419 PetscCall(MatDestroy(&B)); 1420 if (reuse == MAT_INPLACE_MATRIX) PetscCall(MatHeaderReplace(A, &C)); 1421 else if (reuse == MAT_INITIAL_MATRIX) *newmat = C; 1422 PetscFunctionReturn(PETSC_SUCCESS); 1423 } 1424 1425 static PetscErrorCode MatConvert_MPIDense_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 1426 { 1427 Mat B, C; 1428 1429 PetscFunctionBegin; 1430 PetscCall(MatDenseGetLocalMatrix(A, &C)); 1431 PetscCall(MatConvert_SeqDense_SeqAIJ(C, MATSEQAIJ, MAT_INITIAL_MATRIX, &B)); 1432 if (reuse == MAT_REUSE_MATRIX) { 1433 C = *newmat; 1434 } else C = NULL; 1435 PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C)); 1436 PetscCall(MatDestroy(&B)); 1437 if (reuse == MAT_INPLACE_MATRIX) PetscCall(MatHeaderReplace(A, &C)); 1438 else if (reuse == MAT_INITIAL_MATRIX) *newmat = C; 1439 PetscFunctionReturn(PETSC_SUCCESS); 1440 } 1441 1442 #if defined(PETSC_HAVE_ELEMENTAL) 1443 PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 1444 { 1445 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1446 Mat mat_elemental; 1447 PetscScalar *v; 1448 PetscInt m = A->rmap->n, N = A->cmap->N, rstart = A->rmap->rstart, i, *rows, *cols, lda; 1449 1450 PetscFunctionBegin; 1451 if (reuse == MAT_REUSE_MATRIX) { 1452 mat_elemental = *newmat; 1453 PetscCall(MatZeroEntries(*newmat)); 1454 } else { 1455 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental)); 1456 PetscCall(MatSetSizes(mat_elemental, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N)); 1457 PetscCall(MatSetType(mat_elemental, MATELEMENTAL)); 1458 PetscCall(MatSetUp(mat_elemental)); 1459 PetscCall(MatSetOption(mat_elemental, MAT_ROW_ORIENTED, PETSC_FALSE)); 1460 } 1461 1462 PetscCall(PetscMalloc2(m, &rows, N, &cols)); 1463 for (i = 0; i < N; i++) cols[i] = i; 1464 for (i = 0; i < m; i++) rows[i] = rstart + i; 1465 1466 /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */ 1467 PetscCall(MatDenseGetArray(A, &v)); 1468 PetscCall(MatDenseGetLDA(a->A, &lda)); 1469 if (lda == m) PetscCall(MatSetValues(mat_elemental, m, rows, N, cols, v, ADD_VALUES)); 1470 else { 1471 for (i = 0; i < N; i++) PetscCall(MatSetValues(mat_elemental, m, rows, 1, &i, v + lda * i, ADD_VALUES)); 1472 } 1473 PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY)); 1474 PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY)); 1475 PetscCall(MatDenseRestoreArray(A, &v)); 1476 PetscCall(PetscFree2(rows, cols)); 1477 1478 if (reuse == MAT_INPLACE_MATRIX) { 1479 PetscCall(MatHeaderReplace(A, &mat_elemental)); 1480 } else { 1481 *newmat = mat_elemental; 1482 } 1483 PetscFunctionReturn(PETSC_SUCCESS); 1484 } 1485 #endif 1486 1487 static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A, PetscInt col, PetscScalar **vals) 1488 { 1489 Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 1490 1491 PetscFunctionBegin; 1492 PetscCall(MatDenseGetColumn(mat->A, col, vals)); 1493 PetscFunctionReturn(PETSC_SUCCESS); 1494 } 1495 1496 static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A, PetscScalar **vals) 1497 { 1498 Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 1499 1500 PetscFunctionBegin; 1501 PetscCall(MatDenseRestoreColumn(mat->A, vals)); 1502 PetscFunctionReturn(PETSC_SUCCESS); 1503 } 1504 1505 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 1506 { 1507 Mat_MPIDense *mat; 1508 PetscInt m, nloc, N; 1509 1510 PetscFunctionBegin; 1511 PetscCall(MatGetSize(inmat, &m, &N)); 1512 PetscCall(MatGetLocalSize(inmat, NULL, &nloc)); 1513 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 1514 PetscInt sum; 1515 1516 if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N)); 1517 /* Check sum(n) = N */ 1518 PetscCallMPI(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm)); 1519 PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N); 1520 1521 PetscCall(MatCreateDense(comm, m, n, PETSC_DETERMINE, N, NULL, outmat)); 1522 PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 1523 } 1524 1525 /* numeric phase */ 1526 mat = (Mat_MPIDense *)(*outmat)->data; 1527 PetscCall(MatCopy(inmat, mat->A, SAME_NONZERO_PATTERN)); 1528 PetscFunctionReturn(PETSC_SUCCESS); 1529 } 1530 1531 PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A, PetscInt col, Vec *v) 1532 { 1533 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1534 PetscInt lda; 1535 1536 PetscFunctionBegin; 1537 PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1538 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1539 if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec)); 1540 a->vecinuse = col + 1; 1541 PetscCall(MatDenseGetLDA(a->A, &lda)); 1542 PetscCall(MatDenseGetArray(a->A, (PetscScalar **)&a->ptrinuse)); 1543 PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)lda))); 1544 *v = a->cvec; 1545 PetscFunctionReturn(PETSC_SUCCESS); 1546 } 1547 1548 PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A, PetscInt col, Vec *v) 1549 { 1550 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1551 1552 PetscFunctionBegin; 1553 PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first"); 1554 PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector"); 1555 VecCheckAssembled(a->cvec); 1556 a->vecinuse = 0; 1557 PetscCall(MatDenseRestoreArray(a->A, (PetscScalar **)&a->ptrinuse)); 1558 PetscCall(VecResetArray(a->cvec)); 1559 if (v) *v = NULL; 1560 PetscFunctionReturn(PETSC_SUCCESS); 1561 } 1562 1563 PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v) 1564 { 1565 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1566 PetscInt lda; 1567 1568 PetscFunctionBegin; 1569 PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1570 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1571 if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec)); 1572 a->vecinuse = col + 1; 1573 PetscCall(MatDenseGetLDA(a->A, &lda)); 1574 PetscCall(MatDenseGetArrayRead(a->A, &a->ptrinuse)); 1575 PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)lda))); 1576 PetscCall(VecLockReadPush(a->cvec)); 1577 *v = a->cvec; 1578 PetscFunctionReturn(PETSC_SUCCESS); 1579 } 1580 1581 PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v) 1582 { 1583 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1584 1585 PetscFunctionBegin; 1586 PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first"); 1587 PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector"); 1588 VecCheckAssembled(a->cvec); 1589 a->vecinuse = 0; 1590 PetscCall(MatDenseRestoreArrayRead(a->A, &a->ptrinuse)); 1591 PetscCall(VecLockReadPop(a->cvec)); 1592 PetscCall(VecResetArray(a->cvec)); 1593 if (v) *v = NULL; 1594 PetscFunctionReturn(PETSC_SUCCESS); 1595 } 1596 1597 PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v) 1598 { 1599 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1600 PetscInt lda; 1601 1602 PetscFunctionBegin; 1603 PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1604 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1605 if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec)); 1606 a->vecinuse = col + 1; 1607 PetscCall(MatDenseGetLDA(a->A, &lda)); 1608 PetscCall(MatDenseGetArrayWrite(a->A, (PetscScalar **)&a->ptrinuse)); 1609 PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)lda))); 1610 *v = a->cvec; 1611 PetscFunctionReturn(PETSC_SUCCESS); 1612 } 1613 1614 PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v) 1615 { 1616 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1617 1618 PetscFunctionBegin; 1619 PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first"); 1620 PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector"); 1621 VecCheckAssembled(a->cvec); 1622 a->vecinuse = 0; 1623 PetscCall(MatDenseRestoreArrayWrite(a->A, (PetscScalar **)&a->ptrinuse)); 1624 PetscCall(VecResetArray(a->cvec)); 1625 if (v) *v = NULL; 1626 PetscFunctionReturn(PETSC_SUCCESS); 1627 } 1628 1629 static PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A, PetscInt rbegin, PetscInt rend, PetscInt cbegin, PetscInt cend, Mat *v) 1630 { 1631 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1632 Mat_MPIDense *c; 1633 MPI_Comm comm; 1634 PetscInt prbegin, prend, pcbegin, pcend; 1635 1636 PetscFunctionBegin; 1637 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 1638 PetscCheck(!a->vecinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1639 PetscCheck(!a->matinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1640 prbegin = PetscMax(0, PetscMin(A->rmap->rend, rbegin) - A->rmap->rstart); 1641 prend = PetscMin(A->rmap->n, PetscMax(0, rend - A->rmap->rstart)); 1642 pcbegin = PetscMax(0, PetscMin(A->cmap->rend, cbegin) - A->cmap->rstart); 1643 pcend = PetscMin(A->cmap->n, PetscMax(0, cend - A->cmap->rstart)); 1644 if (!a->cmat) { 1645 PetscCall(MatCreate(comm, &a->cmat)); 1646 PetscCall(MatSetType(a->cmat, ((PetscObject)A)->type_name)); 1647 if (rend - rbegin == A->rmap->N) PetscCall(PetscLayoutReference(A->rmap, &a->cmat->rmap)); 1648 else { 1649 PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, prend - prbegin)); 1650 PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin)); 1651 PetscCall(PetscLayoutSetUp(a->cmat->rmap)); 1652 } 1653 if (cend - cbegin == A->cmap->N) PetscCall(PetscLayoutReference(A->cmap, &a->cmat->cmap)); 1654 else { 1655 PetscCall(PetscLayoutSetLocalSize(a->cmat->cmap, pcend - pcbegin)); 1656 PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin)); 1657 PetscCall(PetscLayoutSetUp(a->cmat->cmap)); 1658 } 1659 c = (Mat_MPIDense *)a->cmat->data; 1660 c->sub_rbegin = rbegin; 1661 c->sub_rend = rend; 1662 c->sub_cbegin = cbegin; 1663 c->sub_cend = cend; 1664 } 1665 c = (Mat_MPIDense *)a->cmat->data; 1666 if (c->sub_rbegin != rbegin || c->sub_rend != rend) { 1667 PetscCall(PetscLayoutDestroy(&a->cmat->rmap)); 1668 PetscCall(PetscLayoutCreate(comm, &a->cmat->rmap)); 1669 PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, prend - prbegin)); 1670 PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin)); 1671 PetscCall(PetscLayoutSetUp(a->cmat->rmap)); 1672 c->sub_rbegin = rbegin; 1673 c->sub_rend = rend; 1674 } 1675 if (c->sub_cbegin != cbegin || c->sub_cend != cend) { 1676 // special optimization: check if all columns are owned by rank 0, in which case no communication is necessary 1677 if ((cend - cbegin != a->cmat->cmap->N) || (A->cmap->range[1] != A->cmap->N)) { 1678 PetscCall(PetscLayoutDestroy(&a->cmat->cmap)); 1679 PetscCall(PetscLayoutCreate(comm, &a->cmat->cmap)); 1680 PetscCall(PetscLayoutSetLocalSize(a->cmat->cmap, pcend - pcbegin)); 1681 PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin)); 1682 PetscCall(PetscLayoutSetUp(a->cmat->cmap)); 1683 PetscCall(VecDestroy(&c->lvec)); 1684 PetscCall(PetscSFDestroy(&c->Mvctx)); 1685 } 1686 c->sub_cbegin = cbegin; 1687 c->sub_cend = cend; 1688 } 1689 PetscCheck(!c->A, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1690 PetscCall(MatDenseGetSubMatrix(a->A, prbegin, prend, cbegin, cend, &c->A)); 1691 1692 a->cmat->preallocated = PETSC_TRUE; 1693 a->cmat->assembled = PETSC_TRUE; 1694 #if defined(PETSC_HAVE_DEVICE) 1695 a->cmat->offloadmask = c->A->offloadmask; 1696 #endif 1697 a->matinuse = cbegin + 1; 1698 *v = a->cmat; 1699 PetscFunctionReturn(PETSC_SUCCESS); 1700 } 1701 1702 static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A, Mat *v) 1703 { 1704 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1705 Mat_MPIDense *c; 1706 1707 PetscFunctionBegin; 1708 PetscCheck(a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetSubMatrix() first"); 1709 PetscCheck(a->cmat, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal matrix"); 1710 PetscCheck(*v == a->cmat, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Not the matrix obtained from MatDenseGetSubMatrix()"); 1711 a->matinuse = 0; 1712 c = (Mat_MPIDense *)a->cmat->data; 1713 PetscCall(MatDenseRestoreSubMatrix(a->A, &c->A)); 1714 if (v) *v = NULL; 1715 #if defined(PETSC_HAVE_DEVICE) 1716 A->offloadmask = a->A->offloadmask; 1717 #endif 1718 PetscFunctionReturn(PETSC_SUCCESS); 1719 } 1720 1721 /*MC 1722 MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices. 1723 1724 Options Database Key: 1725 . -mat_type mpidense - sets the matrix type to `MATMPIDENSE` during a call to `MatSetFromOptions()` 1726 1727 Level: beginner 1728 1729 .seealso: [](ch_matrices), `Mat`, `MatCreateDense()`, `MATSEQDENSE`, `MATDENSE` 1730 M*/ 1731 PetscErrorCode MatCreate_MPIDense(Mat mat) 1732 { 1733 Mat_MPIDense *a; 1734 1735 PetscFunctionBegin; 1736 PetscCall(PetscNew(&a)); 1737 mat->data = (void *)a; 1738 mat->ops[0] = MatOps_Values; 1739 1740 mat->insertmode = NOT_SET_VALUES; 1741 1742 /* build cache for off array entries formed */ 1743 a->donotstash = PETSC_FALSE; 1744 1745 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)mat), 1, &mat->stash)); 1746 1747 /* stuff used for matrix vector multiply */ 1748 a->lvec = NULL; 1749 a->Mvctx = NULL; 1750 a->roworiented = PETSC_TRUE; 1751 1752 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", MatDenseGetLDA_MPIDense)); 1753 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", MatDenseSetLDA_MPIDense)); 1754 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", MatDenseGetArray_MPIDense)); 1755 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", MatDenseRestoreArray_MPIDense)); 1756 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", MatDenseGetArrayRead_MPIDense)); 1757 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", MatDenseRestoreArrayRead_MPIDense)); 1758 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", MatDenseGetArrayWrite_MPIDense)); 1759 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", MatDenseRestoreArrayWrite_MPIDense)); 1760 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", MatDensePlaceArray_MPIDense)); 1761 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", MatDenseResetArray_MPIDense)); 1762 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", MatDenseReplaceArray_MPIDense)); 1763 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDense)); 1764 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDense)); 1765 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDense)); 1766 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDense)); 1767 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDense)); 1768 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDense)); 1769 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", MatDenseGetSubMatrix_MPIDense)); 1770 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", MatDenseRestoreSubMatrix_MPIDense)); 1771 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense)); 1772 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", MatConvert_MPIDense_MPIAIJ)); 1773 #if defined(PETSC_HAVE_ELEMENTAL) 1774 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", MatConvert_MPIDense_Elemental)); 1775 #endif 1776 #if defined(PETSC_HAVE_SCALAPACK) 1777 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", MatConvert_Dense_ScaLAPACK)); 1778 #endif 1779 #if defined(PETSC_HAVE_CUDA) 1780 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", MatConvert_MPIDense_MPIDenseCUDA)); 1781 #endif 1782 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", MatMPIDenseSetPreallocation_MPIDense)); 1783 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense)); 1784 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ)); 1785 #if defined(PETSC_HAVE_CUDA) 1786 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense)); 1787 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ)); 1788 #endif 1789 #if defined(PETSC_HAVE_HIP) 1790 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", MatConvert_MPIDense_MPIDenseHIP)); 1791 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense)); 1792 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ)); 1793 #endif 1794 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", MatDenseGetColumn_MPIDense)); 1795 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", MatDenseRestoreColumn_MPIDense)); 1796 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultColumnRange_C", MatMultColumnRange_MPIDense)); 1797 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultAddColumnRange_C", MatMultAddColumnRange_MPIDense)); 1798 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeColumnRange_C", MatMultHermitianTransposeColumnRange_MPIDense)); 1799 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeAddColumnRange_C", MatMultHermitianTransposeAddColumnRange_MPIDense)); 1800 PetscCall(PetscObjectChangeTypeName((PetscObject)mat, MATMPIDENSE)); 1801 PetscFunctionReturn(PETSC_SUCCESS); 1802 } 1803 1804 /*MC 1805 MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices. 1806 1807 This matrix type is identical to `MATSEQDENSE` when constructed with a single process communicator, 1808 and `MATMPIDENSE` otherwise. 1809 1810 Options Database Key: 1811 . -mat_type dense - sets the matrix type to `MATDENSE` during a call to `MatSetFromOptions()` 1812 1813 Level: beginner 1814 1815 .seealso: [](ch_matrices), `Mat`, `MATSEQDENSE`, `MATMPIDENSE`, `MATDENSECUDA`, `MATDENSEHIP` 1816 M*/ 1817 1818 /*@ 1819 MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries 1820 1821 Collective 1822 1823 Input Parameters: 1824 + B - the matrix 1825 - data - optional location of matrix data. Set to `NULL` for PETSc 1826 to control all matrix memory allocation. 1827 1828 Level: intermediate 1829 1830 Notes: 1831 The dense format is fully compatible with standard Fortran 1832 storage by columns. 1833 1834 The data input variable is intended primarily for Fortran programmers 1835 who wish to allocate their own matrix memory space. Most users should 1836 set `data` to `NULL`. 1837 1838 .seealso: [](ch_matrices), `Mat`, `MATMPIDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()` 1839 @*/ 1840 PetscErrorCode MatMPIDenseSetPreallocation(Mat B, PetscScalar *data) 1841 { 1842 PetscFunctionBegin; 1843 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 1844 PetscTryMethod(B, "MatMPIDenseSetPreallocation_C", (Mat, PetscScalar *), (B, data)); 1845 PetscFunctionReturn(PETSC_SUCCESS); 1846 } 1847 1848 /*@ 1849 MatDensePlaceArray - Allows one to replace the array in a `MATDENSE` matrix with an 1850 array provided by the user. This is useful to avoid copying an array 1851 into a matrix 1852 1853 Not Collective 1854 1855 Input Parameters: 1856 + mat - the matrix 1857 - array - the array in column major order 1858 1859 Level: developer 1860 1861 Note: 1862 Adding `const` to `array` was an oversight, see notes in `VecPlaceArray()`. 1863 1864 You can return to the original array with a call to `MatDenseResetArray()`. The user is responsible for freeing this array; it will not be 1865 freed when the matrix is destroyed. 1866 1867 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDenseResetArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`, 1868 `MatDenseReplaceArray()` 1869 @*/ 1870 PetscErrorCode MatDensePlaceArray(Mat mat, const PetscScalar *array) 1871 { 1872 PetscFunctionBegin; 1873 PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 1874 PetscUseMethod(mat, "MatDensePlaceArray_C", (Mat, const PetscScalar *), (mat, array)); 1875 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 1876 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 1877 mat->offloadmask = PETSC_OFFLOAD_CPU; 1878 #endif 1879 PetscFunctionReturn(PETSC_SUCCESS); 1880 } 1881 1882 /*@ 1883 MatDenseResetArray - Resets the matrix array to that it previously had before the call to `MatDensePlaceArray()` 1884 1885 Not Collective 1886 1887 Input Parameter: 1888 . mat - the matrix 1889 1890 Level: developer 1891 1892 Note: 1893 You can only call this after a call to `MatDensePlaceArray()` 1894 1895 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDensePlaceArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()` 1896 @*/ 1897 PetscErrorCode MatDenseResetArray(Mat mat) 1898 { 1899 PetscFunctionBegin; 1900 PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 1901 PetscUseMethod(mat, "MatDenseResetArray_C", (Mat), (mat)); 1902 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 1903 PetscFunctionReturn(PETSC_SUCCESS); 1904 } 1905 1906 /*@ 1907 MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an 1908 array provided by the user. This is useful to avoid copying an array 1909 into a matrix 1910 1911 Not Collective 1912 1913 Input Parameters: 1914 + mat - the matrix 1915 - array - the array in column major order 1916 1917 Level: developer 1918 1919 Note: 1920 Adding `const` to `array` was an oversight, see notes in `VecPlaceArray()`. 1921 1922 The memory passed in MUST be obtained with `PetscMalloc()` and CANNOT be 1923 freed by the user. It will be freed when the matrix is destroyed. 1924 1925 .seealso: [](ch_matrices), `Mat`, `MatDensePlaceArray()`, `MatDenseGetArray()`, `VecReplaceArray()` 1926 @*/ 1927 PetscErrorCode MatDenseReplaceArray(Mat mat, const PetscScalar *array) 1928 { 1929 PetscFunctionBegin; 1930 PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 1931 PetscUseMethod(mat, "MatDenseReplaceArray_C", (Mat, const PetscScalar *), (mat, array)); 1932 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 1933 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 1934 mat->offloadmask = PETSC_OFFLOAD_CPU; 1935 #endif 1936 PetscFunctionReturn(PETSC_SUCCESS); 1937 } 1938 1939 /*@ 1940 MatCreateDense - Creates a matrix in `MATDENSE` format. 1941 1942 Collective 1943 1944 Input Parameters: 1945 + comm - MPI communicator 1946 . m - number of local rows (or `PETSC_DECIDE` to have calculated if `M` is given) 1947 . n - number of local columns (or `PETSC_DECIDE` to have calculated if `N` is given) 1948 . M - number of global rows (or `PETSC_DECIDE` to have calculated if `m` is given) 1949 . N - number of global columns (or `PETSC_DECIDE` to have calculated if `n` is given) 1950 - data - optional location of matrix data. Set data to `NULL` (`PETSC_NULL_SCALAR_ARRAY` for Fortran users) for PETSc 1951 to control all matrix memory allocation. 1952 1953 Output Parameter: 1954 . A - the matrix 1955 1956 Level: intermediate 1957 1958 Notes: 1959 The dense format is fully compatible with standard Fortran 1960 storage by columns. 1961 1962 Although local portions of the matrix are stored in column-major 1963 order, the matrix is partitioned across MPI ranks by row. 1964 1965 The data input variable is intended primarily for Fortran programmers 1966 who wish to allocate their own matrix memory space. Most users should 1967 set `data` to `NULL` (`PETSC_NULL_SCALAR_ARRAY` for Fortran users). 1968 1969 The user MUST specify either the local or global matrix dimensions 1970 (possibly both). 1971 1972 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()` 1973 @*/ 1974 PetscErrorCode MatCreateDense(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar data[], Mat *A) 1975 { 1976 PetscFunctionBegin; 1977 PetscCall(MatCreate(comm, A)); 1978 PetscCall(MatSetSizes(*A, m, n, M, N)); 1979 PetscCall(MatSetType(*A, MATDENSE)); 1980 PetscCall(MatSeqDenseSetPreallocation(*A, data)); 1981 PetscCall(MatMPIDenseSetPreallocation(*A, data)); 1982 PetscFunctionReturn(PETSC_SUCCESS); 1983 } 1984 1985 static PetscErrorCode MatDuplicate_MPIDense(Mat A, MatDuplicateOption cpvalues, Mat *newmat) 1986 { 1987 Mat mat; 1988 Mat_MPIDense *a, *oldmat = (Mat_MPIDense *)A->data; 1989 1990 PetscFunctionBegin; 1991 *newmat = NULL; 1992 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat)); 1993 PetscCall(MatSetSizes(mat, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 1994 PetscCall(MatSetType(mat, ((PetscObject)A)->type_name)); 1995 a = (Mat_MPIDense *)mat->data; 1996 1997 mat->factortype = A->factortype; 1998 mat->assembled = PETSC_TRUE; 1999 mat->preallocated = PETSC_TRUE; 2000 2001 mat->insertmode = NOT_SET_VALUES; 2002 a->donotstash = oldmat->donotstash; 2003 2004 PetscCall(PetscLayoutReference(A->rmap, &mat->rmap)); 2005 PetscCall(PetscLayoutReference(A->cmap, &mat->cmap)); 2006 2007 PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A)); 2008 2009 *newmat = mat; 2010 PetscFunctionReturn(PETSC_SUCCESS); 2011 } 2012 2013 static PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer) 2014 { 2015 PetscBool isbinary; 2016 #if defined(PETSC_HAVE_HDF5) 2017 PetscBool ishdf5; 2018 #endif 2019 2020 PetscFunctionBegin; 2021 PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1); 2022 PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2); 2023 /* force binary viewer to load .info file if it has not yet done so */ 2024 PetscCall(PetscViewerSetUp(viewer)); 2025 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 2026 #if defined(PETSC_HAVE_HDF5) 2027 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5)); 2028 #endif 2029 if (isbinary) { 2030 PetscCall(MatLoad_Dense_Binary(newMat, viewer)); 2031 #if defined(PETSC_HAVE_HDF5) 2032 } else if (ishdf5) { 2033 PetscCall(MatLoad_Dense_HDF5(newMat, viewer)); 2034 #endif 2035 } else SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name); 2036 PetscFunctionReturn(PETSC_SUCCESS); 2037 } 2038 2039 static PetscErrorCode MatEqual_MPIDense(Mat A, Mat B, PetscBool *flag) 2040 { 2041 Mat_MPIDense *matB = (Mat_MPIDense *)B->data, *matA = (Mat_MPIDense *)A->data; 2042 Mat a, b; 2043 2044 PetscFunctionBegin; 2045 a = matA->A; 2046 b = matB->A; 2047 PetscCall(MatEqual(a, b, flag)); 2048 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A))); 2049 PetscFunctionReturn(PETSC_SUCCESS); 2050 } 2051 2052 static PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data) 2053 { 2054 Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data; 2055 2056 PetscFunctionBegin; 2057 PetscCall(PetscFree2(atb->sendbuf, atb->recvcounts)); 2058 PetscCall(MatDestroy(&atb->atb)); 2059 PetscCall(PetscFree(atb)); 2060 PetscFunctionReturn(PETSC_SUCCESS); 2061 } 2062 2063 static PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data) 2064 { 2065 Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data; 2066 2067 PetscFunctionBegin; 2068 PetscCall(PetscFree2(abt->buf[0], abt->buf[1])); 2069 PetscCall(PetscFree2(abt->recvcounts, abt->recvdispls)); 2070 PetscCall(PetscFree(abt)); 2071 PetscFunctionReturn(PETSC_SUCCESS); 2072 } 2073 2074 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) 2075 { 2076 Mat_MPIDense *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data; 2077 Mat_TransMatMultDense *atb; 2078 MPI_Comm comm; 2079 PetscMPIInt size, *recvcounts; 2080 PetscScalar *carray, *sendbuf; 2081 const PetscScalar *atbarray; 2082 PetscInt i, cN = C->cmap->N, proc, k, j, lda; 2083 const PetscInt *ranges; 2084 2085 PetscFunctionBegin; 2086 MatCheckProduct(C, 3); 2087 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 2088 atb = (Mat_TransMatMultDense *)C->product->data; 2089 recvcounts = atb->recvcounts; 2090 sendbuf = atb->sendbuf; 2091 2092 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 2093 PetscCallMPI(MPI_Comm_size(comm, &size)); 2094 2095 /* compute atbarray = aseq^T * bseq */ 2096 PetscCall(MatTransposeMatMult(a->A, b->A, atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX, PETSC_DETERMINE, &atb->atb)); 2097 2098 PetscCall(MatGetOwnershipRanges(C, &ranges)); 2099 2100 if (ranges[1] == C->rmap->N) { 2101 /* all of the values are being reduced to rank 0: optimize this case to use MPI_Reduce and GPU aware MPI if available */ 2102 PetscInt atb_lda, c_lda; 2103 Mat atb_local = atb->atb; 2104 Mat atb_alloc = NULL; 2105 Mat c_local = c->A; 2106 Mat c_alloc = NULL; 2107 PetscMemType atb_memtype, c_memtype; 2108 const PetscScalar *atb_array = NULL; 2109 MPI_Datatype vector_type; 2110 PetscScalar *c_array = NULL; 2111 PetscMPIInt rank; 2112 2113 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 2114 2115 PetscCall(MatDenseGetLDA(atb_local, &atb_lda)); 2116 if (atb_lda != C->rmap->N) { 2117 // copy atb to a matrix that will have lda == the number of rows 2118 PetscCall(MatDuplicate(atb_local, MAT_DO_NOT_COPY_VALUES, &atb_alloc)); 2119 PetscCall(MatCopy(atb_local, atb_alloc, DIFFERENT_NONZERO_PATTERN)); 2120 atb_local = atb_alloc; 2121 } 2122 2123 if (rank == 0) { 2124 PetscCall(MatDenseGetLDA(c_local, &c_lda)); 2125 if (c_lda != C->rmap->N) { 2126 // copy c to a matrix that will have lda == the number of rows 2127 PetscCall(MatDuplicate(c_local, MAT_DO_NOT_COPY_VALUES, &c_alloc)); 2128 c_local = c_alloc; 2129 } 2130 PetscCall(MatZeroEntries(c_local)); 2131 } 2132 /* atb_local and c_local have nrows = lda = A->cmap->N and ncols = 2133 * B->cmap->N: use the a->Mvctx to use the best reduction method */ 2134 if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); 2135 vector_type = MPIU_SCALAR; 2136 if (B->cmap->N > 1) { 2137 PetscMPIInt mpi_N; 2138 2139 PetscCall(PetscMPIIntCast(B->cmap->N, &mpi_N)); 2140 PetscCallMPI(MPI_Type_contiguous(mpi_N, MPIU_SCALAR, &vector_type)); 2141 PetscCallMPI(MPI_Type_commit(&vector_type)); 2142 } 2143 PetscCall(MatDenseGetArrayReadAndMemType(atb_local, &atb_array, &atb_memtype)); 2144 PetscCall(MatDenseGetArrayWriteAndMemType(c_local, &c_array, &c_memtype)); 2145 PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, vector_type, atb_memtype, atb_array, c_memtype, c_array, MPIU_SUM)); 2146 PetscCall(PetscSFReduceEnd(a->Mvctx, vector_type, atb_array, c_array, MPIU_SUM)); 2147 PetscCall(MatDenseRestoreArrayWriteAndMemType(c_local, &c_array)); 2148 PetscCall(MatDenseRestoreArrayReadAndMemType(atb_local, &atb_array)); 2149 if (rank == 0 && c_local != c->A) PetscCall(MatCopy(c_local, c->A, DIFFERENT_NONZERO_PATTERN)); 2150 if (B->cmap->N > 1) PetscCallMPI(MPI_Type_free(&vector_type)); 2151 PetscCall(MatDestroy(&atb_alloc)); 2152 PetscCall(MatDestroy(&c_alloc)); 2153 PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 2154 PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 2155 PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 2156 PetscFunctionReturn(PETSC_SUCCESS); 2157 } 2158 2159 /* arrange atbarray into sendbuf */ 2160 PetscCall(MatDenseGetArrayRead(atb->atb, &atbarray)); 2161 PetscCall(MatDenseGetLDA(atb->atb, &lda)); 2162 for (proc = 0, k = 0; proc < size; proc++) { 2163 for (j = 0; j < cN; j++) { 2164 for (i = ranges[proc]; i < ranges[proc + 1]; i++) sendbuf[k++] = atbarray[i + j * lda]; 2165 } 2166 } 2167 PetscCall(MatDenseRestoreArrayRead(atb->atb, &atbarray)); 2168 2169 /* sum all atbarray to local values of C */ 2170 PetscCall(MatDenseGetArrayWrite(c->A, &carray)); 2171 PetscCallMPI(MPI_Reduce_scatter(sendbuf, carray, recvcounts, MPIU_SCALAR, MPIU_SUM, comm)); 2172 PetscCall(MatDenseRestoreArrayWrite(c->A, &carray)); 2173 PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 2174 PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 2175 PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 2176 PetscFunctionReturn(PETSC_SUCCESS); 2177 } 2178 2179 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C) 2180 { 2181 MPI_Comm comm; 2182 PetscMPIInt size; 2183 PetscInt cm = A->cmap->n, cM, cN = B->cmap->N; 2184 Mat_TransMatMultDense *atb; 2185 PetscBool cisdense = PETSC_FALSE; 2186 const PetscInt *ranges; 2187 2188 PetscFunctionBegin; 2189 MatCheckProduct(C, 4); 2190 PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 2191 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 2192 PetscCheck(A->rmap->rstart == B->rmap->rstart && A->rmap->rend == B->rmap->rend, comm, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, 2193 A->rmap->rend, B->rmap->rstart, B->rmap->rend); 2194 2195 /* create matrix product C */ 2196 PetscCall(MatSetSizes(C, cm, B->cmap->n, A->cmap->N, B->cmap->N)); 2197 #if defined(PETSC_HAVE_CUDA) 2198 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, "")); 2199 #endif 2200 #if defined(PETSC_HAVE_HIP) 2201 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSEHIP, "")); 2202 #endif 2203 if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 2204 PetscCall(MatSetUp(C)); 2205 2206 /* create data structure for reuse C */ 2207 PetscCallMPI(MPI_Comm_size(comm, &size)); 2208 PetscCall(PetscNew(&atb)); 2209 cM = C->rmap->N; 2210 PetscCall(PetscMalloc2(cM * cN, &atb->sendbuf, size, &atb->recvcounts)); 2211 PetscCall(MatGetOwnershipRanges(C, &ranges)); 2212 for (PetscMPIInt i = 0; i < size; i++) PetscCall(PetscMPIIntCast((ranges[i + 1] - ranges[i]) * cN, &atb->recvcounts[i])); 2213 C->product->data = atb; 2214 C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense; 2215 PetscFunctionReturn(PETSC_SUCCESS); 2216 } 2217 2218 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C) 2219 { 2220 MPI_Comm comm; 2221 PetscMPIInt i, size; 2222 PetscInt maxRows, bufsiz; 2223 PetscMPIInt tag; 2224 PetscInt alg; 2225 Mat_MatTransMultDense *abt; 2226 Mat_Product *product = C->product; 2227 PetscBool flg; 2228 2229 PetscFunctionBegin; 2230 MatCheckProduct(C, 4); 2231 PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 2232 /* check local size of A and B */ 2233 PetscCheck(A->cmap->n == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local column dimensions are incompatible, A (%" PetscInt_FMT ") != B (%" PetscInt_FMT ")", A->cmap->n, B->cmap->n); 2234 2235 PetscCall(PetscStrcmp(product->alg, "allgatherv", &flg)); 2236 alg = flg ? 0 : 1; 2237 2238 /* setup matrix product C */ 2239 PetscCall(MatSetSizes(C, A->rmap->n, B->rmap->n, A->rmap->N, B->rmap->N)); 2240 PetscCall(MatSetType(C, MATMPIDENSE)); 2241 PetscCall(MatSetUp(C)); 2242 PetscCall(PetscObjectGetNewTag((PetscObject)C, &tag)); 2243 2244 /* create data structure for reuse C */ 2245 PetscCall(PetscObjectGetComm((PetscObject)C, &comm)); 2246 PetscCallMPI(MPI_Comm_size(comm, &size)); 2247 PetscCall(PetscNew(&abt)); 2248 abt->tag = tag; 2249 abt->alg = alg; 2250 switch (alg) { 2251 case 1: /* alg: "cyclic" */ 2252 for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, B->rmap->range[i + 1] - B->rmap->range[i]); 2253 bufsiz = A->cmap->N * maxRows; 2254 PetscCall(PetscMalloc2(bufsiz, &abt->buf[0], bufsiz, &abt->buf[1])); 2255 break; 2256 default: /* alg: "allgatherv" */ 2257 PetscCall(PetscMalloc2(B->rmap->n * B->cmap->N, &abt->buf[0], B->rmap->N * B->cmap->N, &abt->buf[1])); 2258 PetscCall(PetscMalloc2(size, &abt->recvcounts, size + 1, &abt->recvdispls)); 2259 for (i = 0; i <= size; i++) PetscCall(PetscMPIIntCast(B->rmap->range[i] * A->cmap->N, &abt->recvdispls[i])); 2260 for (i = 0; i < size; i++) PetscCall(PetscMPIIntCast(abt->recvdispls[i + 1] - abt->recvdispls[i], &abt->recvcounts[i])); 2261 break; 2262 } 2263 2264 C->product->data = abt; 2265 C->product->destroy = MatDestroy_MatMatTransMult_MPIDense_MPIDense; 2266 C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense; 2267 PetscFunctionReturn(PETSC_SUCCESS); 2268 } 2269 2270 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C) 2271 { 2272 Mat_MPIDense *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data; 2273 Mat_MatTransMultDense *abt; 2274 MPI_Comm comm; 2275 PetscMPIInt rank, size, sendto, recvfrom, recvisfrom; 2276 PetscScalar *sendbuf, *recvbuf = NULL, *cv; 2277 PetscInt i, cK = A->cmap->N, sendsiz, recvsiz, k, j, bn; 2278 PetscScalar _DOne = 1.0, _DZero = 0.0; 2279 const PetscScalar *av, *bv; 2280 PetscBLASInt cm, cn, ck, alda, blda = 0, clda; 2281 MPI_Request reqs[2]; 2282 const PetscInt *ranges; 2283 2284 PetscFunctionBegin; 2285 MatCheckProduct(C, 3); 2286 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 2287 abt = (Mat_MatTransMultDense *)C->product->data; 2288 PetscCall(PetscObjectGetComm((PetscObject)C, &comm)); 2289 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 2290 PetscCallMPI(MPI_Comm_size(comm, &size)); 2291 PetscCall(MatDenseGetArrayRead(a->A, &av)); 2292 PetscCall(MatDenseGetArrayRead(b->A, &bv)); 2293 PetscCall(MatDenseGetArrayWrite(c->A, &cv)); 2294 PetscCall(MatDenseGetLDA(a->A, &i)); 2295 PetscCall(PetscBLASIntCast(i, &alda)); 2296 PetscCall(MatDenseGetLDA(b->A, &i)); 2297 PetscCall(PetscBLASIntCast(i, &blda)); 2298 PetscCall(MatDenseGetLDA(c->A, &i)); 2299 PetscCall(PetscBLASIntCast(i, &clda)); 2300 PetscCall(MatGetOwnershipRanges(B, &ranges)); 2301 bn = B->rmap->n; 2302 if (blda == bn) { 2303 sendbuf = (PetscScalar *)bv; 2304 } else { 2305 sendbuf = abt->buf[0]; 2306 for (k = 0, i = 0; i < cK; i++) { 2307 for (j = 0; j < bn; j++, k++) sendbuf[k] = bv[i * blda + j]; 2308 } 2309 } 2310 if (size > 1) { 2311 sendto = (rank + size - 1) % size; 2312 recvfrom = (rank + size + 1) % size; 2313 } else { 2314 sendto = recvfrom = 0; 2315 } 2316 PetscCall(PetscBLASIntCast(cK, &ck)); 2317 PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm)); 2318 recvisfrom = rank; 2319 for (i = 0; i < size; i++) { 2320 /* we have finished receiving in sending, bufs can be read/modified */ 2321 PetscMPIInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */ 2322 PetscInt nextbn = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom]; 2323 2324 if (nextrecvisfrom != rank) { 2325 /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */ 2326 sendsiz = cK * bn; 2327 recvsiz = cK * nextbn; 2328 recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1]; 2329 PetscCallMPI(MPIU_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0])); 2330 PetscCallMPI(MPIU_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1])); 2331 } 2332 2333 /* local aseq * sendbuf^T */ 2334 PetscCall(PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn)); 2335 if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &cm, &cn, &ck, &_DOne, av, &alda, sendbuf, &cn, &_DZero, cv + clda * ranges[recvisfrom], &clda)); 2336 2337 if (nextrecvisfrom != rank) { 2338 /* wait for the sends and receives to complete, swap sendbuf and recvbuf */ 2339 PetscCallMPI(MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE)); 2340 } 2341 bn = nextbn; 2342 recvisfrom = nextrecvisfrom; 2343 sendbuf = recvbuf; 2344 } 2345 PetscCall(MatDenseRestoreArrayRead(a->A, &av)); 2346 PetscCall(MatDenseRestoreArrayRead(b->A, &bv)); 2347 PetscCall(MatDenseRestoreArrayWrite(c->A, &cv)); 2348 PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 2349 PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 2350 PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 2351 PetscFunctionReturn(PETSC_SUCCESS); 2352 } 2353 2354 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C) 2355 { 2356 Mat_MPIDense *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data; 2357 Mat_MatTransMultDense *abt; 2358 MPI_Comm comm; 2359 PetscMPIInt size, ibn; 2360 PetscScalar *cv, *sendbuf, *recvbuf; 2361 const PetscScalar *av, *bv; 2362 PetscInt blda, i, cK = A->cmap->N, k, j, bn; 2363 PetscScalar _DOne = 1.0, _DZero = 0.0; 2364 PetscBLASInt cm, cn, ck, alda, clda; 2365 2366 PetscFunctionBegin; 2367 MatCheckProduct(C, 3); 2368 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 2369 abt = (Mat_MatTransMultDense *)C->product->data; 2370 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 2371 PetscCallMPI(MPI_Comm_size(comm, &size)); 2372 PetscCall(MatDenseGetArrayRead(a->A, &av)); 2373 PetscCall(MatDenseGetArrayRead(b->A, &bv)); 2374 PetscCall(MatDenseGetArrayWrite(c->A, &cv)); 2375 PetscCall(MatDenseGetLDA(a->A, &i)); 2376 PetscCall(PetscBLASIntCast(i, &alda)); 2377 PetscCall(MatDenseGetLDA(b->A, &blda)); 2378 PetscCall(MatDenseGetLDA(c->A, &i)); 2379 PetscCall(PetscBLASIntCast(i, &clda)); 2380 /* copy transpose of B into buf[0] */ 2381 bn = B->rmap->n; 2382 sendbuf = abt->buf[0]; 2383 recvbuf = abt->buf[1]; 2384 for (k = 0, j = 0; j < bn; j++) { 2385 for (i = 0; i < cK; i++, k++) sendbuf[k] = bv[i * blda + j]; 2386 } 2387 PetscCall(MatDenseRestoreArrayRead(b->A, &bv)); 2388 PetscCall(PetscMPIIntCast(bn * cK, &ibn)); 2389 PetscCallMPI(MPI_Allgatherv(sendbuf, ibn, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm)); 2390 PetscCall(PetscBLASIntCast(cK, &ck)); 2391 PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm)); 2392 PetscCall(PetscBLASIntCast(c->A->cmap->n, &cn)); 2393 if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &cm, &cn, &ck, &_DOne, av, &alda, recvbuf, &ck, &_DZero, cv, &clda)); 2394 PetscCall(MatDenseRestoreArrayRead(a->A, &av)); 2395 PetscCall(MatDenseRestoreArrayRead(b->A, &bv)); 2396 PetscCall(MatDenseRestoreArrayWrite(c->A, &cv)); 2397 PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 2398 PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 2399 PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 2400 PetscFunctionReturn(PETSC_SUCCESS); 2401 } 2402 2403 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) 2404 { 2405 Mat_MatTransMultDense *abt; 2406 2407 PetscFunctionBegin; 2408 MatCheckProduct(C, 3); 2409 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 2410 abt = (Mat_MatTransMultDense *)C->product->data; 2411 switch (abt->alg) { 2412 case 1: 2413 PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C)); 2414 break; 2415 default: 2416 PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C)); 2417 break; 2418 } 2419 PetscFunctionReturn(PETSC_SUCCESS); 2420 } 2421 2422 static PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data) 2423 { 2424 Mat_MatMultDense *ab = (Mat_MatMultDense *)data; 2425 2426 PetscFunctionBegin; 2427 PetscCall(MatDestroy(&ab->Ce)); 2428 PetscCall(MatDestroy(&ab->Ae)); 2429 PetscCall(MatDestroy(&ab->Be)); 2430 PetscCall(PetscFree(ab)); 2431 PetscFunctionReturn(PETSC_SUCCESS); 2432 } 2433 2434 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) 2435 { 2436 Mat_MatMultDense *ab; 2437 Mat_MPIDense *mdn = (Mat_MPIDense *)A->data; 2438 Mat_MPIDense *b = (Mat_MPIDense *)B->data; 2439 2440 PetscFunctionBegin; 2441 MatCheckProduct(C, 3); 2442 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Missing product data"); 2443 ab = (Mat_MatMultDense *)C->product->data; 2444 if (ab->Ae && ab->Ce) { 2445 #if PetscDefined(HAVE_ELEMENTAL) 2446 PetscCall(MatConvert_MPIDense_Elemental(A, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Ae)); 2447 PetscCall(MatConvert_MPIDense_Elemental(B, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Be)); 2448 PetscCall(MatMatMultNumeric_Elemental(ab->Ae, ab->Be, ab->Ce)); 2449 PetscCall(MatConvert(ab->Ce, MATMPIDENSE, MAT_REUSE_MATRIX, &C)); 2450 #else 2451 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "PETSC_HAVE_ELEMENTAL not defined"); 2452 #endif 2453 } else { 2454 MPI_Comm comm; 2455 const PetscScalar *read; 2456 PetscScalar *write; 2457 PetscInt lda; 2458 const PetscInt *ranges; 2459 PetscMPIInt size; 2460 2461 if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); /* cannot be done during the symbolic phase because of possible calls to MatProductReplaceMats() */ 2462 comm = PetscObjectComm((PetscObject)B); 2463 PetscCallMPI(MPI_Comm_size(comm, &size)); 2464 PetscCall(PetscLayoutGetRanges(B->rmap, &ranges)); 2465 if (ranges[1] == ranges[size]) { 2466 // optimize for the case where the B matrix is broadcast from rank 0 2467 PetscInt b_lda, be_lda; 2468 Mat b_local = b->A; 2469 Mat b_alloc = NULL; 2470 Mat be_local = ab->Be; 2471 Mat be_alloc = NULL; 2472 PetscMemType b_memtype, be_memtype; 2473 const PetscScalar *b_array = NULL; 2474 MPI_Datatype vector_type; 2475 PetscScalar *be_array = NULL; 2476 PetscMPIInt rank; 2477 2478 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 2479 PetscCall(MatDenseGetLDA(be_local, &be_lda)); 2480 if (be_lda != B->rmap->N) { 2481 PetscCall(MatDuplicate(be_local, MAT_DO_NOT_COPY_VALUES, &be_alloc)); 2482 be_local = be_alloc; 2483 } 2484 2485 if (rank == 0) { 2486 PetscCall(MatDenseGetLDA(b_local, &b_lda)); 2487 if (b_lda != B->rmap->N) { 2488 PetscCall(MatDuplicate(b_local, MAT_DO_NOT_COPY_VALUES, &b_alloc)); 2489 PetscCall(MatCopy(b_local, b_alloc, DIFFERENT_NONZERO_PATTERN)); 2490 b_local = b_alloc; 2491 } 2492 } 2493 vector_type = MPIU_SCALAR; 2494 if (B->cmap->N > 1) { 2495 PetscMPIInt mpi_N; 2496 2497 PetscCall(PetscMPIIntCast(B->cmap->N, &mpi_N)); 2498 PetscCallMPI(MPI_Type_contiguous(mpi_N, MPIU_SCALAR, &vector_type)); 2499 PetscCallMPI(MPI_Type_commit(&vector_type)); 2500 } 2501 PetscCall(MatDenseGetArrayReadAndMemType(b_local, &b_array, &b_memtype)); 2502 PetscCall(MatDenseGetArrayWriteAndMemType(be_local, &be_array, &be_memtype)); 2503 PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, vector_type, b_memtype, b_array, be_memtype, be_array, MPI_REPLACE)); 2504 PetscCall(PetscSFBcastEnd(mdn->Mvctx, vector_type, b_array, be_array, MPI_REPLACE)); 2505 PetscCall(MatDenseRestoreArrayWriteAndMemType(be_local, &be_array)); 2506 PetscCall(MatDenseRestoreArrayReadAndMemType(b_local, &b_array)); 2507 if (be_local != ab->Be) PetscCall(MatCopy(be_local, ab->Be, DIFFERENT_NONZERO_PATTERN)); 2508 if (B->cmap->N > 1) PetscCallMPI(MPI_Type_free(&vector_type)); 2509 PetscCall(MatDestroy(&be_alloc)); 2510 PetscCall(MatDestroy(&b_alloc)); 2511 } else { 2512 PetscCall(MatDenseGetLDA(B, &lda)); 2513 PetscCall(MatDenseGetArrayRead(B, &read)); 2514 PetscCall(MatDenseGetArrayWrite(ab->Be, &write)); 2515 for (PetscInt i = 0; i < C->cmap->N; ++i) { 2516 PetscCall(PetscSFBcastBegin(mdn->Mvctx, MPIU_SCALAR, read + i * lda, write + i * ab->Be->rmap->n, MPI_REPLACE)); 2517 PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, read + i * lda, write + i * ab->Be->rmap->n, MPI_REPLACE)); 2518 } 2519 PetscCall(MatDenseRestoreArrayWrite(ab->Be, &write)); 2520 PetscCall(MatDenseRestoreArrayRead(B, &read)); 2521 } 2522 PetscCall(MatMatMultNumeric_SeqDense_SeqDense(((Mat_MPIDense *)A->data)->A, ab->Be, ((Mat_MPIDense *)C->data)->A)); 2523 } 2524 PetscFunctionReturn(PETSC_SUCCESS); 2525 } 2526 2527 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C) 2528 { 2529 Mat_Product *product = C->product; 2530 PetscInt alg; 2531 Mat_MatMultDense *ab; 2532 PetscBool flg; 2533 2534 PetscFunctionBegin; 2535 MatCheckProduct(C, 4); 2536 PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 2537 /* check local size of A and B */ 2538 PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ", %" PetscInt_FMT ")", 2539 A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend); 2540 2541 PetscCall(PetscStrcmp(product->alg, "petsc", &flg)); 2542 alg = flg ? 0 : 1; 2543 2544 /* setup C */ 2545 PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N)); 2546 PetscCall(MatSetType(C, MATMPIDENSE)); 2547 PetscCall(MatSetUp(C)); 2548 2549 /* create data structure for reuse Cdense */ 2550 PetscCall(PetscNew(&ab)); 2551 2552 switch (alg) { 2553 case 1: /* alg: "elemental" */ 2554 #if PetscDefined(HAVE_ELEMENTAL) 2555 /* create elemental matrices Ae and Be */ 2556 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &ab->Ae)); 2557 PetscCall(MatSetSizes(ab->Ae, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N)); 2558 PetscCall(MatSetType(ab->Ae, MATELEMENTAL)); 2559 PetscCall(MatSetUp(ab->Ae)); 2560 PetscCall(MatSetOption(ab->Ae, MAT_ROW_ORIENTED, PETSC_FALSE)); 2561 2562 PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &ab->Be)); 2563 PetscCall(MatSetSizes(ab->Be, PETSC_DECIDE, PETSC_DECIDE, B->rmap->N, B->cmap->N)); 2564 PetscCall(MatSetType(ab->Be, MATELEMENTAL)); 2565 PetscCall(MatSetUp(ab->Be)); 2566 PetscCall(MatSetOption(ab->Be, MAT_ROW_ORIENTED, PETSC_FALSE)); 2567 2568 /* compute symbolic Ce = Ae*Be */ 2569 PetscCall(MatCreate(PetscObjectComm((PetscObject)C), &ab->Ce)); 2570 PetscCall(MatMatMultSymbolic_Elemental(ab->Ae, ab->Be, fill, ab->Ce)); 2571 #else 2572 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "PETSC_HAVE_ELEMENTAL not defined"); 2573 #endif 2574 break; 2575 default: /* alg: "petsc" */ 2576 ab->Ae = NULL; 2577 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, A->cmap->N, B->cmap->N, NULL, &ab->Be)); 2578 ab->Ce = NULL; 2579 break; 2580 } 2581 2582 C->product->data = ab; 2583 C->product->destroy = MatDestroy_MatMatMult_MPIDense_MPIDense; 2584 C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense; 2585 PetscFunctionReturn(PETSC_SUCCESS); 2586 } 2587 2588 static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C) 2589 { 2590 Mat_Product *product = C->product; 2591 const char *algTypes[2] = {"petsc", "elemental"}; 2592 PetscInt alg, nalg = PetscDefined(HAVE_ELEMENTAL) ? 2 : 1; 2593 PetscBool flg = PETSC_FALSE; 2594 2595 PetscFunctionBegin; 2596 /* Set default algorithm */ 2597 alg = 0; /* default is PETSc */ 2598 PetscCall(PetscStrcmp(product->alg, "default", &flg)); 2599 if (flg) PetscCall(MatProductSetAlgorithm(C, algTypes[alg])); 2600 2601 /* Get runtime option */ 2602 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat"); 2603 PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_AB", algTypes, nalg, algTypes[alg], &alg, &flg)); 2604 PetscOptionsEnd(); 2605 if (flg) PetscCall(MatProductSetAlgorithm(C, algTypes[alg])); 2606 2607 C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense; 2608 C->ops->productsymbolic = MatProductSymbolic_AB; 2609 PetscFunctionReturn(PETSC_SUCCESS); 2610 } 2611 2612 static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C) 2613 { 2614 Mat_Product *product = C->product; 2615 Mat A = product->A, B = product->B; 2616 2617 PetscFunctionBegin; 2618 PetscCheck(A->rmap->rstart == B->rmap->rstart && A->rmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", 2619 A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend); 2620 C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense; 2621 C->ops->productsymbolic = MatProductSymbolic_AtB; 2622 PetscFunctionReturn(PETSC_SUCCESS); 2623 } 2624 2625 static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C) 2626 { 2627 Mat_Product *product = C->product; 2628 const char *algTypes[2] = {"allgatherv", "cyclic"}; 2629 PetscInt alg, nalg = 2; 2630 PetscBool flg = PETSC_FALSE; 2631 2632 PetscFunctionBegin; 2633 /* Set default algorithm */ 2634 alg = 0; /* default is allgatherv */ 2635 PetscCall(PetscStrcmp(product->alg, "default", &flg)); 2636 if (flg) PetscCall(MatProductSetAlgorithm(C, algTypes[alg])); 2637 2638 /* Get runtime option */ 2639 if (product->api_user) { 2640 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatTransposeMult", "Mat"); 2641 PetscCall(PetscOptionsEList("-matmattransmult_mpidense_mpidense_via", "Algorithmic approach", "MatMatTransposeMult", algTypes, nalg, algTypes[alg], &alg, &flg)); 2642 PetscOptionsEnd(); 2643 } else { 2644 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_ABt", "Mat"); 2645 PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_ABt", algTypes, nalg, algTypes[alg], &alg, &flg)); 2646 PetscOptionsEnd(); 2647 } 2648 if (flg) PetscCall(MatProductSetAlgorithm(C, algTypes[alg])); 2649 2650 C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense; 2651 C->ops->productsymbolic = MatProductSymbolic_ABt; 2652 PetscFunctionReturn(PETSC_SUCCESS); 2653 } 2654 2655 static PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C) 2656 { 2657 Mat_Product *product = C->product; 2658 2659 PetscFunctionBegin; 2660 switch (product->type) { 2661 case MATPRODUCT_AB: 2662 PetscCall(MatProductSetFromOptions_MPIDense_AB(C)); 2663 break; 2664 case MATPRODUCT_AtB: 2665 PetscCall(MatProductSetFromOptions_MPIDense_AtB(C)); 2666 break; 2667 case MATPRODUCT_ABt: 2668 PetscCall(MatProductSetFromOptions_MPIDense_ABt(C)); 2669 break; 2670 default: 2671 break; 2672 } 2673 PetscFunctionReturn(PETSC_SUCCESS); 2674 } 2675