/*
   Basic functions for basic parallel dense matrices.
   Portions of this code are under:
   Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
*/

#include <../src/mat/impls/dense/mpi/mpidense.h> /*I "petscmat.h" I*/
#include <../src/mat/impls/aij/mpi/mpiaij.h>
#include <petscblaslapack.h>
#include <petsc/private/vecimpl.h>
#include <petscdevice.h>
#include <petsc/private/deviceimpl.h>

/*@
  MatDenseGetLocalMatrix - For a `MATMPIDENSE` or `MATSEQDENSE` matrix returns the sequential
  matrix that represents the operator. For sequential matrices it returns itself.

  Input Parameter:
. A - the sequential or MPI `MATDENSE` matrix

  Output Parameter:
. B - the inner matrix

  Level: intermediate

.seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATMPIDENSE`, `MATSEQDENSE`
@*/
PetscErrorCode MatDenseGetLocalMatrix(Mat A, Mat *B)
{
  Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
  PetscBool     flg;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscAssertPointer(B, 2);
  /* MPI dense: return the per-rank sequential block; sequential dense: return the matrix itself */
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &flg));
  if (flg) *B = mat->A;
  else {
    PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQDENSE, &flg));
    PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)A)->type_name);
    *B = A;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Copy the local dense block of A into the local dense block of B (layouts assumed compatible) */
static PetscErrorCode MatCopy_MPIDense(Mat A, Mat B, MatStructure s)
{
  Mat_MPIDense *Amat = (Mat_MPIDense *)A->data;
  Mat_MPIDense *Bmat = (Mat_MPIDense *)B->data;

  PetscFunctionBegin;
  PetscCall(MatCopy(Amat->A, Bmat->A, s));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Add alpha to each locally owned diagonal entry A(j,j), for rstart <= j < min(rend, N) */
PetscErrorCode MatShift_MPIDense(Mat A, PetscScalar alpha)
{
  Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
  PetscInt      j, lda, rstart = A->rmap->rstart, rend = A->rmap->rend, rend2;
  PetscScalar  *v;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArray(mat->A, &v));
  PetscCall(MatDenseGetLDA(mat->A, &lda));
  /* clamp to the global column count so (tall) rectangular matrices are handled */
  rend2 = PetscMin(rend, A->cmap->N);
  if (rend2 > rstart) {
    /* local row j - rstart, global column j: the local block stores all N columns */
    for (j = rstart; j < rend2; j++) v[j - rstart + j * lda] += alpha;
    PetscCall(PetscLogFlops(rend2 - rstart));
  }
  PetscCall(MatDenseRestoreArray(mat->A, &v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Access one row; only locally owned rows (in [rstart, rend)) are supported */
static PetscErrorCode MatGetRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
  PetscInt      lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;

  PetscFunctionBegin;
  PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows");
  lrow = row - rstart;
  PetscCall(MatGetRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Release a row previously obtained with MatGetRow_MPIDense() */
static PetscErrorCode MatRestoreRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
  PetscInt      lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;

  PetscFunctionBegin;
  PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows");
  lrow = row - rstart;
  PetscCall(MatRestoreRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Return (creating and caching on first use) the square diagonal block owned by this rank;
   the block aliases the local array, so no values are copied */
static PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A, Mat *a)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)A->data;
  PetscInt      m = A->rmap->n, rstart = A->rmap->rstart;
  PetscScalar  *array;
  MPI_Comm      comm;
  PetscBool     flg;
  Mat           B;

  PetscFunctionBegin;
  PetscCall(MatHasCongruentLayouts(A, &flg));
  PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only square matrices supported.");
  PetscCall(PetscObjectQuery((PetscObject)A, "DiagonalBlock", (PetscObject *)&B));
  if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like
               MatRestoreDiagonalBlock */
#if PetscDefined(HAVE_CUDA)
    PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSECUDA, &flg));
    PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSECUDA);
#elif PetscDefined(HAVE_HIP)
    PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSEHIP, &flg));
    PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSEHIP);
#endif
    PetscCall(PetscObjectGetComm((PetscObject)mdn->A, &comm));
    PetscCall(MatCreate(comm, &B));
    PetscCall(MatSetSizes(B, m, m, m, m));
    PetscCall(MatSetType(B, ((PetscObject)mdn->A)->type_name));
    /* Point B's storage at columns rstart..rstart+m-1 of the local array (no copy).
       NOTE(review): the offset m * rstart uses m as the column stride, which is correct
       only when the local block's lda equals m -- confirm for matrices with a custom LDA. */
    PetscCall(MatDenseGetArrayRead(mdn->A, (const PetscScalar **)&array));
    PetscCall(MatSeqDenseSetPreallocation(B, array + m * rstart));
    PetscCall(MatDenseRestoreArrayRead(mdn->A, (const PetscScalar **)&array));
    /* cache on A; the composition keeps a reference, so the local MatDestroy() below is safe */
    PetscCall(PetscObjectCompose((PetscObject)A, "DiagonalBlock", (PetscObject)B));
    *a = B;
    PetscCall(MatDestroy(&B));
  } else *a = B;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Insert/add values: locally owned rows go straight into the local block; off-process
   rows are queued in the stash and communicated during assembly */
static PetscErrorCode MatSetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIDense *A = (Mat_MPIDense *)mat->data;
  PetscInt      i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;
  PetscBool     roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row indices are ignored by convention */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        PetscCall(MatSetValues(A->A, 1, &row, n, idxn, PetscSafePointerPlusOffset(v, i * n), addv));
      } else {
        for (j = 0; j < n; j++) {
          if (idxn[j] < 0) continue;
          PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large");
          /* column-oriented input: v is m x n stored column-major */
          PetscCall(MatSetValues(A->A, 1, &row, 1, &idxn[j], PetscSafePointerPlusOffset(v, i + j * m), addv));
        }
      }
    } else if (!A->donotstash) {
      /* off-process entry: stash it and mark unassembled so assembly communicates it */
      mat->assembled = PETSC_FALSE;
      if (roworiented) {
        PetscCall(MatStashValuesRow_Private(&mat->stash, idxm[i], n, idxn, PetscSafePointerPlusOffset(v, i * n), PETSC_FALSE));
      } else {
        PetscCall(MatStashValuesCol_Private(&mat->stash, idxm[i], n, idxn, PetscSafePointerPlusOffset(v, i), m, PETSC_FALSE));
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Fetch values; only locally owned rows can be read (no communication is performed) */
static PetscErrorCode MatGetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
  PetscInt      i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j = 0; j < n; j++) {
        if (idxn[j] < 0) continue; /* negative column */
        PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large");
        /* output v is m x n row-major */
        PetscCall(MatGetValues(mdn->A, 1, &row, 1, &idxn[j], v + i * n + j));
      }
    } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Leading dimension of the local dense storage */
static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A, PetscInt *lda)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseGetLDA(a->A, lda));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Set the leading dimension, first creating the inner sequential matrix if it does not exist yet */
static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A, PetscInt lda)
{
  Mat_MPIDense *a     = (Mat_MPIDense *)A->data;
  MatType       mtype = MATSEQDENSE;

  PetscFunctionBegin;
  if (!a->A) {
    PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
    PetscCall(PetscLayoutSetUp(A->rmap));
    PetscCall(PetscLayoutSetUp(A->cmap));
    PetscCall(MatCreate(PETSC_COMM_SELF, &a->A));
    /* the local block is m x N: every rank stores all global columns of its rows */
    PetscCall(MatSetSizes(a->A, A->rmap->n, A->cmap->N, A->rmap->n, A->cmap->N));
#if PetscDefined(HAVE_CUDA)
    PetscBool iscuda;
    PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSECUDA, &iscuda));
    if (iscuda) mtype = MATSEQDENSECUDA;
#elif PetscDefined(HAVE_HIP)
    PetscBool iship;
    PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSEHIP, &iship));
    if (iship) mtype = MATSEQDENSEHIP;
#endif
    PetscCall(MatSetType(a->A, mtype));
  }
  PetscCall(MatDenseSetLDA(a->A, lda));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Read-write access to the local array; disallowed while a submatrix view is outstanding */
static PetscErrorCode MatDenseGetArray_MPIDense(Mat A, PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseGetArray(a->A, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Read-only access to the local array */
static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A, PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseGetArrayRead(a->A, (const PetscScalar **)array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Write-only access to the local array */
static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A, PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseGetArrayWrite(a->A, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Temporarily replace the local storage with a user-provided array (see MatDensePlaceArray()) */
static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A, const PetscScalar *array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDensePlaceArray(a->A, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Undo MatDensePlaceArray(), restoring the original storage */
static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseResetArray(a->A));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Permanently replace the local storage with a user-provided array (ownership transfers) */
static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A, const PetscScalar *array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseReplaceArray(a->A, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Extract the submatrix A(isrow, iscol); the row partitioning of the result must match A's
   (no parallel redistribution) while iscol is gathered globally on every rank */
static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A, IS isrow, IS iscol, MatReuse scall, Mat *B)
{
  Mat_MPIDense      *mat = (Mat_MPIDense *)A->data, *newmatd;
  PetscInt           lda, i, j, rstart, rend, nrows, ncols, Ncols, nlrows, nlcols;
  const PetscInt    *irow, *icol;
  const PetscScalar *v;
  PetscScalar       *bv;
  Mat                newmat;
  IS                 iscol_local;
  MPI_Comm           comm_is, comm_mat;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm_mat));
  PetscCall(PetscObjectGetComm((PetscObject)iscol, &comm_is));
  PetscCheck(comm_mat == comm_is, PETSC_COMM_SELF, PETSC_ERR_ARG_NOTSAMECOMM, "IS communicator must match matrix communicator");

  PetscCall(ISAllGather(iscol, &iscol_local));
  PetscCall(ISGetIndices(isrow, &irow));
  PetscCall(ISGetIndices(iscol_local, &icol));
  PetscCall(ISGetLocalSize(isrow, &nrows));
  PetscCall(ISGetLocalSize(iscol, &ncols));
  PetscCall(ISGetSize(iscol, &Ncols)); /* global number of columns, size of iscol_local */

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK. ... Currently supports only submatrix same partitioning as
     original matrix! */

  PetscCall(MatGetLocalSize(A, &nlrows, &nlcols));
  PetscCall(MatGetOwnershipRange(A, &rstart, &rend));

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &newmat));
    PetscCall(MatSetSizes(newmat, nrows, ncols, PETSC_DECIDE, Ncols));
    PetscCall(MatSetType(newmat, ((PetscObject)A)->type_name));
    PetscCall(MatMPIDenseSetPreallocation(newmat, NULL));
  }

  /* Now extract the data pointers and do the copy, column at a time.
     NOTE(review): bv is advanced linearly, so this assumes the destination local block is
     contiguous (lda == nrows) -- true for a fresh preallocation; confirm for MAT_REUSE_MATRIX. */
  newmatd = (Mat_MPIDense *)newmat->data;
  PetscCall(MatDenseGetArray(newmatd->A, &bv));
  PetscCall(MatDenseGetArrayRead(mat->A, &v));
  PetscCall(MatDenseGetLDA(mat->A, &lda));
  for (i = 0; i < Ncols; i++) {
    const PetscScalar *av = v + lda * icol[i];
    for (j = 0; j < nrows; j++) *bv++ = av[irow[j] - rstart];
  }
  PetscCall(MatDenseRestoreArrayRead(mat->A, &v));
  PetscCall(MatDenseRestoreArray(newmatd->A, &bv));

  /* Assemble the matrices so that the correct flags are set */
  PetscCall(MatAssemblyBegin(newmat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(newmat, MAT_FINAL_ASSEMBLY));

  /* Free work space */
  PetscCall(ISRestoreIndices(isrow, &irow));
  PetscCall(ISRestoreIndices(iscol_local, &icol));
  PetscCall(ISDestroy(&iscol_local));
  *B = newmat;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Return an array obtained with MatDenseGetArray_MPIDense() */
static PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A, PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseRestoreArray(a->A, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Return an array obtained with MatDenseGetArrayRead_MPIDense() */
static PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A, PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseRestoreArrayRead(a->A, (const PetscScalar **)array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Return an array obtained with MatDenseGetArrayWrite_MPIDense() */
static PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A, PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseRestoreArrayWrite(a->A, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Begin assembly: start scattering stashed off-process entries to their owning ranks */
static PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat, MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
  PetscInt      nstash, reallocs;

  PetscFunctionBegin;
  if (mdn->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
  PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
  PetscCall(PetscInfo(mdn->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* End assembly: drain the stash messages, insert the received values, then assemble the local block */
static PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat, MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
  PetscInt      i, *row, *col, flg, j, rstart, ncols;
  PetscMPIInt   n;
  PetscScalar  *val;

  PetscFunctionBegin;
  if (!mdn->donotstash && !mat->nooffprocentries) {
    /* wait on receives */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break; /* no more messages */

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIDense(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
  }

  PetscCall(MatAssemblyBegin(mdn->A, mode));
  PetscCall(MatAssemblyEnd(mdn->A, mode));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Zero every entry of the local block */
static PetscErrorCode MatZeroEntries_MPIDense(Mat A)
{
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatZeroEntries(l->A));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Zero the given global rows; optionally place diag on the diagonal and fix b = diag * x */
static PetscErrorCode MatZeroRows_MPIDense(Mat A, PetscInt n, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;
  PetscInt      i, len, *lrows;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(PetscLayoutMapLocal(A->rmap, n, rows, &len, &lrows, NULL));
  /* fix right-hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArrayWrite(b, &bb));
    for (i = 0; i < len; ++i) bb[lrows[i]] = diag * xx[lrows[i]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArrayWrite(b, &bb));
  }
  /* zero the rows with a zero diagonal, then set the requested diagonal in a second pass */
  PetscCall(MatZeroRows(l->A, len, lrows, 0.0, NULL, NULL));
  if (diag != 0.0) {
    Vec d;

    PetscCall(MatCreateVecs(A, NULL, &d));
    PetscCall(VecSet(d, diag));
    PetscCall(MatDiagonalSet(A, d, INSERT_VALUES));
    PetscCall(VecDestroy(&d));
  }
  PetscCall(PetscFree(lrows));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat, Vec, Vec);
PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat, Vec, Vec, Vec);
PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat, Vec, Vec);
PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat, Vec, Vec, Vec);

/* y = A[:, c_start:c_end) * x: broadcast x into the local work vector lvec via the SF,
   then dispatch the column-range multiply on the local block */
static PetscErrorCode MatMultColumnRange_MPIDense(Mat mat, Vec xx, Vec yy, PetscInt c_start, PetscInt c_end)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat)); /* lazily build the communication SF */
  PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
  PetscCall(VecGetArrayWriteAndMemType(mdn->lvec, &ay, &aymtype));
  PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
  PetscCall(VecRestoreArrayWriteAndMemType(mdn->lvec, &ay));
  PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
  PetscUseMethod(mdn->A, "MatMultColumnRange_C", (Mat, Vec, Vec, PetscInt, PetscInt), (mdn->A, mdn->lvec, yy, c_start, c_end));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* y = A * x: gather the full x into lvec via the SF broadcast, then local dense multiply */
static PetscErrorCode MatMult_MPIDense(Mat mat, Vec xx, Vec yy)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
  PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
  PetscCall(VecGetArrayWriteAndMemType(mdn->lvec, &ay, &aymtype));
  PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
  PetscCall(VecRestoreArrayWriteAndMemType(mdn->lvec, &ay));
  PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
  PetscCall((*mdn->A->ops->mult)(mdn->A, mdn->lvec, yy));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* z = y + A[:, c_start:c_end) * x (column-range multiply-add) */
static PetscErrorCode MatMultAddColumnRange_MPIDense(Mat mat, Vec xx, Vec yy, Vec zz, PetscInt c_start, PetscInt c_end)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
  PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
  PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
  PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
  PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
  PetscUseMethod(mdn->A, "MatMultAddColumnRange_C", (Mat, Vec, Vec, Vec, PetscInt, PetscInt), (mdn->A, mdn->lvec, yy, zz, c_start, c_end));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* z = y + A * x: broadcast x into lvec, then local multiply-add */
static PetscErrorCode MatMultAdd_MPIDense(Mat mat, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
  PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
  PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
  PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
  PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
  PetscCall((*mdn->A->ops->multadd)(mdn->A, mdn->lvec, yy, zz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* y = A[:, c_start:c_end)^H * x: local column-range multiply into lvec, then SF-reduce (sum) into y */
static PetscErrorCode MatMultHermitianTransposeColumnRange_MPIDense(Mat A, Vec xx, Vec yy, PetscInt c_start, PetscInt c_end)
{
  Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;
  PetscInt           r_start, r_end;
  PetscInt           c_start_local, c_end_local;

  PetscFunctionBegin;
  if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
  PetscCall(VecZeroEntries(a->lvec));
  PetscCall(VecGetOwnershipRange(yy, &r_start, &r_end));
  /* intersection of the requested column range with this rank's owned entries of yy */
  c_start_local = PetscMax(c_start, r_start);
  c_end_local   = PetscMin(c_end, r_end);
  PetscCall(VecGetArrayAndMemType(yy, &ay, &aymtype));
  if (c_end_local > c_start_local) {
    /* NOTE(review): ay is the LOCAL array (its element 0 is global index r_start), yet it is
       indexed with the GLOBAL index c_start_local; expected &ay[c_start_local - r_start] -- confirm */
    if (PetscMemTypeHost(aymtype)) {
      PetscCall(PetscArrayzero(&ay[c_start_local], (size_t)(c_end_local - c_start_local)));
    } else {
      PetscCall(PetscDeviceRegisterMemory(ay, aymtype, sizeof(*ay) * ((size_t)(r_end - r_start))));
      PetscCall(PetscDeviceArrayZero(NULL, &ay[c_start_local], (size_t)(c_end_local - c_start_local)));
    }
  }
  PetscUseMethod(a->A, "MatMultHermitianTransposeColumnRange_C", (Mat, Vec, Vec, PetscInt, PetscInt), (a->A, xx, a->lvec, c_start, c_end));
  PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
  PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
  PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
  PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
  PetscCall(VecRestoreArrayAndMemType(yy, &ay));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* y = A^T x (or A^H x when herm): local transpose multiply into lvec, SF-reduce (sum) into zeroed y */
static PetscErrorCode MatMultTransposeKernel_MPIDense(Mat A, Vec xx, Vec yy, PetscBool herm)
{
  Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
  PetscCall(VecSet(yy, 0.0)); /* reduce accumulates, so start from zero */
  if (herm) PetscCall((*a->A->ops->multhermitiantranspose)(a->A, xx, a->lvec));
  else PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec));
  PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(yy, &ay, &aymtype));
  PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
  PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
  PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
  PetscCall(VecRestoreArrayAndMemType(yy, &ay));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* z = y + A[:, c_start:c_end)^H * x: copy y into z, then reduce the local partial product into z */
static PetscErrorCode MatMultHermitianTransposeAddColumnRange_MPIDense(Mat A, Vec xx, Vec yy, Vec zz, PetscInt c_start, PetscInt c_end)
{
  Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
  PetscCall(VecCopy(yy, zz));
  PetscCall(VecZeroEntries(a->lvec));
  PetscUseMethod(a->A, "MatMultHermitianTransposeColumnRange_C", (Mat, Vec, Vec, PetscInt, PetscInt), (a->A, xx, a->lvec, c_start, c_end));
  PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(zz, &ay, &aymtype));
  PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
  PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
  PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
  PetscCall(VecRestoreArrayAndMemType(zz, &ay));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* z = y + A^T x (or A^H x when herm): copy y into z, local transpose multiply, SF-reduce into z */
static PetscErrorCode MatMultTransposeAddKernel_MPIDense(Mat A, Vec xx, Vec yy, Vec zz, PetscBool herm)
{
  Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
  PetscCall(VecCopy(yy, zz));
  if (herm) PetscCall((*a->A->ops->multhermitiantranspose)(a->A, xx, a->lvec));
  else PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec));
  PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(zz, &ay, &aymtype));
  PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
  PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
  PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
  PetscCall(VecRestoreArrayAndMemType(zz, &ay));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Thin wrappers selecting the (Hermitian) transpose kernels */
static PetscErrorCode MatMultTranspose_MPIDense(Mat A, Vec xx, Vec yy)
{
  PetscFunctionBegin;
  PetscCall(MatMultTransposeKernel_MPIDense(A, xx, yy, PETSC_FALSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A, Vec xx, Vec yy, Vec zz)
{
  PetscFunctionBegin;
  PetscCall(MatMultTransposeAddKernel_MPIDense(A, xx, yy, zz, PETSC_FALSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMultHermitianTranspose_MPIDense(Mat A, Vec xx, Vec yy)
{
  PetscFunctionBegin;
  PetscCall(MatMultTransposeKernel_MPIDense(A, xx, yy, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMultHermitianTransposeAdd_MPIDense(Mat A, Vec xx, Vec yy, Vec zz)
{
  PetscFunctionBegin;
  PetscCall(MatMultTransposeAddKernel_MPIDense(A, xx, yy, zz, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Extract the diagonal of the locally owned rows into v; entries of v past the local
   diagonal length are zeroed */
PetscErrorCode MatGetDiagonal_MPIDense(Mat A, Vec v)
{
  Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
  PetscInt           lda, len, i, nl, ng, m = A->rmap->n, radd;
  PetscScalar       *x;
  const PetscScalar *av;

  PetscFunctionBegin;
  PetscCall(VecGetArray(v, &x));
  PetscCall(VecGetSize(v, &ng));
  PetscCheck(ng == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming mat and vec");
  PetscCall(VecGetLocalSize(v, &nl));
  len  = PetscMin(a->A->rmap->n, a->A->cmap->n);
  /* offset of the diagonal block inside the local array.
     NOTE(review): this uses m (local row count) as the column stride while the loop below
     uses lda; correct only when lda == m -- confirm for matrices with a custom LDA. */
  radd = A->rmap->rstart * m;
  PetscCall(MatDenseGetArrayRead(a->A, &av));
  PetscCall(MatDenseGetLDA(a->A, &lda));
  for (i = 0; i < len; i++) x[i] = av[radd + i * lda + i];
  PetscCall(MatDenseRestoreArrayRead(a->A, &av));
  if (nl - i > 0) PetscCall(PetscArrayzero(x + i, nl - i));
  PetscCall(VecRestoreArray(v, &x));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Destroy the MPI dense matrix: free inner objects, then clear every composed method */
static PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;

  PetscFunctionBegin;
  PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
  PetscCall(MatStashDestroy_Private(&mat->stash));
  PetscCheck(!mdn->vecinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  /* (interior of MatDestroy_MPIDense) refuse to destroy while a cached submatrix view is checked out */
  PetscCheck(!mdn->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  /* release the local dense block and all cached work objects */
  PetscCall(MatDestroy(&mdn->A));
  PetscCall(VecDestroy(&mdn->lvec));
  PetscCall(PetscSFDestroy(&mdn->Mvctx));
  PetscCall(VecDestroy(&mdn->cvec));
  PetscCall(MatDestroy(&mdn->cmat));

  PetscCall(PetscFree(mat->data));
  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));

  /* detach every composed method so a subsequent MatSetType() starts from a clean slate */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", NULL));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", NULL));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", NULL));
#if defined(PETSC_HAVE_CUDA)
  /* CUDA-specific product/convert/array hooks */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensecuda_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAPlaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAResetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAReplaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDASetPreallocation_C", NULL));
#endif
#if defined(PETSC_HAVE_HIP)
  /* HIP-specific product/convert/array hooks */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensehip_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensehip_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidensehip_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaijhipsparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPPlaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPResetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPReplaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPSetPreallocation_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultColumnRange_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultAddColumnRange_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeColumnRange_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeAddColumnRange_C", NULL));

  /* drop the diagonal block cached by MatGetDiagonalBlock_MPIDense() */
  PetscCall(PetscObjectCompose((PetscObject)mat, "DiagonalBlock", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

#include <petscdraw.h>
/* Shared viewer for ASCII, draw, and socket viewers.
   For the ASCII INFO formats only per-rank summary data is printed; otherwise
   the whole matrix is gathered onto rank 0 (via a temporary MATMPIDENSE whose
   rows all live on rank 0) and viewed there sequentially. Collective. */
static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
{
  Mat_MPIDense     *mdn = (Mat_MPIDense *)mat->data;
  PetscMPIInt       rank;
  PetscViewerType   vtype; /* NOTE(review): fetched below but otherwise unused */
  PetscBool         iascii, isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  if (iascii) {
    PetscCall(PetscViewerGetType(viewer, &vtype));
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* per-rank summary only; no matrix entries are gathered */
      MatInfo info;
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %" PetscInt_FMT " \n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                   (PetscInt)info.memory));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      if (mdn->Mvctx) PetscCall(PetscSFView(mdn->Mvctx, viewer));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(PETSC_SUCCESS);
    }
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;

    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawIsNull(draw, &isnull));
    if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat          A;
    PetscInt     M = mat->rmap->N, N = mat->cmap->N, m, row, i, nz;
    PetscInt    *cols;
    PetscScalar *vals;

    PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
    if (rank == 0) {
      PetscCall(MatSetSizes(A, M, N, M, N));
    } else {
      PetscCall(MatSetSizes(A, 0, 0, M, N));
    }
    /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
    PetscCall(MatSetType(A, MATMPIDENSE));
    PetscCall(MatMPIDenseSetPreallocation(A, NULL));

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    A->insertmode = INSERT_VALUES;

    row = mat->rmap->rstart;
    m   = mdn->A->rmap->n;
    for (i = 0; i < m; i++) {
      PetscCall(MatGetRow_MPIDense(mat, row, &nz, &cols, &vals));
      PetscCall(MatSetValues_MPIDense(A, 1, &row, nz, cols, vals, INSERT_VALUES));
      PetscCall(MatRestoreRow_MPIDense(mat, row, &nz, &cols, &vals));
      row++;
    }

    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    if (rank == 0) {
      /* rank 0 owns every row of the gathered copy; view its sequential block */
      PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIDense *)A->data)->A, ((PetscObject)mat)->name));
      PetscCall(MatView_SeqDense(((Mat_MPIDense *)A->data)->A, sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Dispatch MatView() to the viewer-specific implementation */
static PetscErrorCode MatView_MPIDense(Mat mat, PetscViewer viewer)
{
  PetscBool iascii, isbinary, isdraw, issocket;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));

  if (iascii || issocket || isdraw) {
    PetscCall(MatView_MPIDense_ASCIIorDraworSocket(mat, viewer));
  } else if (isbinary) PetscCall(MatView_Dense_Binary(mat, viewer));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Gather local info from the sequential block, then reduce over the
   communicator according to flag (LOCAL, GLOBAL_MAX, or GLOBAL_SUM) */
static PetscErrorCode MatGetInfo_MPIDense(Mat A, MatInfoType flag, MatInfo *info)
{
  Mat_MPIDense  *mat = (Mat_MPIDense *)A->data;
  Mat
mdn = mat->A;
  PetscLogDouble isend[5], irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;

  /* local counts come from the per-rank sequential block */
  PetscCall(MatGetInfo(mdn, MAT_LOCAL, info));

  isend[0] = info->nz_used;
  isend[1] = info->nz_allocated;
  isend[2] = info->nz_unneeded;
  isend[3] = info->memory;
  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)A)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)A)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Forward most options to the sequential block; stash-related flags are
   recorded locally on the parallel wrapper */
static PetscErrorCode MatSetOption_MPIDense(Mat A, MatOption op, PetscBool flg)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A, 1);
    a->roworiented = flg;
    PetscCall(MatSetOption(a->A, op, flg));
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
  case MAT_SPD:
  case MAT_SPD_ETERNAL:
    /* if the diagonal matrix is square it inherits some of the properties above */
    if (a->A && A->rmap->n == A->cmap->n) PetscCall(MatSetOption(a->A, op, flg));
    break;
  default:
    break;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Scale rows by ll (purely local) and/or columns by rr (requires broadcasting
   the right-scaling vector to every rank through the Mvctx star forest) */
static PetscErrorCode MatDiagonalScale_MPIDense(Mat A, Vec ll, Vec rr)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)A->data;
  const PetscScalar *l;
  PetscScalar        x, *v, *vv, *r;
  PetscInt           i, j, s2a, s3a, s2, s3, m = mdn->A->rmap->n, n = mdn->A->cmap->n, lda;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArray(mdn->A, &vv));
  PetscCall(MatDenseGetLDA(mdn->A, &lda));
  PetscCall(MatGetLocalSize(A, &s2, &s3));
  if (ll) {
    PetscCall(VecGetLocalSize(ll, &s2a));
    PetscCheck(s2a == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left scaling vector non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT, s2a, s2);
    PetscCall(VecGetArrayRead(ll, &l));
    for (i = 0; i < m; i++) {
      x = l[i];
      v = vv + i; /* walk row i across columns, stride lda */
      for (j = 0; j < n; j++) {
        (*v) *= x;
        v += lda;
      }
    }
    PetscCall(VecRestoreArrayRead(ll, &l));
    PetscCall(PetscLogFlops(1.0 * n * m));
  }
  if (rr) {
    const PetscScalar *ar;

    PetscCall(VecGetLocalSize(rr, &s3a));
    PetscCheck(s3a == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Right scaling vec non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT ".", s3a, s3);
    PetscCall(VecGetArrayRead(rr, &ar));
    /* every rank needs ALL entries of rr since it stores all N columns locally */
    if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
    PetscCall(VecGetArray(mdn->lvec, &r));
    PetscCall(PetscSFBcastBegin(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE));
    PetscCall(VecRestoreArrayRead(rr, &ar));
    for (i = 0; i < n; i++) {
      x = r[i];
      v = vv + i * lda; /* column i is contiguous */
      for (j = 0; j <
m; j++) (*v++) *= x;
    }
    PetscCall(VecRestoreArray(mdn->lvec, &r));
    PetscCall(PetscLogFlops(1.0 * n * m));
  }
  PetscCall(MatDenseRestoreArray(mdn->A, &vv));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Parallel norms: Frobenius via sum-reduce of squares, 1-norm via per-column
   sums reduced then maximized, infinity-norm via max-reduce of local row norms.
   NOTE(review): the Frobenius and 1-norm loops traverse the local array
   contiguously and so assume lda equals the local row count — confirm upstream. */
static PetscErrorCode MatNorm_MPIDense(Mat A, NormType type, PetscReal *nrm)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)A->data;
  PetscInt           i, j;
  PetscMPIInt        size;
  PetscReal          sum = 0.0;
  const PetscScalar *av, *v;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArrayRead(mdn->A, &av));
  v = av;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    /* uniprocessor: defer entirely to the sequential implementation */
    PetscCall(MatNorm(mdn->A, type, nrm));
  } else {
    if (type == NORM_FROBENIUS) {
      for (i = 0; i < mdn->A->cmap->n * mdn->A->rmap->n; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      PetscCallMPI(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
      *nrm = PetscSqrtReal(*nrm);
      PetscCall(PetscLogFlops(2.0 * mdn->A->cmap->n * mdn->A->rmap->n));
    } else if (type == NORM_1) {
      /* max over columns of the (globally reduced) column absolute sums */
      PetscReal *tmp;

      PetscCall(PetscCalloc1(A->cmap->N, &tmp));
      *nrm = 0.0;
      v    = av;
      for (j = 0; j < mdn->A->cmap->n; j++) {
        for (i = 0; i < mdn->A->rmap->n; i++) {
          tmp[j] += PetscAbsScalar(*v);
          v++;
        }
      }
      PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, tmp, A->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
      for (j = 0; j < A->cmap->N; j++) {
        if (tmp[j] > *nrm) *nrm = tmp[j];
      }
      PetscCall(PetscFree(tmp));
      PetscCall(PetscLogFlops(A->cmap->n * A->rmap->n));
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscCall(MatNorm(mdn->A, type, nrm));
      PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
    } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for two norm");
  }
  PetscCall(MatDenseRestoreArrayRead(mdn->A, &av));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Transpose by scattering each local column as a row of the result with
   MatSetValues(); handles initial, reuse, and in-place modes */
static PetscErrorCode MatTranspose_MPIDense(Mat A, MatReuse reuse, Mat *matout)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat           B;
  PetscInt      M = A->rmap->N, N = A->cmap->N, m, n, *rwork, rstart = A->rmap->rstart;
  PetscInt      j, i, lda;
  PetscScalar  *v;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
    PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
    PetscCall(MatMPIDenseSetPreallocation(B, NULL));
  } else B = *matout;

  m = a->A->rmap->n;
  n = a->A->cmap->n;
  PetscCall(MatDenseGetArrayRead(a->A, (const PetscScalar **)&v));
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(PetscMalloc1(m, &rwork));
  for (i = 0; i < m; i++) rwork[i] = rstart + i; /* global row indices of the local block */
  for (j = 0; j < n; j++) {
    /* local column j becomes global row j of the transpose */
    PetscCall(MatSetValues(B, 1, &j, m, rwork, v, INSERT_VALUES));
    v = PetscSafePointerPlusOffset(v, lda);
  }
  PetscCall(MatDenseRestoreArrayRead(a->A, (const PetscScalar **)&v));
  PetscCall(PetscFree(rwork));
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    PetscCall(MatHeaderMerge(A, &B));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatDuplicate_MPIDense(Mat, MatDuplicateOption, Mat *);
PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat, PetscScalar);

/* Set up layouts and default-preallocate on first use */
static PetscErrorCode MatSetUp_MPIDense(Mat A)
{
  PetscFunctionBegin;
  PetscCall(PetscLayoutSetUp(A->rmap));
  PetscCall(PetscLayoutSetUp(A->cmap));
  if (!A->preallocated) PetscCall(MatMPIDenseSetPreallocation(A, NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Y += alpha*X, delegated to the sequential blocks */
static PetscErrorCode MatAXPY_MPIDense(Mat Y, PetscScalar alpha, Mat X, MatStructure str)
{
  Mat_MPIDense *A = (Mat_MPIDense *)Y->data, *B = (Mat_MPIDense *)X->data;

  PetscFunctionBegin;
  PetscCall(MatAXPY(A->A, alpha, B->A, str));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Complex-conjugate entries in place, delegated to the sequential block */
static PetscErrorCode MatConjugate_MPIDense(Mat mat)
{
  Mat_MPIDense *a = (Mat_MPIDense *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatConjugate(a->A));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Keep only the real part of each entry, delegated to the sequential block */
static PetscErrorCode MatRealPart_MPIDense(Mat A)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatRealPart(a->A));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Keep only the imaginary part of each entry, delegated to the sequential block */
static PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatImaginaryPart(a->A));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Extract column col into v; works because every rank stores all columns of
   its local row block, so the operation is purely local */
static PetscErrorCode MatGetColumnVector_MPIDense(Mat A, Vec v, PetscInt col)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(a->A, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing local matrix");
  PetscCheck(a->A->ops->getcolumnvector, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing get column operation");
  PetscCall((*a->A->ops->getcolumnvector)(a->A, v, col));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat, PetscInt, PetscReal *);

/* Per-column reductions (norms, sums, means): compute locally, reduce over the
   communicator (MAX for infinity-norm, SUM otherwise), then post-process
   (square root for 2-norm, divide by global row count for means) */
static PetscErrorCode MatGetColumnReductions_MPIDense(Mat A, PetscInt type, PetscReal *reductions)
{
  PetscInt      i, m, n;
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A, &m, &n));
  /* means are computed as global sums divided by m after the reduction */
  if (type == REDUCTION_MEAN_REALPART) {
    PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_REALPART, reductions));
  } else if (type == REDUCTION_MEAN_IMAGINARYPART) {
    PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_IMAGINARYPART, reductions));
  } else {
    PetscCall(MatGetColumnReductions_SeqDense(a->A, type, reductions));
  }
  if (type == NORM_2) {
    /* local values are norms; sum their squares across ranks */
    for (i = 0; i < n; i++) reductions[i] *= reductions[i];
  }
  if (type == NORM_INFINITY) {
    PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, reductions, n, MPIU_REAL, MPIU_MAX, A->hdr.comm));
  } else {
    PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, reductions, n, MPIU_REAL, MPIU_SUM, A->hdr.comm));
  }
  if (type == NORM_2) {
    for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
  } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < n; i++) reductions[i] /= m;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Fill with random values; mirror the device offload mask of the local block */
static PetscErrorCode MatSetRandom_MPIDense(Mat x, PetscRandom rctx)
{
  Mat_MPIDense *d = (Mat_MPIDense *)x->data;

  PetscFunctionBegin;
  PetscCall(MatSetRandom(d->A, rctx));
#if defined(PETSC_HAVE_DEVICE)
  x->offloadmask = d->A->offloadmask;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* A dense matrix stores every entry, so no diagonal entry is ever missing */
static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A, PetscBool *missing, PetscInt *d)
{
  PetscFunctionBegin;
  *missing = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat);
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat);
static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat);
static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat,
Mat, Mat);
static PetscErrorCode MatEqual_MPIDense(Mat, Mat, PetscBool *);
static PetscErrorCode MatLoad_MPIDense(Mat, PetscViewer);
static PetscErrorCode MatProductSetFromOptions_MPIDense(Mat);

/* Operation dispatch table for MATMPIDENSE; slot indices follow struct _MatOps */
static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
                                       MatGetRow_MPIDense,
                                       MatRestoreRow_MPIDense,
                                       MatMult_MPIDense,
                                       /* 4*/ MatMultAdd_MPIDense,
                                       MatMultTranspose_MPIDense,
                                       MatMultTransposeAdd_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 10*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatTranspose_MPIDense,
                                       /* 15*/ MatGetInfo_MPIDense,
                                       MatEqual_MPIDense,
                                       MatGetDiagonal_MPIDense,
                                       MatDiagonalScale_MPIDense,
                                       MatNorm_MPIDense,
                                       /* 20*/ MatAssemblyBegin_MPIDense,
                                       MatAssemblyEnd_MPIDense,
                                       MatSetOption_MPIDense,
                                       MatZeroEntries_MPIDense,
                                       /* 24*/ MatZeroRows_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 29*/ MatSetUp_MPIDense,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPIDense,
                                       NULL,
                                       /* 34*/ MatDuplicate_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 39*/ MatAXPY_MPIDense,
                                       MatCreateSubMatrices_MPIDense,
                                       NULL,
                                       MatGetValues_MPIDense,
                                       MatCopy_MPIDense,
                                       /* 44*/ NULL,
                                       MatScale_MPIDense,
                                       MatShift_MPIDense,
                                       NULL,
                                       NULL,
                                       /* 49*/ MatSetRandom_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 54*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 59*/ MatCreateSubMatrix_MPIDense,
                                       MatDestroy_MPIDense,
                                       MatView_MPIDense,
                                       NULL,
                                       NULL,
                                       /* 64*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 69*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 74*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatLoad_MPIDense,
                                       /* 79*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 83*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatMatTransposeMultSymbolic_MPIDense_MPIDense,
                                       MatMatTransposeMultNumeric_MPIDense_MPIDense,
                                       /* 89*/
                                       NULL,
                                       MatProductSetFromOptions_MPIDense,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPIDense,
                                       /* 94*/ NULL,
                                       NULL,
                                       MatRealPart_MPIDense,
                                       MatImaginaryPart_MPIDense,
                                       NULL,
                                       /*99*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatGetColumnVector_MPIDense,
                                       /*104*/ MatMissingDiagonal_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*109*/ NULL,
                                       NULL,
                                       NULL,
                                       MatMultHermitianTranspose_MPIDense,
                                       MatMultHermitianTransposeAdd_MPIDense,
                                       /*114*/ NULL,
                                       NULL,
                                       MatGetColumnReductions_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*120*/ NULL,
                                       MatTransposeMatMultSymbolic_MPIDense_MPIDense,
                                       MatTransposeMatMultNumeric_MPIDense_MPIDense,
                                       NULL,
                                       /*124*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*129*/ NULL,
                                       NULL,
                                       MatCreateMPIMatConcatenateSeqMat_MPIDense,
                                       NULL,
                                       NULL,
                                       /*134*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*139*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL};

/* Create (if needed) and preallocate the per-rank sequential block a->A of
   size n_local x N, choosing a CUDA/HIP sequential type when the parallel
   matrix is a device type; data may supply a user-provided array */
static PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat, PetscScalar *data)
{
  Mat_MPIDense *a     = (Mat_MPIDense *)mat->data;
  MatType       mtype = MATSEQDENSE;

  PetscFunctionBegin;
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));
  if (!a->A) {
    PetscCall(MatCreate(PETSC_COMM_SELF, &a->A));
    /* the local block holds ALL global columns of the locally owned rows */
    PetscCall(MatSetSizes(a->A, mat->rmap->n, mat->cmap->N, mat->rmap->n, mat->cmap->N));
  }
#if defined(PETSC_HAVE_CUDA)
  PetscBool iscuda;
  PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSECUDA, &iscuda));
  if (iscuda) mtype = MATSEQDENSECUDA;
#endif
#if defined(PETSC_HAVE_HIP)
  PetscBool iship;
  PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSEHIP, &iship));
  if (iship) mtype = MATSEQDENSEHIP;
#endif
  PetscCall(MatSetType(a->A, mtype));
  PetscCall(MatSeqDenseSetPreallocation(a->A, data));
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
  mat->offloadmask = a->A->offloadmask;
#endif
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Convert MPIAIJ -> MPIDense: densify the local AIJ rows sequentially, then
   concatenate the per-rank blocks into a parallel dense matrix */
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIDense(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat B, C;

  PetscFunctionBegin;
  PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &C));
  PetscCall(MatConvert_SeqAIJ_SeqDense(C, MATSEQDENSE, MAT_INITIAL_MATRIX, &B));
  PetscCall(MatDestroy(&C));
  if (reuse == MAT_REUSE_MATRIX) {
    C = *newmat;
  } else C = NULL;
  PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C));
  PetscCall(MatDestroy(&B));
  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A, &C));
  } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Convert MPIDense -> MPIAIJ: sparsify the local dense block, then
   concatenate the per-rank blocks into a parallel AIJ matrix */
static PetscErrorCode MatConvert_MPIDense_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat B, C;

  PetscFunctionBegin;
  PetscCall(MatDenseGetLocalMatrix(A, &C));
  PetscCall(MatConvert_SeqDense_SeqAIJ(C, MATSEQAIJ, MAT_INITIAL_MATRIX, &B));
  if (reuse == MAT_REUSE_MATRIX) {
    C = *newmat;
  } else C = NULL;
  PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C));
  PetscCall(MatDestroy(&B));
  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A, &C));
  } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
  PetscFunctionReturn(PETSC_SUCCESS);
}

#if defined(PETSC_HAVE_ELEMENTAL)
/* Convert MPIDense -> Elemental by inserting the locally owned rows through
   MatSetValues() with ADD_VALUES (the only insert mode the interface supports) */
PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat           mat_elemental;
  PetscScalar  *v;
  PetscInt      m = A->rmap->n, N = A->cmap->N, rstart = A->rmap->rstart, i, *rows, *cols, lda;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    mat_elemental = *newmat;
    PetscCall(MatZeroEntries(*newmat)); /* required because values are ADDed below */
  } else {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental));
    PetscCall(MatSetSizes(mat_elemental, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N));
    PetscCall(MatSetType(mat_elemental, MATELEMENTAL));
    PetscCall(MatSetUp(mat_elemental));
    PetscCall(MatSetOption(mat_elemental, MAT_ROW_ORIENTED, PETSC_FALSE));
  }

  PetscCall(PetscMalloc2(m, &rows, N, &cols));
  for (i = 0; i < N; i++) cols[i] = i;
  for (i = 0; i < m; i++) rows[i] = rstart + i;

  /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
  PetscCall(MatDenseGetArray(A, &v));
  PetscCall(MatDenseGetLDA(a->A, &lda));
  if (lda == m) PetscCall(MatSetValues(mat_elemental, m, rows, N, cols, v, ADD_VALUES));
  else {
    /* padded leading dimension: insert one column at a time */
    for (i = 0; i < N; i++) PetscCall(MatSetValues(mat_elemental, m, rows, 1, &i, v + lda * i, ADD_VALUES));
  }
  PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY));
  PetscCall(MatDenseRestoreArray(A, &v));
  PetscCall(PetscFree2(rows, cols));

  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A,
&mat_elemental));
  } else {
    *newmat = mat_elemental;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif

/* Borrow a pointer to column col of the local block (purely local) */
static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A, PetscInt col, PetscScalar **vals)
{
  Mat_MPIDense *mat = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseGetColumn(mat->A, col, vals));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Return the pointer obtained with MatDenseGetColumn_MPIDense() */
static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A, PetscScalar **vals)
{
  Mat_MPIDense *mat = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseRestoreColumn(mat->A, vals));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Stack per-rank sequential matrices into one parallel dense matrix; the
   symbolic phase (MAT_INITIAL_MATRIX) creates the result and validates that
   the local column counts sum to the global column count */
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
{
  Mat_MPIDense *mat;
  PetscInt      m, nloc, N;

  PetscFunctionBegin;
  PetscCall(MatGetSize(inmat, &m, &N));
  PetscCall(MatGetLocalSize(inmat, NULL, &nloc));
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    PetscInt sum;

    if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N));
    /* Check sum(n) = N */
    PetscCallMPI(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm));
    PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N);

    PetscCall(MatCreateDense(comm, m, n, PETSC_DETERMINE, N, NULL, outmat));
    PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  }

  /* numeric phase */
  mat = (Mat_MPIDense *)(*outmat)->data;
  PetscCall(MatCopy(inmat, mat->A, SAME_NONZERO_PATTERN));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Expose column col as the cached vector a->cvec whose array points directly
   into the local dense storage (read-write access); only one column vector may
   be checked out at a time */
PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscInt      lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
  a->vecinuse = col + 1; /* 1-based so 0 can mean "not in use" */
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(MatDenseGetArray(a->A, (PetscScalar **)&a->ptrinuse));
  PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)lda)));
  *v = a->cvec;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Return the column vector obtained with MatDenseGetColumnVec_MPIDense() */
PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector");
  VecCheckAssembled(a->cvec);
  a->vecinuse = 0;
  PetscCall(MatDenseRestoreArray(a->A, (PetscScalar **)&a->ptrinuse));
  PetscCall(VecResetArray(a->cvec));
  if (v) *v = NULL;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Read-only variant of MatDenseGetColumnVec_MPIDense(); the returned vector is
   read-locked until restored */
PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscInt      lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
  a->vecinuse = col + 1;
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(MatDenseGetArrayRead(a->A, &a->ptrinuse));
  PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)lda)));
  PetscCall(VecLockReadPush(a->cvec));
  *v = a->cvec;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Return the column vector obtained with MatDenseGetColumnVecRead_MPIDense() */
PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
  VecCheckAssembled(a->cvec);
  a->vecinuse = 0;
  PetscCall(MatDenseRestoreArrayRead(a->A, &a->ptrinuse));
  PetscCall(VecLockReadPop(a->cvec));
  PetscCall(VecResetArray(a->cvec));
  if (v) *v = NULL;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Write-only variant of MatDenseGetColumnVec_MPIDense() */
PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscInt      lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
  a->vecinuse = col + 1;
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(MatDenseGetArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
  PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)lda)));
  *v = a->cvec;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Return the column vector obtained with MatDenseGetColumnVecWrite_MPIDense() */
PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec,
PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector"); 1624 VecCheckAssembled(a->cvec); 1625 a->vecinuse = 0; 1626 PetscCall(MatDenseRestoreArrayWrite(a->A, (PetscScalar **)&a->ptrinuse)); 1627 PetscCall(VecResetArray(a->cvec)); 1628 if (v) *v = NULL; 1629 PetscFunctionReturn(PETSC_SUCCESS); 1630 } 1631 1632 static PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A, PetscInt rbegin, PetscInt rend, PetscInt cbegin, PetscInt cend, Mat *v) 1633 { 1634 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1635 Mat_MPIDense *c; 1636 MPI_Comm comm; 1637 PetscInt prbegin, prend, pcbegin, pcend; 1638 1639 PetscFunctionBegin; 1640 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 1641 PetscCheck(!a->vecinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1642 PetscCheck(!a->matinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1643 prbegin = PetscMax(0, PetscMin(A->rmap->rend, rbegin) - A->rmap->rstart); 1644 prend = PetscMin(A->rmap->n, PetscMax(0, rend - A->rmap->rstart)); 1645 pcbegin = PetscMax(0, PetscMin(A->cmap->rend, cbegin) - A->cmap->rstart); 1646 pcend = PetscMin(A->cmap->n, PetscMax(0, cend - A->cmap->rstart)); 1647 if (!a->cmat) { 1648 PetscCall(MatCreate(comm, &a->cmat)); 1649 PetscCall(MatSetType(a->cmat, ((PetscObject)A)->type_name)); 1650 if (rend - rbegin == A->rmap->N) PetscCall(PetscLayoutReference(A->rmap, &a->cmat->rmap)); 1651 else { 1652 PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, prend - prbegin)); 1653 PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin)); 1654 PetscCall(PetscLayoutSetUp(a->cmat->rmap)); 1655 } 1656 if (cend - cbegin == A->cmap->N) PetscCall(PetscLayoutReference(A->cmap, &a->cmat->cmap)); 1657 else { 1658 PetscCall(PetscLayoutSetLocalSize(a->cmat->cmap, pcend - pcbegin)); 1659 PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin)); 1660 PetscCall(PetscLayoutSetUp(a->cmat->cmap)); 1661 } 1662 c = (Mat_MPIDense *)a->cmat->data; 1663 
c->sub_rbegin = rbegin; /* remember the range the cached sub-matrix was built for */
    c->sub_rend   = rend;
    c->sub_cbegin = cbegin;
    c->sub_cend   = cend;
  }
  c = (Mat_MPIDense *)a->cmat->data;
  /* requested row range differs from the cached one: rebuild the row layout */
  if (c->sub_rbegin != rbegin || c->sub_rend != rend) {
    PetscCall(PetscLayoutDestroy(&a->cmat->rmap));
    PetscCall(PetscLayoutCreate(comm, &a->cmat->rmap));
    PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, prend - prbegin));
    PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin));
    PetscCall(PetscLayoutSetUp(a->cmat->rmap));
    c->sub_rbegin = rbegin;
    c->sub_rend   = rend;
  }
  if (c->sub_cbegin != cbegin || c->sub_cend != cend) {
    // special optimization: check if all columns are owned by rank 0, in which case no communication is necessary
    if ((cend - cbegin != a->cmat->cmap->N) || (A->cmap->range[1] != A->cmap->N)) {
      PetscCall(PetscLayoutDestroy(&a->cmat->cmap));
      PetscCall(PetscLayoutCreate(comm, &a->cmat->cmap));
      PetscCall(PetscLayoutSetLocalSize(a->cmat->cmap, pcend - pcbegin));
      PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin));
      PetscCall(PetscLayoutSetUp(a->cmat->cmap));
      /* column layout changed, so the cached scatter objects are stale */
      PetscCall(VecDestroy(&c->lvec));
      PetscCall(PetscSFDestroy(&c->Mvctx));
    }
    c->sub_cbegin = cbegin;
    c->sub_cend   = cend;
  }
  PetscCheck(!c->A, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  /* the inner sequential matrix aliases the local portion of A's storage */
  PetscCall(MatDenseGetSubMatrix(a->A, prbegin, prend, cbegin, cend, &c->A));

  a->cmat->preallocated = PETSC_TRUE;
  a->cmat->assembled    = PETSC_TRUE;
#if defined(PETSC_HAVE_DEVICE)
  a->cmat->offloadmask = c->A->offloadmask;
#endif
  a->matinuse = cbegin + 1; /* 1-based marker: a sub-matrix is outstanding */
  *v          = a->cmat;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Return the sub-matrix obtained with MatDenseGetSubMatrix_MPIDense(); releases the
   inner sequential alias and propagates the device offload mask back to A. */
static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A, Mat *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat_MPIDense *c;

  PetscFunctionBegin;
  PetscCheck(a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetSubMatrix() first");
  PetscCheck(a->cmat, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal matrix");
  PetscCheck(*v == a->cmat, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Not the matrix obtained from MatDenseGetSubMatrix()");
  a->matinuse = 0;
  c           = (Mat_MPIDense *)a->cmat->data;
  PetscCall(MatDenseRestoreSubMatrix(a->A, &c->A));
  if (v) *v = NULL;
#if defined(PETSC_HAVE_DEVICE)
  A->offloadmask = a->A->offloadmask;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*MC
   MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.

   Options Database Key:
. -mat_type mpidense - sets the matrix type to `MATMPIDENSE` during a call to `MatSetFromOptions()`

  Level: beginner

.seealso: [](ch_matrices), `Mat`, `MatCreateDense()`, `MATSEQDENSE`, `MATDENSE`
M*/
/* Type constructor for MATMPIDENSE: allocates the implementation struct, installs the
   ops table, and composes the type-specific method implementations queried by name. */
PetscErrorCode MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense *a;

  PetscFunctionBegin;
  PetscCall(PetscNew(&a));
  mat->data   = (void *)a;
  mat->ops[0] = MatOps_Values;

  mat->insertmode = NOT_SET_VALUES;

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;

  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)mat), 1, &mat->stash));

  /* stuff used for matrix vector multiply */
  a->lvec        = NULL;
  a->Mvctx       = NULL;
  a->roworiented = PETSC_TRUE;

  /* dense-array access */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", MatDenseGetLDA_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", MatDenseSetLDA_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", MatDenseGetArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", MatDenseRestoreArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", MatDenseGetArrayRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", MatDenseRestoreArrayRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", MatDenseGetArrayWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", MatDenseRestoreArrayWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", MatDensePlaceArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", MatDenseResetArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", MatDenseReplaceArray_MPIDense));
  /* column-vector and sub-matrix views */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", MatDenseGetSubMatrix_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", MatDenseRestoreSubMatrix_MPIDense));
  /* type conversions */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", MatConvert_MPIDense_MPIAIJ));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", MatConvert_MPIDense_Elemental));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", MatConvert_Dense_ScaLAPACK));
#endif
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", MatConvert_MPIDense_MPIDenseCUDA));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", MatMPIDenseSetPreallocation_MPIDense));
  /* matrix products */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
#endif
#if defined(PETSC_HAVE_HIP)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", MatConvert_MPIDense_MPIDenseHIP));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", MatDenseGetColumn_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", MatDenseRestoreColumn_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultColumnRange_C", MatMultColumnRange_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultAddColumnRange_C", MatMultAddColumnRange_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeColumnRange_C", MatMultHermitianTransposeColumnRange_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeAddColumnRange_C", MatMultHermitianTransposeAddColumnRange_MPIDense));
  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, MATMPIDENSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*MC
   MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.

   This matrix type is identical to `MATSEQDENSE` when constructed with a single process communicator,
   and `MATMPIDENSE` otherwise.

   Options Database Key:
. -mat_type dense - sets the matrix type to `MATDENSE` during a call to `MatSetFromOptions()`

  Level: beginner

.seealso: [](ch_matrices), `Mat`, `MATSEQDENSE`, `MATMPIDENSE`, `MATDENSECUDA`, `MATDENSEHIP`
M*/

/*@
  MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

  Collective

  Input Parameters:
+ B    - the matrix
- data - optional location of matrix data. Set to `NULL` for PETSc
         to control all matrix memory allocation.

  Level: intermediate

  Notes:
  The dense format is fully compatible with standard Fortran
  storage by columns.

  The data input variable is intended primarily for Fortran programmers
  who wish to allocate their own matrix memory space. Most users should
  set `data` to `NULL`.

.seealso: [](ch_matrices), `Mat`, `MATMPIDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
@*/
PetscErrorCode MatMPIDenseSetPreallocation(Mat B, PetscScalar *data)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  /* dispatches to MatMPIDenseSetPreallocation_MPIDense when B is MATMPIDENSE; no-op otherwise */
  PetscTryMethod(B, "MatMPIDenseSetPreallocation_C", (Mat, PetscScalar *), (B, data));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatDensePlaceArray - Allows one to replace the array in a `MATDENSE` matrix with an
  array provided by the user. This is useful to avoid copying an array
  into a matrix

  Not Collective

  Input Parameters:
+ mat   - the matrix
- array - the array in column major order

  Level: developer

  Note:
  Adding `const` to `array` was an oversight, see notes in `VecPlaceArray()`.

  You can return to the original array with a call to `MatDenseResetArray()`. The user is responsible for freeing this array; it will not be
  freed when the matrix is destroyed.

.seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDenseResetArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`,
          `MatDenseReplaceArray()`
@*/
PetscErrorCode MatDensePlaceArray(Mat mat, const PetscScalar *array)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  PetscUseMethod(mat, "MatDensePlaceArray_C", (Mat, const PetscScalar *), (mat, array));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat)); /* values changed out-of-band */
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
  mat->offloadmask = PETSC_OFFLOAD_CPU; /* the placed array is host memory */
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatDenseResetArray - Resets the matrix array to that it previously had before the call to `MatDensePlaceArray()`

  Not Collective

  Input Parameter:
. mat - the matrix

  Level: developer

  Note:
  You can only call this after a call to `MatDensePlaceArray()`

.seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDensePlaceArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`
@*/
PetscErrorCode MatDenseResetArray(Mat mat)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  PetscUseMethod(mat, "MatDenseResetArray_C", (Mat), (mat));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an
  array provided by the user. This is useful to avoid copying an array
  into a matrix

  Not Collective

  Input Parameters:
+ mat   - the matrix
- array - the array in column major order

  Level: developer

  Note:
  Adding `const` to `array` was an oversight, see notes in `VecPlaceArray()`.

  The memory passed in MUST be obtained with `PetscMalloc()` and CANNOT be
  freed by the user. It will be freed when the matrix is destroyed.

.seealso: [](ch_matrices), `Mat`, `MatDensePlaceArray()`, `MatDenseGetArray()`, `VecReplaceArray()`
@*/
PetscErrorCode MatDenseReplaceArray(Mat mat, const PetscScalar *array)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  PetscUseMethod(mat, "MatDenseReplaceArray_C", (Mat, const PetscScalar *), (mat, array));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
  mat->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatCreateDense - Creates a matrix in `MATDENSE` format.

  Collective

  Input Parameters:
+ comm - MPI communicator
. m    - number of local rows (or `PETSC_DECIDE` to have calculated if `M` is given)
. n    - number of local columns (or `PETSC_DECIDE` to have calculated if `N` is given)
. M    - number of global rows (or `PETSC_DECIDE` to have calculated if `m` is given)
. N    - number of global columns (or `PETSC_DECIDE` to have calculated if `n` is given)
- data - optional location of matrix data. Set data to `NULL` (`PETSC_NULL_SCALAR_ARRAY` for Fortran users) for PETSc
         to control all matrix memory allocation.

  Output Parameter:
. A - the matrix

  Level: intermediate

  Notes:
  The dense format is fully compatible with standard Fortran
  storage by columns.

  Although local portions of the matrix are stored in column-major
  order, the matrix is partitioned across MPI ranks by row.

  The data input variable is intended primarily for Fortran programmers
  who wish to allocate their own matrix memory space. Most users should
  set `data` to `NULL` (`PETSC_NULL_SCALAR_ARRAY` for Fortran users).

  The user MUST specify either the local or global matrix dimensions
  (possibly both).

.seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
@*/
PetscErrorCode MatCreateDense(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar data[], Mat *A)
{
  PetscFunctionBegin;
  PetscCall(MatCreate(comm, A));
  PetscCall(MatSetSizes(*A, m, n, M, N));
  PetscCall(MatSetType(*A, MATDENSE));
  /* MATDENSE resolves to seq or mpi depending on comm size; only the matching
     preallocation routine acts, the other is a no-op for that type */
  PetscCall(MatSeqDenseSetPreallocation(*A, data));
  PetscCall(MatMPIDenseSetPreallocation(*A, data));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Duplicate an MPI dense matrix: clone layouts and flags, then duplicate the
   inner sequential matrix (optionally copying values per cpvalues). */
static PetscErrorCode MatDuplicate_MPIDense(Mat A, MatDuplicateOption cpvalues, Mat *newmat)
{
  Mat           mat;
  Mat_MPIDense *a, *oldmat = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  *newmat = NULL;
  PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat));
  PetscCall(MatSetSizes(mat, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
  PetscCall(MatSetType(mat, ((PetscObject)A)->type_name));
  a = (Mat_MPIDense *)mat->data;

  mat->factortype   = A->factortype;
  mat->assembled    = PETSC_TRUE;
  mat->preallocated = PETSC_TRUE;

  mat->insertmode = NOT_SET_VALUES;
  a->donotstash   = oldmat->donotstash;

  PetscCall(PetscLayoutReference(A->rmap, &mat->rmap));
  PetscCall(PetscLayoutReference(A->cmap, &mat->cmap));

  PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));

  *newmat = mat;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Load a MATMPIDENSE matrix from a binary or HDF5 viewer (continues past this chunk). */
static PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
{
  PetscBool isbinary;
#if defined(PETSC_HAVE_HDF5)
  PetscBool ishdf5;
#endif

  PetscFunctionBegin;
  PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
  PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
  /* force binary viewer to load .info file if it has not yet done so */
  PetscCall(PetscViewerSetUp(viewer));
PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
#if defined(PETSC_HAVE_HDF5)
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
#endif
  if (isbinary) {
    PetscCall(MatLoad_Dense_Binary(newMat, viewer));
#if defined(PETSC_HAVE_HDF5)
  } else if (ishdf5) {
    PetscCall(MatLoad_Dense_HDF5(newMat, viewer));
#endif
  } else SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Collective equality test: compare the local sequential blocks, then AND the
   results across the communicator so every rank returns the same answer. */
static PetscErrorCode MatEqual_MPIDense(Mat A, Mat B, PetscBool *flag)
{
  Mat_MPIDense *matB = (Mat_MPIDense *)B->data, *matA = (Mat_MPIDense *)A->data;
  Mat           a, b;

  PetscFunctionBegin;
  a = matA->A;
  b = matB->A;
  PetscCall(MatEqual(a, b, flag));
  PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Destroy callback for the C = A^T*B product data (buffers + cached local product). */
static PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data)
{
  Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data;

  PetscFunctionBegin;
  PetscCall(PetscFree2(atb->sendbuf, atb->recvcounts));
  PetscCall(MatDestroy(&atb->atb));
  PetscCall(PetscFree(atb));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Destroy callback for the C = A*B^T product data (ping-pong buffers + counts/displs). */
static PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data)
{
  Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data;

  PetscFunctionBegin;
  PetscCall(PetscFree2(abt->buf[0], abt->buf[1]));
  PetscCall(PetscFree2(abt->recvcounts, abt->recvdispls));
  PetscCall(PetscFree(abt));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Numeric phase of C = A^T * B for MPI dense matrices: each rank forms the local
   product aseq^T * bseq, then the partial products are summed and redistributed
   to C's row owners (MPI_Reduce_scatter in general; a PetscSF reduction when all
   of C lives on rank 0, which also enables GPU-aware MPI). */
static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
{
  Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
  Mat_TransMatMultDense *atb;
  MPI_Comm               comm;
  PetscMPIInt            size, *recvcounts;
  PetscScalar           *carray, *sendbuf;
  const PetscScalar     *atbarray;
  PetscInt               i, cN = C->cmap->N, proc, k, j, lda;
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C, 3);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
  atb        = (Mat_TransMatMultDense *)C->product->data;
  recvcounts = atb->recvcounts;
  sendbuf    = atb->sendbuf;

  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));

  /* compute atbarray = aseq^T * bseq */
  PetscCall(MatTransposeMatMult(a->A, b->A, atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX, PETSC_DETERMINE, &atb->atb));

  PetscCall(MatGetOwnershipRanges(C, &ranges));

  if (ranges[1] == C->rmap->N) {
    /* all of the values are being reduced to rank 0: optimize this case to use MPI_Reduce and GPU aware MPI if available */
    PetscInt           atb_lda, c_lda;
    Mat                atb_local = atb->atb;
    Mat                atb_alloc = NULL;
    Mat                c_local   = c->A;
    Mat                c_alloc   = NULL;
    PetscMemType       atb_memtype, c_memtype;
    const PetscScalar *atb_array = NULL;
    MPI_Datatype       vector_type;
    PetscScalar       *c_array = NULL;
    PetscMPIInt        rank;

    PetscCallMPI(MPI_Comm_rank(comm, &rank));

    PetscCall(MatDenseGetLDA(atb_local, &atb_lda));
    if (atb_lda != C->rmap->N) {
      // copy atb to a matrix that will have lda == the number of rows
      PetscCall(MatDuplicate(atb_local, MAT_DO_NOT_COPY_VALUES, &atb_alloc));
      PetscCall(MatCopy(atb_local, atb_alloc, DIFFERENT_NONZERO_PATTERN));
      atb_local = atb_alloc;
    }

    if (rank == 0) {
      PetscCall(MatDenseGetLDA(c_local, &c_lda));
      if (c_lda != C->rmap->N) {
        // copy c to a matrix that will have lda == the number of rows
        PetscCall(MatDuplicate(c_local, MAT_DO_NOT_COPY_VALUES, &c_alloc));
        c_local = c_alloc;
      }
      PetscCall(MatZeroEntries(c_local));
    }
    /* atb_local and c_local have nrows = lda = A->cmap->N and ncols =
     * B->cmap->N: use the a->Mvctx to use the best reduction method */
    if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
    vector_type = MPIU_SCALAR;
    if (B->cmap->N > 1) {
      /* reduce whole rows (one scalar per column of B) as a single MPI datatype */
      PetscMPIInt mpi_N;

      PetscCall(PetscMPIIntCast(B->cmap->N, &mpi_N));
      PetscCallMPI(MPI_Type_contiguous(mpi_N, MPIU_SCALAR, &vector_type));
      PetscCallMPI(MPI_Type_commit(&vector_type));
    }
    PetscCall(MatDenseGetArrayReadAndMemType(atb_local, &atb_array, &atb_memtype));
    PetscCall(MatDenseGetArrayWriteAndMemType(c_local, &c_array, &c_memtype));
    PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, vector_type, atb_memtype, atb_array, c_memtype, c_array, MPIU_SUM));
    PetscCall(PetscSFReduceEnd(a->Mvctx, vector_type, atb_array, c_array, MPIU_SUM));
    PetscCall(MatDenseRestoreArrayWriteAndMemType(c_local, &c_array));
    PetscCall(MatDenseRestoreArrayReadAndMemType(atb_local, &atb_array));
    if (rank == 0 && c_local != c->A) PetscCall(MatCopy(c_local, c->A, DIFFERENT_NONZERO_PATTERN));
    if (B->cmap->N > 1) PetscCallMPI(MPI_Type_free(&vector_type));
    PetscCall(MatDestroy(&atb_alloc));
    PetscCall(MatDestroy(&c_alloc));
    PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  /* arrange atbarray into sendbuf: for each destination rank, pack its row slice
     of every column contiguously so reduce_scatter delivers per-rank blocks */
  PetscCall(MatDenseGetArrayRead(atb->atb, &atbarray));
  PetscCall(MatDenseGetLDA(atb->atb, &lda));
  for (proc = 0, k = 0; proc < size; proc++) {
    for (j = 0; j < cN; j++) {
      for (i = ranges[proc]; i < ranges[proc + 1]; i++) sendbuf[k++] = atbarray[i + j * lda];
    }
  }
  PetscCall(MatDenseRestoreArrayRead(atb->atb, &atbarray));

  /* sum all atbarray to local values of C */
  PetscCall(MatDenseGetArrayWrite(c->A, &carray));
  PetscCallMPI(MPI_Reduce_scatter(sendbuf, carray, recvcounts, MPIU_SCALAR, MPIU_SUM, comm));
  PetscCall(MatDenseRestoreArrayWrite(c->A, &carray));
  PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Symbolic phase of C = A^T * B: validate layouts, size/type C, and allocate the
   reusable send buffer and reduce_scatter counts (continues past this chunk). */
static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
{
  MPI_Comm               comm;
  PetscMPIInt            size;
  PetscInt               cm = A->cmap->n, cM, cN = B->cmap->N;
  Mat_TransMatMultDense *atb;
  PetscBool              cisdense = PETSC_FALSE;
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C, 4);
  PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  /* A^T*B requires A and B to share the same row distribution */
  if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
    SETERRQ(comm, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
  }

  /* create matrix product C */
  PetscCall(MatSetSizes(C, cm, B->cmap->n, A->cmap->N, B->cmap->N));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, ""));
#endif
#if defined(PETSC_HAVE_HIP)
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSEHIP, ""));
#endif
  if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
  PetscCall(MatSetUp(C));

  /* create data structure for reuse C */
PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCall(PetscNew(&atb));
  cM = C->rmap->N;
  PetscCall(PetscMalloc2(cM * cN, &atb->sendbuf, size, &atb->recvcounts));
  PetscCall(MatGetOwnershipRanges(C, &ranges));
  /* each rank receives its own row slice of every column of C */
  for (PetscMPIInt i = 0; i < size; i++) PetscCall(PetscMPIIntCast((ranges[i + 1] - ranges[i]) * cN, &atb->recvcounts[i]));
  C->product->data    = atb;
  C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Symbolic phase of C = A * B^T: choose the communication algorithm from the
   product options ("allgatherv" or cyclic), size/type C, and allocate the
   buffers the chosen numeric routine will reuse. */
static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
{
  MPI_Comm               comm;
  PetscMPIInt            i, size;
  PetscInt               maxRows, bufsiz;
  PetscMPIInt            tag;
  PetscInt               alg;
  Mat_MatTransMultDense *abt;
  Mat_Product           *product = C->product;
  PetscBool              flg;

  PetscFunctionBegin;
  MatCheckProduct(C, 4);
  PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
  /* check local size of A and B */
  PetscCheck(A->cmap->n == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local column dimensions are incompatible, A (%" PetscInt_FMT ") != B (%" PetscInt_FMT ")", A->cmap->n, B->cmap->n);

  PetscCall(PetscStrcmp(product->alg, "allgatherv", &flg));
  alg = flg ? 0 : 1; /* 0 = allgatherv, 1 = cyclic */

  /* setup matrix product C */
  PetscCall(MatSetSizes(C, A->rmap->n, B->rmap->n, A->rmap->N, B->rmap->N));
  PetscCall(MatSetType(C, MATMPIDENSE));
  PetscCall(MatSetUp(C));
  /* a dedicated tag keeps the cyclic point-to-point traffic from clashing with other messages */
  PetscCall(PetscObjectGetNewTag((PetscObject)C, &tag));

  /* create data structure for reuse C */
  PetscCall(PetscObjectGetComm((PetscObject)C, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCall(PetscNew(&abt));
  abt->tag = tag;
  abt->alg = alg;
  switch (alg) {
  case 1: /* alg: "cyclic" — two ping-pong buffers sized for the largest row block of B */
    for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, B->rmap->range[i + 1] - B->rmap->range[i]);
    bufsiz = A->cmap->N * maxRows;
    PetscCall(PetscMalloc2(bufsiz, &abt->buf[0], bufsiz, &abt->buf[1]));
    break;
  default: /* alg: "allgatherv" — local pack buffer plus a buffer for the fully gathered B */
    PetscCall(PetscMalloc2(B->rmap->n * B->cmap->N, &abt->buf[0], B->rmap->N * B->cmap->N, &abt->buf[1]));
    PetscCall(PetscMalloc2(size, &abt->recvcounts, size + 1, &abt->recvdispls));
    for (i = 0; i <= size; i++) PetscCall(PetscMPIIntCast(B->rmap->range[i] * A->cmap->N, &abt->recvdispls[i]));
    for (i = 0; i < size; i++) PetscCall(PetscMPIIntCast(abt->recvdispls[i + 1] - abt->recvdispls[i], &abt->recvcounts[i]));
    break;
  }

  C->product->data                = abt;
  C->product->destroy             = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
  C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Numeric phase of C = A * B^T, cyclic algorithm: the row blocks of B circulate
   around the ranks in a ring; at each of the `size` steps every rank multiplies
   its local A block against the block currently held (BLAS gemm with op(B)="T")
   while the next block is in flight (communication/computation overlap). */
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
{
  Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
  Mat_MatTransMultDense *abt;
  MPI_Comm               comm;
  PetscMPIInt            rank, size, sendto, recvfrom, recvisfrom;
  PetscScalar           *sendbuf, *recvbuf = NULL, *cv;
  PetscInt               i, cK = A->cmap->N, sendsiz, recvsiz, k, j, bn;
  PetscScalar            _DOne = 1.0, _DZero = 0.0;
  const PetscScalar     *av, *bv;
  PetscBLASInt           cm, cn, ck, alda, blda = 0, clda;
  MPI_Request            reqs[2];
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C, 3);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
  abt = (Mat_MatTransMultDense *)C->product->data;
  PetscCall(PetscObjectGetComm((PetscObject)C, &comm));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCall(MatDenseGetArrayRead(a->A, &av));
  PetscCall(MatDenseGetArrayRead(b->A, &bv));
  PetscCall(MatDenseGetArrayWrite(c->A, &cv));
  PetscCall(MatDenseGetLDA(a->A, &i));
  PetscCall(PetscBLASIntCast(i, &alda));
  PetscCall(MatDenseGetLDA(b->A, &i));
  PetscCall(PetscBLASIntCast(i, &blda));
  PetscCall(MatDenseGetLDA(c->A, &i));
  PetscCall(PetscBLASIntCast(i, &clda));
  PetscCall(MatGetOwnershipRanges(B, &ranges));
  bn = B->rmap->n;
  if (blda == bn) {
    /* B's local block is already contiguous; send it in place */
    sendbuf = (PetscScalar *)bv;
  } else {
    /* pack B's local block densely (drop the lda padding) before sending */
    sendbuf = abt->buf[0];
    for (k = 0, i = 0; i < cK; i++) {
      for (j = 0; j < bn; j++, k++) sendbuf[k] = bv[i * blda + j];
    }
  }
  if (size > 1) {
    sendto   = (rank + size - 1) % size;
    recvfrom = (rank + size + 1) % size;
  } else {
    sendto = recvfrom = 0;
  }
  PetscCall(PetscBLASIntCast(cK, &ck));
  PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm));
  recvisfrom = rank;
  for (i = 0; i < size; i++) {
    /* we have finished receiving in sending, bufs can be read/modified */
    PetscMPIInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */
    PetscInt    nextbn         = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];

    if (nextrecvisfrom != rank) {
      /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */
      sendsiz = cK * bn;
      recvsiz = cK * nextbn;
      recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1]; /* alternate buffers so send and recv never overlap */
      PetscCallMPI(MPIU_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]));
      PetscCallMPI(MPIU_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]));
    }

    /* local aseq * sendbuf^T */
    PetscCall(PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn));
    if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &cm, &cn, &ck, &_DOne, av, &alda, sendbuf, &cn, &_DZero, cv + clda * ranges[recvisfrom], &clda));

    if (nextrecvisfrom != rank) {
      /* wait for the sends and receives to complete, swap sendbuf and recvbuf */
      PetscCallMPI(MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE));
    }
    bn         = nextbn;
    recvisfrom = nextrecvisfrom;
    sendbuf    = recvbuf;
  }
  PetscCall(MatDenseRestoreArrayRead(a->A, &av));
  PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
  PetscCall(MatDenseRestoreArrayWrite(c->A, &cv));
  PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Numeric phase of C = A * B^T, allgatherv algorithm: gather all of B on every
   rank, then one local gemm (continues past this chunk). */
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C)
{
  Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
  Mat_MatTransMultDense *abt;
  MPI_Comm               comm;
  PetscMPIInt            size, ibn;
  PetscScalar           *cv, *sendbuf, *recvbuf;
  const PetscScalar     *av, *bv;
  PetscInt               blda, i, cK = A->cmap->N, k, j, bn;
  PetscScalar            _DOne = 1.0, _DZero = 0.0;
  PetscBLASInt           cm, cn, ck, alda, clda;

  PetscFunctionBegin;
  MatCheckProduct(C, 3);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
  abt = (Mat_MatTransMultDense *)C->product->data;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
PetscCallMPI(MPI_Comm_size(comm, &size)); 2376 PetscCall(MatDenseGetArrayRead(a->A, &av)); 2377 PetscCall(MatDenseGetArrayRead(b->A, &bv)); 2378 PetscCall(MatDenseGetArrayWrite(c->A, &cv)); 2379 PetscCall(MatDenseGetLDA(a->A, &i)); 2380 PetscCall(PetscBLASIntCast(i, &alda)); 2381 PetscCall(MatDenseGetLDA(b->A, &blda)); 2382 PetscCall(MatDenseGetLDA(c->A, &i)); 2383 PetscCall(PetscBLASIntCast(i, &clda)); 2384 /* copy transpose of B into buf[0] */ 2385 bn = B->rmap->n; 2386 sendbuf = abt->buf[0]; 2387 recvbuf = abt->buf[1]; 2388 for (k = 0, j = 0; j < bn; j++) { 2389 for (i = 0; i < cK; i++, k++) sendbuf[k] = bv[i * blda + j]; 2390 } 2391 PetscCall(MatDenseRestoreArrayRead(b->A, &bv)); 2392 PetscCall(PetscMPIIntCast(bn * cK, &ibn)); 2393 PetscCallMPI(MPI_Allgatherv(sendbuf, ibn, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm)); 2394 PetscCall(PetscBLASIntCast(cK, &ck)); 2395 PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm)); 2396 PetscCall(PetscBLASIntCast(c->A->cmap->n, &cn)); 2397 if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &cm, &cn, &ck, &_DOne, av, &alda, recvbuf, &ck, &_DZero, cv, &clda)); 2398 PetscCall(MatDenseRestoreArrayRead(a->A, &av)); 2399 PetscCall(MatDenseRestoreArrayRead(b->A, &bv)); 2400 PetscCall(MatDenseRestoreArrayWrite(c->A, &cv)); 2401 PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 2402 PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 2403 PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 2404 PetscFunctionReturn(PETSC_SUCCESS); 2405 } 2406 2407 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) 2408 { 2409 Mat_MatTransMultDense *abt; 2410 2411 PetscFunctionBegin; 2412 MatCheckProduct(C, 3); 2413 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 2414 abt = (Mat_MatTransMultDense *)C->product->data; 2415 switch (abt->alg) { 2416 case 1: 2417 
PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C)); 2418 break; 2419 default: 2420 PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C)); 2421 break; 2422 } 2423 PetscFunctionReturn(PETSC_SUCCESS); 2424 } 2425 2426 static PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data) 2427 { 2428 Mat_MatMultDense *ab = (Mat_MatMultDense *)data; 2429 2430 PetscFunctionBegin; 2431 PetscCall(MatDestroy(&ab->Ce)); 2432 PetscCall(MatDestroy(&ab->Ae)); 2433 PetscCall(MatDestroy(&ab->Be)); 2434 PetscCall(PetscFree(ab)); 2435 PetscFunctionReturn(PETSC_SUCCESS); 2436 } 2437 2438 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) 2439 { 2440 Mat_MatMultDense *ab; 2441 Mat_MPIDense *mdn = (Mat_MPIDense *)A->data; 2442 Mat_MPIDense *b = (Mat_MPIDense *)B->data; 2443 2444 PetscFunctionBegin; 2445 MatCheckProduct(C, 3); 2446 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Missing product data"); 2447 ab = (Mat_MatMultDense *)C->product->data; 2448 if (ab->Ae && ab->Ce) { 2449 #if PetscDefined(HAVE_ELEMENTAL) 2450 PetscCall(MatConvert_MPIDense_Elemental(A, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Ae)); 2451 PetscCall(MatConvert_MPIDense_Elemental(B, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Be)); 2452 PetscCall(MatMatMultNumeric_Elemental(ab->Ae, ab->Be, ab->Ce)); 2453 PetscCall(MatConvert(ab->Ce, MATMPIDENSE, MAT_REUSE_MATRIX, &C)); 2454 #else 2455 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "PETSC_HAVE_ELEMENTAL not defined"); 2456 #endif 2457 } else { 2458 MPI_Comm comm; 2459 const PetscScalar *read; 2460 PetscScalar *write; 2461 PetscInt lda; 2462 const PetscInt *ranges; 2463 PetscMPIInt size; 2464 2465 if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); /* cannot be done during the symbolic phase because of possible calls to MatProductReplaceMats() */ 2466 comm = PetscObjectComm((PetscObject)B); 2467 PetscCallMPI(MPI_Comm_size(comm, &size)); 2468 
PetscCall(PetscLayoutGetRanges(B->rmap, &ranges)); 2469 if (ranges[1] == ranges[size]) { 2470 // optimize for the case where the B matrix is broadcast from rank 0 2471 PetscInt b_lda, be_lda; 2472 Mat b_local = b->A; 2473 Mat b_alloc = NULL; 2474 Mat be_local = ab->Be; 2475 Mat be_alloc = NULL; 2476 PetscMemType b_memtype, be_memtype; 2477 const PetscScalar *b_array = NULL; 2478 MPI_Datatype vector_type; 2479 PetscScalar *be_array = NULL; 2480 PetscMPIInt rank; 2481 2482 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 2483 PetscCall(MatDenseGetLDA(be_local, &be_lda)); 2484 if (be_lda != B->rmap->N) { 2485 PetscCall(MatDuplicate(be_local, MAT_DO_NOT_COPY_VALUES, &be_alloc)); 2486 be_local = be_alloc; 2487 } 2488 2489 if (rank == 0) { 2490 PetscCall(MatDenseGetLDA(b_local, &b_lda)); 2491 if (b_lda != B->rmap->N) { 2492 PetscCall(MatDuplicate(b_local, MAT_DO_NOT_COPY_VALUES, &b_alloc)); 2493 PetscCall(MatCopy(b_local, b_alloc, DIFFERENT_NONZERO_PATTERN)); 2494 b_local = b_alloc; 2495 } 2496 } 2497 vector_type = MPIU_SCALAR; 2498 if (B->cmap->N > 1) { 2499 PetscMPIInt mpi_N; 2500 2501 PetscCall(PetscMPIIntCast(B->cmap->N, &mpi_N)); 2502 PetscCallMPI(MPI_Type_contiguous(mpi_N, MPIU_SCALAR, &vector_type)); 2503 PetscCallMPI(MPI_Type_commit(&vector_type)); 2504 } 2505 PetscCall(MatDenseGetArrayReadAndMemType(b_local, &b_array, &b_memtype)); 2506 PetscCall(MatDenseGetArrayWriteAndMemType(be_local, &be_array, &be_memtype)); 2507 PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, vector_type, b_memtype, b_array, be_memtype, be_array, MPI_REPLACE)); 2508 PetscCall(PetscSFBcastEnd(mdn->Mvctx, vector_type, b_array, be_array, MPI_REPLACE)); 2509 PetscCall(MatDenseRestoreArrayWriteAndMemType(be_local, &be_array)); 2510 PetscCall(MatDenseRestoreArrayReadAndMemType(b_local, &b_array)); 2511 if (be_local != ab->Be) PetscCall(MatCopy(be_local, ab->Be, DIFFERENT_NONZERO_PATTERN)); 2512 if (B->cmap->N > 1) PetscCallMPI(MPI_Type_free(&vector_type)); 2513 PetscCall(MatDestroy(&be_alloc)); 2514 
PetscCall(MatDestroy(&b_alloc)); 2515 } else { 2516 PetscCall(MatDenseGetLDA(B, &lda)); 2517 PetscCall(MatDenseGetArrayRead(B, &read)); 2518 PetscCall(MatDenseGetArrayWrite(ab->Be, &write)); 2519 for (PetscInt i = 0; i < C->cmap->N; ++i) { 2520 PetscCall(PetscSFBcastBegin(mdn->Mvctx, MPIU_SCALAR, read + i * lda, write + i * ab->Be->rmap->n, MPI_REPLACE)); 2521 PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, read + i * lda, write + i * ab->Be->rmap->n, MPI_REPLACE)); 2522 } 2523 PetscCall(MatDenseRestoreArrayWrite(ab->Be, &write)); 2524 PetscCall(MatDenseRestoreArrayRead(B, &read)); 2525 } 2526 PetscCall(MatMatMultNumeric_SeqDense_SeqDense(((Mat_MPIDense *)A->data)->A, ab->Be, ((Mat_MPIDense *)C->data)->A)); 2527 } 2528 PetscFunctionReturn(PETSC_SUCCESS); 2529 } 2530 2531 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C) 2532 { 2533 Mat_Product *product = C->product; 2534 PetscInt alg; 2535 Mat_MatMultDense *ab; 2536 PetscBool flg; 2537 2538 PetscFunctionBegin; 2539 MatCheckProduct(C, 4); 2540 PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 2541 /* check local size of A and B */ 2542 PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ", %" PetscInt_FMT ")", 2543 A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend); 2544 2545 PetscCall(PetscStrcmp(product->alg, "petsc", &flg)); 2546 alg = flg ? 
0 : 1; 2547 2548 /* setup C */ 2549 PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N)); 2550 PetscCall(MatSetType(C, MATMPIDENSE)); 2551 PetscCall(MatSetUp(C)); 2552 2553 /* create data structure for reuse Cdense */ 2554 PetscCall(PetscNew(&ab)); 2555 2556 switch (alg) { 2557 case 1: /* alg: "elemental" */ 2558 #if PetscDefined(HAVE_ELEMENTAL) 2559 /* create elemental matrices Ae and Be */ 2560 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &ab->Ae)); 2561 PetscCall(MatSetSizes(ab->Ae, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N)); 2562 PetscCall(MatSetType(ab->Ae, MATELEMENTAL)); 2563 PetscCall(MatSetUp(ab->Ae)); 2564 PetscCall(MatSetOption(ab->Ae, MAT_ROW_ORIENTED, PETSC_FALSE)); 2565 2566 PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &ab->Be)); 2567 PetscCall(MatSetSizes(ab->Be, PETSC_DECIDE, PETSC_DECIDE, B->rmap->N, B->cmap->N)); 2568 PetscCall(MatSetType(ab->Be, MATELEMENTAL)); 2569 PetscCall(MatSetUp(ab->Be)); 2570 PetscCall(MatSetOption(ab->Be, MAT_ROW_ORIENTED, PETSC_FALSE)); 2571 2572 /* compute symbolic Ce = Ae*Be */ 2573 PetscCall(MatCreate(PetscObjectComm((PetscObject)C), &ab->Ce)); 2574 PetscCall(MatMatMultSymbolic_Elemental(ab->Ae, ab->Be, fill, ab->Ce)); 2575 #else 2576 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "PETSC_HAVE_ELEMENTAL not defined"); 2577 #endif 2578 break; 2579 default: /* alg: "petsc" */ 2580 ab->Ae = NULL; 2581 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, A->cmap->N, B->cmap->N, NULL, &ab->Be)); 2582 ab->Ce = NULL; 2583 break; 2584 } 2585 2586 C->product->data = ab; 2587 C->product->destroy = MatDestroy_MatMatMult_MPIDense_MPIDense; 2588 C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense; 2589 PetscFunctionReturn(PETSC_SUCCESS); 2590 } 2591 2592 static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C) 2593 { 2594 Mat_Product *product = C->product; 2595 const char *algTypes[2] = {"petsc", "elemental"}; 2596 PetscInt alg, nalg = PetscDefined(HAVE_ELEMENTAL) ? 
2 : 1; 2597 PetscBool flg = PETSC_FALSE; 2598 2599 PetscFunctionBegin; 2600 /* Set default algorithm */ 2601 alg = 0; /* default is PETSc */ 2602 PetscCall(PetscStrcmp(product->alg, "default", &flg)); 2603 if (flg) PetscCall(MatProductSetAlgorithm(C, algTypes[alg])); 2604 2605 /* Get runtime option */ 2606 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat"); 2607 PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_AB", algTypes, nalg, algTypes[alg], &alg, &flg)); 2608 PetscOptionsEnd(); 2609 if (flg) PetscCall(MatProductSetAlgorithm(C, algTypes[alg])); 2610 2611 C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense; 2612 C->ops->productsymbolic = MatProductSymbolic_AB; 2613 PetscFunctionReturn(PETSC_SUCCESS); 2614 } 2615 2616 static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C) 2617 { 2618 Mat_Product *product = C->product; 2619 Mat A = product->A, B = product->B; 2620 2621 PetscFunctionBegin; 2622 PetscCheck(A->rmap->rstart == B->rmap->rstart && A->rmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", 2623 A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend); 2624 C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense; 2625 C->ops->productsymbolic = MatProductSymbolic_AtB; 2626 PetscFunctionReturn(PETSC_SUCCESS); 2627 } 2628 2629 static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C) 2630 { 2631 Mat_Product *product = C->product; 2632 const char *algTypes[2] = {"allgatherv", "cyclic"}; 2633 PetscInt alg, nalg = 2; 2634 PetscBool flg = PETSC_FALSE; 2635 2636 PetscFunctionBegin; 2637 /* Set default algorithm */ 2638 alg = 0; /* default is allgatherv */ 2639 PetscCall(PetscStrcmp(product->alg, "default", &flg)); 2640 if (flg) PetscCall(MatProductSetAlgorithm(C, algTypes[alg])); 
2641 2642 /* Get runtime option */ 2643 if (product->api_user) { 2644 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatTransposeMult", "Mat"); 2645 PetscCall(PetscOptionsEList("-matmattransmult_mpidense_mpidense_via", "Algorithmic approach", "MatMatTransposeMult", algTypes, nalg, algTypes[alg], &alg, &flg)); 2646 PetscOptionsEnd(); 2647 } else { 2648 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_ABt", "Mat"); 2649 PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_ABt", algTypes, nalg, algTypes[alg], &alg, &flg)); 2650 PetscOptionsEnd(); 2651 } 2652 if (flg) PetscCall(MatProductSetAlgorithm(C, algTypes[alg])); 2653 2654 C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense; 2655 C->ops->productsymbolic = MatProductSymbolic_ABt; 2656 PetscFunctionReturn(PETSC_SUCCESS); 2657 } 2658 2659 static PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C) 2660 { 2661 Mat_Product *product = C->product; 2662 2663 PetscFunctionBegin; 2664 switch (product->type) { 2665 case MATPRODUCT_AB: 2666 PetscCall(MatProductSetFromOptions_MPIDense_AB(C)); 2667 break; 2668 case MATPRODUCT_AtB: 2669 PetscCall(MatProductSetFromOptions_MPIDense_AtB(C)); 2670 break; 2671 case MATPRODUCT_ABt: 2672 PetscCall(MatProductSetFromOptions_MPIDense_ABt(C)); 2673 break; 2674 default: 2675 break; 2676 } 2677 PetscFunctionReturn(PETSC_SUCCESS); 2678 } 2679