1 /* 2 Basic functions for basic parallel dense matrices. 3 Portions of this code are under: 4 Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. 5 */ 6 7 #include <../src/mat/impls/dense/mpi/mpidense.h> /*I "petscmat.h" I*/ 8 #include <../src/mat/impls/aij/mpi/mpiaij.h> 9 #include <petscblaslapack.h> 10 #include <petsc/private/vecimpl.h> 11 #include <petscdevice.h> 12 #include <petsc/private/deviceimpl.h> 13 14 /*@ 15 MatDenseGetLocalMatrix - For a `MATMPIDENSE` or `MATSEQDENSE` matrix returns the sequential 16 matrix that represents the operator. For sequential matrices it returns itself. 17 18 Input Parameter: 19 . A - the sequential or MPI `MATDENSE` matrix 20 21 Output Parameter: 22 . B - the inner matrix 23 24 Level: intermediate 25 26 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATMPIDENSE`, `MATSEQDENSE` 27 @*/ 28 PetscErrorCode MatDenseGetLocalMatrix(Mat A, Mat *B) 29 { 30 Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 31 PetscBool flg; 32 33 PetscFunctionBegin; 34 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 35 PetscAssertPointer(B, 2); 36 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &flg)); 37 if (flg) *B = mat->A; 38 else { 39 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQDENSE, &flg)); 40 PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)A)->type_name); 41 *B = A; 42 } 43 PetscFunctionReturn(PETSC_SUCCESS); 44 } 45 46 static PetscErrorCode MatCopy_MPIDense(Mat A, Mat B, MatStructure s) 47 { 48 Mat_MPIDense *Amat = (Mat_MPIDense *)A->data; 49 Mat_MPIDense *Bmat = (Mat_MPIDense *)B->data; 50 51 PetscFunctionBegin; 52 PetscCall(MatCopy(Amat->A, Bmat->A, s)); 53 PetscFunctionReturn(PETSC_SUCCESS); 54 } 55 56 PetscErrorCode MatShift_MPIDense(Mat A, PetscScalar alpha) 57 { 58 Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 59 PetscInt j, lda, rstart = A->rmap->rstart, rend = A->rmap->rend, rend2; 60 PetscScalar *v; 61 62 PetscFunctionBegin; 63 PetscCall(MatDenseGetArray(mat->A, &v)); 64 PetscCall(MatDenseGetLDA(mat->A, &lda)); 65 rend2 = PetscMin(rend, A->cmap->N); 66 if (rend2 > rstart) { 67 for (j = rstart; j < rend2; j++) v[j - rstart + j * lda] += alpha; 68 PetscCall(PetscLogFlops(rend2 - rstart)); 69 } 70 PetscCall(MatDenseRestoreArray(mat->A, &v)); 71 PetscFunctionReturn(PETSC_SUCCESS); 72 } 73 74 static PetscErrorCode MatGetRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 75 { 76 Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 77 PetscInt lrow, rstart = A->rmap->rstart, rend = A->rmap->rend; 78 79 PetscFunctionBegin; 80 PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows"); 81 lrow = row - rstart; 82 PetscCall(MatGetRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v)); 83 PetscFunctionReturn(PETSC_SUCCESS); 84 } 85 86 static PetscErrorCode MatRestoreRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 87 { 88 Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 89 PetscInt lrow, rstart = A->rmap->rstart, rend = A->rmap->rend; 90 91 PetscFunctionBegin; 92 PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows"); 93 lrow = row - rstart; 94 PetscCall(MatRestoreRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v)); 95 PetscFunctionReturn(PETSC_SUCCESS); 96 } 97 98 static PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A, Mat *a) 99 { 100 Mat_MPIDense *mdn = (Mat_MPIDense *)A->data; 101 PetscInt m = A->rmap->n, rstart = A->rmap->rstart; 102 PetscScalar *array; 103 MPI_Comm comm; 104 PetscBool flg; 105 Mat B; 106 107 PetscFunctionBegin; 108 PetscCall(MatHasCongruentLayouts(A, &flg)); 109 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only square matrices supported."); 110 PetscCall(PetscObjectQuery((PetscObject)A, "DiagonalBlock", (PetscObject *)&B)); 111 if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */ 112 #if PetscDefined(HAVE_CUDA) 113 PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSECUDA, &flg)); 114 PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSECUDA); 115 #elif PetscDefined(HAVE_HIP) 116 PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSEHIP, &flg)); 117 PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSEHIP); 118 #endif 119 PetscCall(PetscObjectGetComm((PetscObject)mdn->A, &comm)); 120 PetscCall(MatCreate(comm, &B)); 121 PetscCall(MatSetSizes(B, m, m, m, m)); 122 PetscCall(MatSetType(B, ((PetscObject)mdn->A)->type_name)); 123 PetscCall(MatDenseGetArrayRead(mdn->A, (const PetscScalar **)&array)); 124 PetscCall(MatSeqDenseSetPreallocation(B, array + m * rstart)); 125 PetscCall(MatDenseRestoreArrayRead(mdn->A, (const PetscScalar **)&array)); 126 PetscCall(PetscObjectCompose((PetscObject)A, "DiagonalBlock", (PetscObject)B)); 127 *a = B; 128 PetscCall(MatDestroy(&B)); 129 } else *a = B; 130 PetscFunctionReturn(PETSC_SUCCESS); 131 } 132 133 static PetscErrorCode MatSetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], const PetscScalar v[], InsertMode addv) 134 { 135 Mat_MPIDense *A = (Mat_MPIDense *)mat->data; 136 PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row; 137 PetscBool roworiented = A->roworiented; 138 139 PetscFunctionBegin; 140 for (i = 0; i < m; i++) { 141 if (idxm[i] < 0) continue; 142 PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large"); 143 if (idxm[i] >= rstart && idxm[i] < rend) { 144 row = idxm[i] - rstart; 145 if (roworiented) { 146 PetscCall(MatSetValues(A->A, 1, &row, n, idxn, PetscSafePointerPlusOffset(v, i * n), addv)); 147 } else { 148 for (j = 0; j < n; j++) { 149 if (idxn[j] < 0) continue; 150 PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large"); 151 PetscCall(MatSetValues(A->A, 1, &row, 1, &idxn[j], PetscSafePointerPlusOffset(v, i + j * m), addv)); 152 } 153 } 154 } else if (!A->donotstash) { 155 mat->assembled = PETSC_FALSE; 156 if (roworiented) { 157 PetscCall(MatStashValuesRow_Private(&mat->stash, idxm[i], n, idxn, PetscSafePointerPlusOffset(v, i * n), PETSC_FALSE)); 158 } else { 159 PetscCall(MatStashValuesCol_Private(&mat->stash, idxm[i], n, idxn, PetscSafePointerPlusOffset(v, i), m, PETSC_FALSE)); 160 } 161 } 162 } 163 PetscFunctionReturn(PETSC_SUCCESS); 164 } 165 166 static PetscErrorCode MatGetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[]) 167 { 168 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 169 PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row; 170 171 PetscFunctionBegin; 172 for (i = 0; i < m; i++) { 173 if (idxm[i] < 0) continue; /* negative row */ 174 PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large"); 175 if (idxm[i] >= rstart && idxm[i] < rend) { 176 row = idxm[i] - rstart; 177 for (j = 0; j < n; j++) { 178 if (idxn[j] < 0) continue; /* negative column */ 179 PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large"); 180 PetscCall(MatGetValues(mdn->A, 1, &row, 1, &idxn[j], v + i * n + j)); 181 } 182 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported"); 183 } 184 PetscFunctionReturn(PETSC_SUCCESS); 185 } 186 187 static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A, PetscInt *lda) 188 { 189 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 190 191 PetscFunctionBegin; 192 PetscCall(MatDenseGetLDA(a->A, lda)); 193 PetscFunctionReturn(PETSC_SUCCESS); 194 } 195 196 static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A, PetscInt lda) 197 { 198 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 199 MatType mtype = MATSEQDENSE; 200 201 PetscFunctionBegin; 202 if (!a->A) { 203 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 204 PetscCall(PetscLayoutSetUp(A->rmap)); 205 PetscCall(PetscLayoutSetUp(A->cmap)); 206 PetscCall(MatCreate(PETSC_COMM_SELF, &a->A)); 207 PetscCall(MatSetSizes(a->A, A->rmap->n, A->cmap->N, A->rmap->n, A->cmap->N)); 208 #if PetscDefined(HAVE_CUDA) 209 PetscBool iscuda; 210 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSECUDA, &iscuda)); 211 if (iscuda) mtype = MATSEQDENSECUDA; 212 #elif PetscDefined(HAVE_HIP) 213 PetscBool iship; 214 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSEHIP, &iship)); 215 if (iship) mtype = MATSEQDENSEHIP; 216 #endif 217 PetscCall(MatSetType(a->A, mtype)); 218 } 219 PetscCall(MatDenseSetLDA(a->A, lda)); 220 PetscFunctionReturn(PETSC_SUCCESS); 221 } 222 223 static PetscErrorCode MatDenseGetArray_MPIDense(Mat A, PetscScalar **array) 224 { 225 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 226 227 PetscFunctionBegin; 228 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 229 PetscCall(MatDenseGetArray(a->A, array)); 230 PetscFunctionReturn(PETSC_SUCCESS); 231 } 232 233 static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A, PetscScalar **array) 234 { 235 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 236 237 PetscFunctionBegin; 238 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 239 PetscCall(MatDenseGetArrayRead(a->A, (const PetscScalar **)array)); 240 PetscFunctionReturn(PETSC_SUCCESS); 241 } 242 243 static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A, PetscScalar **array) 244 { 245 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 246 247 PetscFunctionBegin; 248 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 249 PetscCall(MatDenseGetArrayWrite(a->A, array)); 250 PetscFunctionReturn(PETSC_SUCCESS); 251 } 252 253 static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A, const PetscScalar *array) 254 { 255 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 256 257 PetscFunctionBegin; 258 PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 259 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 260 PetscCall(MatDensePlaceArray(a->A, array)); 261 PetscFunctionReturn(PETSC_SUCCESS); 262 } 263 264 static PetscErrorCode MatDenseResetArray_MPIDense(Mat A) 265 { 266 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 267 268 PetscFunctionBegin; 269 PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 270 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 271 PetscCall(MatDenseResetArray(a->A)); 272 PetscFunctionReturn(PETSC_SUCCESS); 273 } 274 275 static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A, const PetscScalar *array) 276 { 277 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 278 279 PetscFunctionBegin; 280 PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 281 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 282 PetscCall(MatDenseReplaceArray(a->A, array)); 283 PetscFunctionReturn(PETSC_SUCCESS); 284 } 285 286 static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A, IS isrow, IS iscol, MatReuse scall, Mat *B) 287 { 288 Mat_MPIDense *mat = (Mat_MPIDense *)A->data, *newmatd; 289 PetscInt lda, i, j, rstart, rend, nrows, ncols, Ncols, nlrows, nlcols; 290 const PetscInt *irow, *icol; 291 const PetscScalar *v; 292 PetscScalar *bv; 293 Mat newmat; 294 IS iscol_local; 295 MPI_Comm comm_is, comm_mat; 296 297 PetscFunctionBegin; 298 PetscCall(PetscObjectGetComm((PetscObject)A, &comm_mat)); 299 PetscCall(PetscObjectGetComm((PetscObject)iscol, &comm_is)); 300 PetscCheck(comm_mat == comm_is, PETSC_COMM_SELF, PETSC_ERR_ARG_NOTSAMECOMM, "IS communicator must match matrix communicator"); 301 302 PetscCall(ISAllGather(iscol, &iscol_local)); 303 PetscCall(ISGetIndices(isrow, &irow)); 304 PetscCall(ISGetIndices(iscol_local, &icol)); 305 PetscCall(ISGetLocalSize(isrow, &nrows)); 306 PetscCall(ISGetLocalSize(iscol, &ncols)); 307 PetscCall(ISGetSize(iscol, &Ncols)); /* global number of columns, size of iscol_local */ 308 309 /* No parallel redistribution currently supported! Should really check each index set 310 to confirm that it is OK. ... Currently supports only submatrix same partitioning as 311 original matrix! */ 312 313 PetscCall(MatGetLocalSize(A, &nlrows, &nlcols)); 314 PetscCall(MatGetOwnershipRange(A, &rstart, &rend)); 315 316 /* Check submatrix call */ 317 if (scall == MAT_REUSE_MATRIX) { 318 /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */ 319 /* Really need to test rows and column sizes! */ 320 newmat = *B; 321 } else { 322 /* Create and fill new matrix */ 323 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &newmat)); 324 PetscCall(MatSetSizes(newmat, nrows, ncols, PETSC_DECIDE, Ncols)); 325 PetscCall(MatSetType(newmat, ((PetscObject)A)->type_name)); 326 PetscCall(MatMPIDenseSetPreallocation(newmat, NULL)); 327 } 328 329 /* Now extract the data pointers and do the copy, column at a time */ 330 newmatd = (Mat_MPIDense *)newmat->data; 331 PetscCall(MatDenseGetArray(newmatd->A, &bv)); 332 PetscCall(MatDenseGetArrayRead(mat->A, &v)); 333 PetscCall(MatDenseGetLDA(mat->A, &lda)); 334 for (i = 0; i < Ncols; i++) { 335 const PetscScalar *av = v + lda * icol[i]; 336 for (j = 0; j < nrows; j++) *bv++ = av[irow[j] - rstart]; 337 } 338 PetscCall(MatDenseRestoreArrayRead(mat->A, &v)); 339 PetscCall(MatDenseRestoreArray(newmatd->A, &bv)); 340 341 /* Assemble the matrices so that the correct flags are set */ 342 PetscCall(MatAssemblyBegin(newmat, MAT_FINAL_ASSEMBLY)); 343 PetscCall(MatAssemblyEnd(newmat, MAT_FINAL_ASSEMBLY)); 344 345 /* Free work space */ 346 PetscCall(ISRestoreIndices(isrow, &irow)); 347 PetscCall(ISRestoreIndices(iscol_local, &icol)); 348 PetscCall(ISDestroy(&iscol_local)); 349 *B = newmat; 350 PetscFunctionReturn(PETSC_SUCCESS); 351 } 352 353 static PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A, PetscScalar **array) 354 { 355 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 356 357 PetscFunctionBegin; 358 PetscCall(MatDenseRestoreArray(a->A, array)); 359 PetscFunctionReturn(PETSC_SUCCESS); 360 } 361 362 static PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A, PetscScalar **array) 363 { 364 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 365 366 PetscFunctionBegin; 367 PetscCall(MatDenseRestoreArrayRead(a->A, (const PetscScalar **)array)); 368 PetscFunctionReturn(PETSC_SUCCESS); 369 } 370 371 static PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A, PetscScalar **array) 372 { 373 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 374 375 PetscFunctionBegin; 376 PetscCall(MatDenseRestoreArrayWrite(a->A, array)); 377 PetscFunctionReturn(PETSC_SUCCESS); 378 } 379 380 static PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat, MatAssemblyType mode) 381 { 382 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 383 PetscInt nstash, reallocs; 384 385 PetscFunctionBegin; 386 if (mdn->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS); 387 388 PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range)); 389 PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs)); 390 PetscCall(PetscInfo(mdn->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs)); 391 PetscFunctionReturn(PETSC_SUCCESS); 392 } 393 394 static PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat, MatAssemblyType mode) 395 { 396 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 397 PetscInt i, *row, *col, flg, j, rstart, ncols; 398 PetscMPIInt n; 399 PetscScalar *val; 400 401 PetscFunctionBegin; 402 if (!mdn->donotstash && !mat->nooffprocentries) { 403 /* wait on receives */ 404 while (1) { 405 PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg)); 406 if (!flg) break; 407 408 for (i = 0; i < n;) { 409 /* Now identify the consecutive vals belonging to the same row */ 410 for (j = i, rstart = row[j]; j < n; j++) { 411 if (row[j] != rstart) break; 412 } 413 if (j < n) ncols = j - i; 414 else ncols = n - i; 415 /* Now assemble all these values with a single function call */ 416 PetscCall(MatSetValues_MPIDense(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode)); 417 i = j; 418 } 419 } 420 PetscCall(MatStashScatterEnd_Private(&mat->stash)); 421 } 422 423 PetscCall(MatAssemblyBegin(mdn->A, mode)); 424 PetscCall(MatAssemblyEnd(mdn->A, mode)); 425 PetscFunctionReturn(PETSC_SUCCESS); 426 } 427 428 static PetscErrorCode MatZeroEntries_MPIDense(Mat A) 429 { 430 Mat_MPIDense *l = (Mat_MPIDense *)A->data; 431 432 PetscFunctionBegin; 433 PetscCall(MatZeroEntries(l->A)); 434 PetscFunctionReturn(PETSC_SUCCESS); 435 } 436 437 static PetscErrorCode MatZeroRows_MPIDense(Mat A, PetscInt n, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) 438 { 439 Mat_MPIDense *l = (Mat_MPIDense *)A->data; 440 PetscInt i, len, *lrows; 441 442 PetscFunctionBegin; 443 /* get locally owned rows */ 444 PetscCall(PetscLayoutMapLocal(A->rmap, n, rows, &len, &lrows, NULL)); 445 /* fix right-hand side if needed */ 446 if (x && b) { 447 const PetscScalar *xx; 448 PetscScalar *bb; 449 450 PetscCall(VecGetArrayRead(x, &xx)); 451 PetscCall(VecGetArrayWrite(b, &bb)); 452 for (i = 0; i < len; ++i) bb[lrows[i]] = diag * xx[lrows[i]]; 453 PetscCall(VecRestoreArrayRead(x, &xx)); 454 PetscCall(VecRestoreArrayWrite(b, &bb)); 455 } 456 PetscCall(MatZeroRows(l->A, len, lrows, 0.0, NULL, NULL)); 457 if (diag != 0.0) { 458 Vec d; 459 460 PetscCall(MatCreateVecs(A, NULL, &d)); 461 PetscCall(VecSet(d, diag)); 462 PetscCall(MatDiagonalSet(A, d, INSERT_VALUES)); 463 PetscCall(VecDestroy(&d)); 464 } 465 PetscCall(PetscFree(lrows)); 466 PetscFunctionReturn(PETSC_SUCCESS); 467 } 468 469 PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat, Vec, Vec); 470 PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat, Vec, Vec, Vec); 471 PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat, Vec, Vec); 472 PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat, Vec, Vec, Vec); 473 474 static PetscErrorCode MatMultColumnRange_MPIDense(Mat mat, Vec xx, Vec yy, PetscInt c_start, PetscInt c_end) 475 { 476 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 477 const PetscScalar *ax; 478 PetscScalar *ay; 479 PetscMemType axmtype, aymtype; 480 481 PetscFunctionBegin; 482 if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat)); 483 PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype)); 484 PetscCall(VecGetArrayWriteAndMemType(mdn->lvec, &ay, &aymtype)); 485 PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE)); 486 PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE)); 487 PetscCall(VecRestoreArrayWriteAndMemType(mdn->lvec, &ay)); 488 PetscCall(VecRestoreArrayReadAndMemType(xx, &ax)); 489 PetscUseMethod(mdn->A, "MatMultColumnRange_C", (Mat, Vec, Vec, PetscInt, PetscInt), (mdn->A, mdn->lvec, yy, c_start, c_end)); 490 PetscFunctionReturn(PETSC_SUCCESS); 491 } 492 493 static PetscErrorCode MatMult_MPIDense(Mat mat, Vec xx, Vec yy) 494 { 495 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 496 const PetscScalar *ax; 497 PetscScalar *ay; 498 PetscMemType axmtype, aymtype; 499 500 PetscFunctionBegin; 501 if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat)); 502 PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype)); 503 PetscCall(VecGetArrayWriteAndMemType(mdn->lvec, &ay, &aymtype)); 504 PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE)); 505 PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE)); 506 PetscCall(VecRestoreArrayWriteAndMemType(mdn->lvec, &ay)); 507 PetscCall(VecRestoreArrayReadAndMemType(xx, &ax)); 508 PetscCall((*mdn->A->ops->mult)(mdn->A, mdn->lvec, yy)); 509 PetscFunctionReturn(PETSC_SUCCESS); 510 } 511 512 static PetscErrorCode MatMultAddColumnRange_MPIDense(Mat mat, Vec xx, Vec yy, Vec zz, PetscInt c_start, PetscInt c_end) 513 { 514 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 515 const PetscScalar *ax; 516 PetscScalar *ay; 517 PetscMemType axmtype, aymtype; 518 519 PetscFunctionBegin; 520 if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat)); 521 PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype)); 522 PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype)); 523 PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE)); 524 PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE)); 525 PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay)); 526 PetscCall(VecRestoreArrayReadAndMemType(xx, &ax)); 527 PetscUseMethod(mdn->A, "MatMultAddColumnRange_C", (Mat, Vec, Vec, Vec, PetscInt, PetscInt), (mdn->A, mdn->lvec, yy, zz, c_start, c_end)); 528 PetscFunctionReturn(PETSC_SUCCESS); 529 } 530 531 static PetscErrorCode MatMultAdd_MPIDense(Mat mat, Vec xx, Vec yy, Vec zz) 532 { 533 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 534 const PetscScalar *ax; 535 PetscScalar *ay; 536 PetscMemType axmtype, aymtype; 537 538 PetscFunctionBegin; 539 if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat)); 540 PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype)); 541 PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype)); 542 PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE)); 543 PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE)); 544 PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay)); 545 PetscCall(VecRestoreArrayReadAndMemType(xx, &ax)); 546 PetscCall((*mdn->A->ops->multadd)(mdn->A, mdn->lvec, yy, zz)); 547 PetscFunctionReturn(PETSC_SUCCESS); 548 } 549 550 static PetscErrorCode MatMultHermitianTransposeColumnRange_MPIDense(Mat A, Vec xx, Vec yy, PetscInt c_start, PetscInt c_end) 551 { 552 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 553 const PetscScalar *ax; 554 PetscScalar *ay; 555 PetscMemType axmtype, aymtype; 556 PetscInt r_start, r_end; 557 PetscInt c_start_local, c_end_local; 558 559 PetscFunctionBegin; 560 if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); 561 PetscCall(VecZeroEntries(a->lvec)); 562 PetscCall(VecGetOwnershipRange(yy, &r_start, &r_end)); 563 c_start_local = PetscMax(c_start, r_start); 564 c_end_local = PetscMin(c_end, r_end); 565 PetscCall(VecGetArrayAndMemType(yy, &ay, &aymtype)); 566 if (c_end_local > c_start_local) { 567 if (PetscMemTypeHost(aymtype)) { 568 PetscCall(PetscArrayzero(&ay[c_start_local], (size_t)(c_end_local - c_start_local))); 569 } else { 570 PetscCall(PetscDeviceRegisterMemory(ay, aymtype, sizeof(*ay) * ((size_t)(r_end - r_start)))); 571 PetscCall(PetscDeviceArrayZero(NULL, &ay[c_start_local], (size_t)(c_end_local - c_start_local))); 572 } 573 } 574 PetscUseMethod(a->A, "MatMultHermitianTransposeColumnRange_C", (Mat, Vec, Vec, PetscInt, PetscInt), (a->A, xx, a->lvec, c_start, c_end)); 575 PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype)); 576 PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM)); 577 PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM)); 578 PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax)); 579 PetscCall(VecRestoreArrayAndMemType(yy, &ay)); 580 PetscFunctionReturn(PETSC_SUCCESS); 581 } 582 583 static PetscErrorCode MatMultTransposeKernel_MPIDense(Mat A, Vec xx, Vec yy, PetscBool herm) 584 { 585 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 586 const PetscScalar *ax; 587 PetscScalar *ay; 588 PetscMemType axmtype, aymtype; 589 590 PetscFunctionBegin; 591 if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); 592 PetscCall(VecSet(yy, 0.0)); 593 if (herm) PetscCall((*a->A->ops->multhermitiantranspose)(a->A, xx, a->lvec)); 594 else PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec)); 595 PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype)); 596 PetscCall(VecGetArrayAndMemType(yy, &ay, &aymtype)); 597 PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM)); 598 PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM)); 599 PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax)); 600 PetscCall(VecRestoreArrayAndMemType(yy, &ay)); 601 PetscFunctionReturn(PETSC_SUCCESS); 602 } 603 604 static PetscErrorCode MatMultHermitianTransposeAddColumnRange_MPIDense(Mat A, Vec xx, Vec yy, Vec zz, PetscInt c_start, PetscInt c_end) 605 { 606 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 607 const PetscScalar *ax; 608 PetscScalar *ay; 609 PetscMemType axmtype, aymtype; 610 611 PetscFunctionBegin; 612 if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); 613 PetscCall(VecCopy(yy, zz)); 614 PetscCall(VecZeroEntries(a->lvec)); 615 PetscUseMethod(a->A, "MatMultHermitianTransposeColumnRange_C", (Mat, Vec, Vec, PetscInt, PetscInt), (a->A, xx, a->lvec, c_start, c_end)); 616 PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype)); 617 PetscCall(VecGetArrayAndMemType(zz, &ay, &aymtype)); 618 PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM)); 619 PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM)); 620 PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax)); 621 PetscCall(VecRestoreArrayAndMemType(zz, &ay)); 622 PetscFunctionReturn(PETSC_SUCCESS); 623 } 624 625 static PetscErrorCode MatMultTransposeAddKernel_MPIDense(Mat A, Vec xx, Vec yy, Vec zz, PetscBool herm) 626 { 627 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 628 const PetscScalar *ax; 629 PetscScalar *ay; 630 PetscMemType axmtype, aymtype; 631 632 PetscFunctionBegin; 633 if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); 634 PetscCall(VecCopy(yy, zz)); 635 if (herm) PetscCall((*a->A->ops->multhermitiantranspose)(a->A, xx, a->lvec)); 636 else PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec)); 637 PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype)); 638 PetscCall(VecGetArrayAndMemType(zz, &ay, &aymtype)); 639 PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM)); 640 PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM)); 641 PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax)); 642 PetscCall(VecRestoreArrayAndMemType(zz, &ay)); 643 PetscFunctionReturn(PETSC_SUCCESS); 644 } 645 646 static PetscErrorCode MatMultTranspose_MPIDense(Mat A, Vec xx, Vec yy) 647 { 648 PetscFunctionBegin; 649 PetscCall(MatMultTransposeKernel_MPIDense(A, xx, yy, PETSC_FALSE)); 650 PetscFunctionReturn(PETSC_SUCCESS); 651 } 652 653 static PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A, Vec xx, Vec yy, Vec zz) 654 { 655 PetscFunctionBegin; 656 PetscCall(MatMultTransposeAddKernel_MPIDense(A, xx, yy, zz, PETSC_FALSE)); 657 PetscFunctionReturn(PETSC_SUCCESS); 658 } 659 660 static PetscErrorCode MatMultHermitianTranspose_MPIDense(Mat A, Vec xx, Vec yy) 661 { 662 PetscFunctionBegin; 663 PetscCall(MatMultTransposeKernel_MPIDense(A, xx, yy, PETSC_TRUE)); 664 PetscFunctionReturn(PETSC_SUCCESS); 665 } 666 667 static PetscErrorCode MatMultHermitianTransposeAdd_MPIDense(Mat A, Vec xx, Vec yy, Vec zz) 668 { 669 PetscFunctionBegin; 670 PetscCall(MatMultTransposeAddKernel_MPIDense(A, xx, yy, zz, PETSC_TRUE)); 671 PetscFunctionReturn(PETSC_SUCCESS); 672 } 673 674 PetscErrorCode MatGetDiagonal_MPIDense(Mat A, Vec v) 675 { 676 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 677 PetscInt lda, len, i, nl, ng, m = A->rmap->n, radd; 678 PetscScalar *x; 679 const PetscScalar *av; 680 681 PetscFunctionBegin; 682 PetscCall(VecGetArray(v, &x)); 683 PetscCall(VecGetSize(v, &ng)); 684 PetscCheck(ng == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming mat and vec"); 685 PetscCall(VecGetLocalSize(v, &nl)); 686 len = PetscMin(a->A->rmap->n, a->A->cmap->n); 687 radd = A->rmap->rstart * m; 688 PetscCall(MatDenseGetArrayRead(a->A, &av)); 689 PetscCall(MatDenseGetLDA(a->A, &lda)); 690 for (i = 0; i < len; i++) x[i] = av[radd + i * lda + i]; 691 PetscCall(MatDenseRestoreArrayRead(a->A, &av)); 692 if (nl - i > 0) PetscCall(PetscArrayzero(x + i, nl - i)); 693 PetscCall(VecRestoreArray(v, &x)); 694 PetscFunctionReturn(PETSC_SUCCESS); 695 } 696 697 static PetscErrorCode MatDestroy_MPIDense(Mat mat) 698 { 699 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 700 701 PetscFunctionBegin; 702 PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N)); 703 PetscCall(MatStashDestroy_Private(&mat->stash)); 704 PetscCheck(!mdn->vecinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 705 PetscCheck(!mdn->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 706 PetscCall(MatDestroy(&mdn->A)); 707 PetscCall(VecDestroy(&mdn->lvec)); 708 PetscCall(PetscSFDestroy(&mdn->Mvctx)); 709 PetscCall(VecDestroy(&mdn->cvec)); 710 PetscCall(MatDestroy(&mdn->cmat)); 711 712 PetscCall(PetscFree(mat->data)); 713 PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL)); 714 715 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", NULL)); 716 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", NULL)); 717 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", NULL)); 718 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", NULL)); 719 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", NULL)); 720 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", NULL)); 721 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", NULL)); 722 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", NULL)); 723 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", NULL)); 724 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", NULL)); 725 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", NULL)); 726 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL)); 727 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", NULL)); 728 #if defined(PETSC_HAVE_ELEMENTAL) 729 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", NULL)); 730 #endif 731 #if defined(PETSC_HAVE_SCALAPACK) 732 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", NULL)); 733 #endif 734 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", NULL)); 735 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", NULL)); 736 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", NULL)); 737 #if defined(PETSC_HAVE_CUDA) 738 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", NULL)); 739 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", NULL)); 740 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", NULL)); 741 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensecuda_mpidense_C", NULL)); 742 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", NULL)); 743 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", NULL)); 744 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", NULL)); 745 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", NULL)); 746 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArray_C", NULL)); 747 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayRead_C", NULL)); 748 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayWrite_C", NULL)); 749 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArray_C", NULL)); 750 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayRead_C", NULL)); 751 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayWrite_C", NULL)); 752 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAPlaceArray_C", NULL)); 753 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAResetArray_C", NULL)); 754 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAReplaceArray_C", NULL)); 755 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDASetPreallocation_C", NULL)); 756 #endif 757 #if defined(PETSC_HAVE_HIP) 758 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", NULL)); 759 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", NULL)); 760 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", NULL)); 761 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensehip_mpidense_C", NULL)); 762 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensehip_C", NULL)); 763 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidensehip_C", NULL)); 764 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaij_C", NULL)); 765 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaijhipsparse_C", NULL)); 766 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArray_C", NULL)); 767 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayRead_C", NULL)); 768 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayWrite_C", NULL)); 769 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArray_C", NULL)); 770 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayRead_C", NULL)); 771 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayWrite_C", NULL)); 772 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPPlaceArray_C", NULL)); 773 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPResetArray_C", NULL)); 774 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPReplaceArray_C", NULL)); 775 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPSetPreallocation_C", NULL)); 776 #endif 777 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", NULL)); 778 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", NULL)); 779 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", NULL)); 780 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", NULL)); 781 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", NULL)); 782 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", NULL)); 783 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", NULL)); 784 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", NULL)); 785 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", NULL)); 786 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", NULL)); 787 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultColumnRange_C", NULL)); 788 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultAddColumnRange_C", NULL)); 789 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeColumnRange_C", NULL)); 790 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeAddColumnRange_C", NULL)); 791 792 PetscCall(PetscObjectCompose((PetscObject)mat, "DiagonalBlock", NULL)); 793 PetscFunctionReturn(PETSC_SUCCESS); 794 } 795 796 #include <petscdraw.h> 797 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat, PetscViewer viewer) 798 { 799 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 800 PetscMPIInt rank; 801 PetscViewerType vtype; 802 PetscBool iascii, isdraw; 803 PetscViewer sviewer; 804 PetscViewerFormat format; 805 806 PetscFunctionBegin; 807 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 808 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 809 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 810 if (iascii) { 811 PetscCall(PetscViewerGetType(viewer, &vtype)); 812 PetscCall(PetscViewerGetFormat(viewer, &format)); 813 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 814 MatInfo info; 815 PetscCall(MatGetInfo(mat, MAT_LOCAL, &info)); 816 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 817 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %" PetscInt_FMT " \n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, 818 (PetscInt)info.memory)); 819 PetscCall(PetscViewerFlush(viewer)); 820 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 821 if (mdn->Mvctx) PetscCall(PetscSFView(mdn->Mvctx, viewer)); 822 PetscFunctionReturn(PETSC_SUCCESS); 823 } else if (format == PETSC_VIEWER_ASCII_INFO) { 824 PetscFunctionReturn(PETSC_SUCCESS); 825 } 826 } else if (isdraw) { 827 PetscDraw draw; 828 PetscBool isnull; 829 830 PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw)); 831 PetscCall(PetscDrawIsNull(draw, &isnull)); 832 if (isnull) PetscFunctionReturn(PETSC_SUCCESS); 833 } 834 835 { 836 /* assemble the entire matrix onto first processor. */ 837 Mat A; 838 PetscInt M = mat->rmap->N, N = mat->cmap->N, m, row, i, nz; 839 PetscInt *cols; 840 PetscScalar *vals; 841 842 PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A)); 843 if (rank == 0) { 844 PetscCall(MatSetSizes(A, M, N, M, N)); 845 } else { 846 PetscCall(MatSetSizes(A, 0, 0, M, N)); 847 } 848 /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */ 849 PetscCall(MatSetType(A, MATMPIDENSE)); 850 PetscCall(MatMPIDenseSetPreallocation(A, NULL)); 851 852 /* Copy the matrix ... This isn't the most efficient means, 853 but it's quick for now */ 854 A->insertmode = INSERT_VALUES; 855 856 row = mat->rmap->rstart; 857 m = mdn->A->rmap->n; 858 for (i = 0; i < m; i++) { 859 PetscCall(MatGetRow_MPIDense(mat, row, &nz, &cols, &vals)); 860 PetscCall(MatSetValues_MPIDense(A, 1, &row, nz, cols, vals, INSERT_VALUES)); 861 PetscCall(MatRestoreRow_MPIDense(mat, row, &nz, &cols, &vals)); 862 row++; 863 } 864 865 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 866 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 867 PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 868 if (rank == 0) { 869 PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIDense *)A->data)->A, ((PetscObject)mat)->name)); 870 PetscCall(MatView_SeqDense(((Mat_MPIDense *)A->data)->A, sviewer)); 871 } 872 PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 873 PetscCall(MatDestroy(&A)); 874 } 875 PetscFunctionReturn(PETSC_SUCCESS); 876 } 877 878 static PetscErrorCode MatView_MPIDense(Mat mat, PetscViewer viewer) 879 { 880 PetscBool iascii, isbinary, isdraw, issocket; 881 882 PetscFunctionBegin; 883 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 884 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 885 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket)); 886 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 887 888 if (iascii || issocket || isdraw) { 889 PetscCall(MatView_MPIDense_ASCIIorDraworSocket(mat, viewer)); 890 } else if (isbinary) PetscCall(MatView_Dense_Binary(mat, viewer)); 891 PetscFunctionReturn(PETSC_SUCCESS); 892 } 893 894 static PetscErrorCode MatGetInfo_MPIDense(Mat A, MatInfoType flag, MatInfo *info) 895 { 896 Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 897 Mat mdn = mat->A; 898 PetscLogDouble isend[5], irecv[5]; 899 900 PetscFunctionBegin; 901 info->block_size = 1.0; 902 903 PetscCall(MatGetInfo(mdn, MAT_LOCAL, info)); 904 905 isend[0] = info->nz_used; 906 isend[1] = info->nz_allocated; 907 isend[2] = info->nz_unneeded; 908 isend[3] = info->memory; 909 isend[4] = info->mallocs; 910 if (flag == MAT_LOCAL) { 911 info->nz_used = isend[0]; 912 info->nz_allocated = isend[1]; 913 info->nz_unneeded = isend[2]; 914 info->memory = isend[3]; 915 info->mallocs = isend[4]; 916 } else if (flag == MAT_GLOBAL_MAX) { 917 PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)A))); 918 919 info->nz_used = irecv[0]; 920 info->nz_allocated = irecv[1]; 921 info->nz_unneeded = irecv[2]; 922 info->memory = irecv[3]; 923 info->mallocs = irecv[4]; 924 } else if (flag == MAT_GLOBAL_SUM) { 925 PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)A))); 926 927 info->nz_used = irecv[0]; 928 info->nz_allocated = irecv[1]; 929 info->nz_unneeded = irecv[2]; 930 info->memory = irecv[3]; 931 info->mallocs = irecv[4]; 932 } 933 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 934 info->fill_ratio_needed = 0; 935 info->factor_mallocs = 0; 936 PetscFunctionReturn(PETSC_SUCCESS); 937 } 938 939 static PetscErrorCode MatSetOption_MPIDense(Mat A, MatOption op, PetscBool flg) 940 { 941 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 942 943 PetscFunctionBegin; 944 switch (op) { 945 case MAT_NEW_NONZERO_LOCATIONS: 946 case MAT_NEW_NONZERO_LOCATION_ERR: 947 case MAT_NEW_NONZERO_ALLOCATION_ERR: 948 MatCheckPreallocated(A, 1); 949 PetscCall(MatSetOption(a->A, op, flg)); 950 break; 951 case MAT_ROW_ORIENTED: 952 MatCheckPreallocated(A, 1); 953 a->roworiented = flg; 954 PetscCall(MatSetOption(a->A, op, flg)); 955 break; 956 case MAT_IGNORE_OFF_PROC_ENTRIES: 957 a->donotstash = flg; 958 break; 959 case MAT_SYMMETRIC: 960 case MAT_STRUCTURALLY_SYMMETRIC: 961 case MAT_HERMITIAN: 962 case MAT_SYMMETRY_ETERNAL: 963 case MAT_STRUCTURAL_SYMMETRY_ETERNAL: 964 case MAT_SPD: 965 case MAT_SPD_ETERNAL: 966 /* if the diagonal matrix is square it inherits some of the properties above */ 967 if (a->A && A->rmap->n == A->cmap->n) PetscCall(MatSetOption(a->A, op, flg)); 968 break; 969 default: 970 break; 971 } 972 PetscFunctionReturn(PETSC_SUCCESS); 973 } 974 975 static PetscErrorCode MatDiagonalScale_MPIDense(Mat A, Vec ll, Vec rr) 976 { 977 Mat_MPIDense *mdn = (Mat_MPIDense *)A->data; 978 const PetscScalar *l; 979 PetscScalar x, *v, *vv, *r; 980 PetscInt i, j, s2a, s3a, s2, s3, m = mdn->A->rmap->n, n = mdn->A->cmap->n, lda; 981 982 PetscFunctionBegin; 983 PetscCall(MatDenseGetArray(mdn->A, &vv)); 984 PetscCall(MatDenseGetLDA(mdn->A, &lda)); 985 PetscCall(MatGetLocalSize(A, &s2, &s3)); 986 if (ll) { 987 PetscCall(VecGetLocalSize(ll, &s2a)); 988 PetscCheck(s2a == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left scaling vector non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT, s2a, s2); 989 PetscCall(VecGetArrayRead(ll, &l)); 990 for (i = 0; i < m; i++) { 991 x = l[i]; 992 v = vv + i; 993 for (j = 0; j < n; j++) { 994 (*v) *= x; 995 v += lda; 996 } 997 } 998 PetscCall(VecRestoreArrayRead(ll, &l)); 999 PetscCall(PetscLogFlops(1.0 * n * m)); 1000 } 1001 if (rr) { 1002 const PetscScalar *ar; 1003 1004 PetscCall(VecGetLocalSize(rr, &s3a)); 1005 PetscCheck(s3a == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Right scaling vec non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT ".", s3a, s3); 1006 PetscCall(VecGetArrayRead(rr, &ar)); 1007 if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); 1008 PetscCall(VecGetArray(mdn->lvec, &r)); 1009 PetscCall(PetscSFBcastBegin(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE)); 1010 PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE)); 1011 PetscCall(VecRestoreArrayRead(rr, &ar)); 1012 for (i = 0; i < n; i++) { 1013 x = r[i]; 1014 v = vv + i * lda; 1015 for (j = 0; j < m; j++) (*v++) *= x; 1016 } 1017 PetscCall(VecRestoreArray(mdn->lvec, &r)); 1018 PetscCall(PetscLogFlops(1.0 * n * m)); 1019 } 1020 PetscCall(MatDenseRestoreArray(mdn->A, &vv)); 1021 PetscFunctionReturn(PETSC_SUCCESS); 1022 } 1023 1024 static PetscErrorCode MatNorm_MPIDense(Mat A, NormType type, PetscReal *nrm) 1025 { 1026 Mat_MPIDense *mdn = (Mat_MPIDense *)A->data; 1027 PetscInt i, j; 1028 PetscMPIInt size; 1029 PetscReal sum = 0.0; 1030 const PetscScalar *av, *v; 1031 1032 PetscFunctionBegin; 1033 PetscCall(MatDenseGetArrayRead(mdn->A, &av)); 1034 v = av; 1035 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 1036 if (size == 1) { 1037 PetscCall(MatNorm(mdn->A, type, nrm)); 1038 } else { 1039 if (type == NORM_FROBENIUS) { 1040 for (i = 0; i < mdn->A->cmap->n * mdn->A->rmap->n; i++) { 1041 sum += PetscRealPart(PetscConj(*v) * (*v)); 1042 v++; 1043 } 1044 PetscCallMPI(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A))); 1045 *nrm = PetscSqrtReal(*nrm); 1046 PetscCall(PetscLogFlops(2.0 * mdn->A->cmap->n * mdn->A->rmap->n)); 1047 } else if (type == NORM_1) { 1048 PetscReal *tmp; 1049 1050 PetscCall(PetscCalloc1(A->cmap->N, &tmp)); 1051 *nrm = 0.0; 1052 v = av; 1053 for (j = 0; j < mdn->A->cmap->n; j++) { 1054 for (i = 0; i < mdn->A->rmap->n; i++) { 1055 tmp[j] += PetscAbsScalar(*v); 1056 v++; 1057 } 1058 } 1059 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, tmp, A->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A))); 1060 for (j = 0; j < A->cmap->N; j++) { 1061 if (tmp[j] > *nrm) *nrm = tmp[j]; 1062 } 1063 PetscCall(PetscFree(tmp)); 1064 PetscCall(PetscLogFlops(A->cmap->n * A->rmap->n)); 1065 } else if (type == NORM_INFINITY) { /* max row norm */ 1066 PetscCall(MatNorm(mdn->A, type, nrm)); 1067 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A))); 1068 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for two norm"); 1069 } 1070 PetscCall(MatDenseRestoreArrayRead(mdn->A, &av)); 1071 PetscFunctionReturn(PETSC_SUCCESS); 1072 } 1073 1074 static PetscErrorCode MatTranspose_MPIDense(Mat A, MatReuse reuse, Mat *matout) 1075 { 1076 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1077 Mat B; 1078 PetscInt M = A->rmap->N, N = A->cmap->N, m, n, *rwork, rstart = A->rmap->rstart; 1079 PetscInt j, i, lda; 1080 PetscScalar *v; 1081 1082 PetscFunctionBegin; 1083 if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout)); 1084 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 1085 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 1086 PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M)); 1087 PetscCall(MatSetType(B, ((PetscObject)A)->type_name)); 1088 PetscCall(MatMPIDenseSetPreallocation(B, NULL)); 1089 } else B = *matout; 1090 1091 m = a->A->rmap->n; 1092 n = a->A->cmap->n; 1093 PetscCall(MatDenseGetArrayRead(a->A, (const PetscScalar **)&v)); 1094 PetscCall(MatDenseGetLDA(a->A, &lda)); 1095 PetscCall(PetscMalloc1(m, &rwork)); 1096 for (i = 0; i < m; i++) rwork[i] = rstart + i; 1097 for (j = 0; j < n; j++) { 1098 PetscCall(MatSetValues(B, 1, &j, m, rwork, v, INSERT_VALUES)); 1099 v = PetscSafePointerPlusOffset(v, lda); 1100 } 1101 PetscCall(MatDenseRestoreArrayRead(a->A, (const PetscScalar **)&v)); 1102 PetscCall(PetscFree(rwork)); 1103 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 1104 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 1105 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 1106 *matout = B; 1107 } else { 1108 PetscCall(MatHeaderMerge(A, &B)); 1109 } 1110 PetscFunctionReturn(PETSC_SUCCESS); 1111 } 1112 1113 static PetscErrorCode MatDuplicate_MPIDense(Mat, MatDuplicateOption, Mat *); 1114 PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat, PetscScalar); 1115 1116 static PetscErrorCode MatSetUp_MPIDense(Mat A) 1117 { 1118 PetscFunctionBegin; 1119 PetscCall(PetscLayoutSetUp(A->rmap)); 1120 PetscCall(PetscLayoutSetUp(A->cmap)); 1121 if (!A->preallocated) PetscCall(MatMPIDenseSetPreallocation(A, NULL)); 1122 PetscFunctionReturn(PETSC_SUCCESS); 1123 } 1124 1125 static PetscErrorCode MatAXPY_MPIDense(Mat Y, PetscScalar alpha, Mat X, MatStructure str) 1126 { 1127 Mat_MPIDense *A = (Mat_MPIDense *)Y->data, *B = (Mat_MPIDense *)X->data; 1128 1129 PetscFunctionBegin; 1130 PetscCall(MatAXPY(A->A, alpha, B->A, str)); 1131 PetscFunctionReturn(PETSC_SUCCESS); 1132 } 1133 1134 static PetscErrorCode MatConjugate_MPIDense(Mat mat) 1135 { 1136 Mat_MPIDense *a = (Mat_MPIDense *)mat->data; 1137 1138 PetscFunctionBegin; 1139 PetscCall(MatConjugate(a->A)); 1140 PetscFunctionReturn(PETSC_SUCCESS); 1141 } 1142 1143 static PetscErrorCode MatRealPart_MPIDense(Mat A) 1144 { 1145 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1146 1147 PetscFunctionBegin; 1148 PetscCall(MatRealPart(a->A)); 1149 PetscFunctionReturn(PETSC_SUCCESS); 1150 } 1151 1152 static PetscErrorCode MatImaginaryPart_MPIDense(Mat A) 1153 { 1154 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1155 1156 PetscFunctionBegin; 1157 PetscCall(MatImaginaryPart(a->A)); 1158 PetscFunctionReturn(PETSC_SUCCESS); 1159 } 1160 1161 static PetscErrorCode MatGetColumnVector_MPIDense(Mat A, Vec v, PetscInt col) 1162 { 1163 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1164 1165 PetscFunctionBegin; 1166 PetscCheck(a->A, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing local matrix"); 1167 PetscCheck(a->A->ops->getcolumnvector, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing get column operation"); 1168 PetscCall((*a->A->ops->getcolumnvector)(a->A, v, col)); 1169 PetscFunctionReturn(PETSC_SUCCESS); 1170 } 1171 1172 PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat, PetscInt, PetscReal *); 1173 1174 static PetscErrorCode MatGetColumnReductions_MPIDense(Mat A, PetscInt type, PetscReal *reductions) 1175 { 1176 PetscInt i, m, n; 1177 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1178 1179 PetscFunctionBegin; 1180 PetscCall(MatGetSize(A, &m, &n)); 1181 if (type == REDUCTION_MEAN_REALPART) { 1182 PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_REALPART, reductions)); 1183 } else if (type == REDUCTION_MEAN_IMAGINARYPART) { 1184 PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_IMAGINARYPART, reductions)); 1185 } else { 1186 PetscCall(MatGetColumnReductions_SeqDense(a->A, type, reductions)); 1187 } 1188 if (type == NORM_2) { 1189 for (i = 0; i < n; i++) reductions[i] *= reductions[i]; 1190 } 1191 if (type == NORM_INFINITY) { 1192 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, reductions, n, MPIU_REAL, MPIU_MAX, A->hdr.comm)); 1193 } else { 1194 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, reductions, n, MPIU_REAL, MPIU_SUM, A->hdr.comm)); 1195 } 1196 if (type == NORM_2) { 1197 for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]); 1198 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 1199 for (i = 0; i < n; i++) reductions[i] /= m; 1200 } 1201 PetscFunctionReturn(PETSC_SUCCESS); 1202 } 1203 1204 static PetscErrorCode MatSetRandom_MPIDense(Mat x, PetscRandom rctx) 1205 { 1206 Mat_MPIDense *d = (Mat_MPIDense *)x->data; 1207 1208 PetscFunctionBegin; 1209 PetscCall(MatSetRandom(d->A, rctx)); 1210 #if defined(PETSC_HAVE_DEVICE) 1211 x->offloadmask = d->A->offloadmask; 1212 #endif 1213 PetscFunctionReturn(PETSC_SUCCESS); 1214 } 1215 1216 static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A, PetscBool *missing, PetscInt *d) 1217 { 1218 PetscFunctionBegin; 1219 *missing = PETSC_FALSE; 1220 PetscFunctionReturn(PETSC_SUCCESS); 1221 } 1222 1223 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat); 1224 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat); 1225 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat); 1226 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat); 1227 static PetscErrorCode MatEqual_MPIDense(Mat, Mat, PetscBool *); 1228 static PetscErrorCode MatLoad_MPIDense(Mat, PetscViewer); 1229 static PetscErrorCode MatProductSetFromOptions_MPIDense(Mat); 1230 1231 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense, 1232 MatGetRow_MPIDense, 1233 MatRestoreRow_MPIDense, 1234 MatMult_MPIDense, 1235 /* 4*/ MatMultAdd_MPIDense, 1236 MatMultTranspose_MPIDense, 1237 MatMultTransposeAdd_MPIDense, 1238 NULL, 1239 NULL, 1240 NULL, 1241 /* 10*/ NULL, 1242 NULL, 1243 NULL, 1244 NULL, 1245 MatTranspose_MPIDense, 1246 /* 15*/ MatGetInfo_MPIDense, 1247 MatEqual_MPIDense, 1248 MatGetDiagonal_MPIDense, 1249 MatDiagonalScale_MPIDense, 1250 MatNorm_MPIDense, 1251 /* 20*/ MatAssemblyBegin_MPIDense, 1252 MatAssemblyEnd_MPIDense, 1253 MatSetOption_MPIDense, 1254 MatZeroEntries_MPIDense, 1255 /* 24*/ MatZeroRows_MPIDense, 1256 NULL, 1257 NULL, 1258 NULL, 1259 NULL, 1260 /* 29*/ MatSetUp_MPIDense, 1261 NULL, 1262 NULL, 1263 MatGetDiagonalBlock_MPIDense, 1264 NULL, 1265 /* 34*/ MatDuplicate_MPIDense, 1266 NULL, 1267 NULL, 1268 NULL, 1269 NULL, 1270 /* 39*/ MatAXPY_MPIDense, 1271 MatCreateSubMatrices_MPIDense, 1272 NULL, 1273 MatGetValues_MPIDense, 1274 MatCopy_MPIDense, 1275 /* 44*/ NULL, 1276 MatScale_MPIDense, 1277 MatShift_MPIDense, 1278 NULL, 1279 NULL, 1280 /* 49*/ MatSetRandom_MPIDense, 1281 NULL, 1282 NULL, 1283 NULL, 1284 NULL, 1285 /* 54*/ NULL, 1286 NULL, 1287 NULL, 1288 NULL, 1289 NULL, 1290 /* 59*/ MatCreateSubMatrix_MPIDense, 1291 MatDestroy_MPIDense, 1292 MatView_MPIDense, 1293 NULL, 1294 NULL, 1295 /* 64*/ NULL, 1296 NULL, 1297 NULL, 1298 NULL, 1299 NULL, 1300 /* 69*/ NULL, 1301 NULL, 1302 NULL, 1303 NULL, 1304 NULL, 1305 /* 74*/ NULL, 1306 NULL, 1307 NULL, 1308 NULL, 1309 MatLoad_MPIDense, 1310 /* 79*/ NULL, 1311 NULL, 1312 NULL, 1313 NULL, 1314 /* 83*/ NULL, 1315 NULL, 1316 NULL, 1317 NULL, 1318 MatMatTransposeMultSymbolic_MPIDense_MPIDense, 1319 MatMatTransposeMultNumeric_MPIDense_MPIDense, 1320 /* 89*/ NULL, 1321 MatProductSetFromOptions_MPIDense, 1322 NULL, 1323 NULL, 1324 MatConjugate_MPIDense, 1325 /* 94*/ NULL, 1326 NULL, 1327 MatRealPart_MPIDense, 1328 MatImaginaryPart_MPIDense, 1329 NULL, 1330 /*99*/ NULL, 1331 NULL, 1332 NULL, 1333 NULL, 1334 MatGetColumnVector_MPIDense, 1335 /*104*/ MatMissingDiagonal_MPIDense, 1336 NULL, 1337 NULL, 1338 NULL, 1339 NULL, 1340 /*109*/ NULL, 1341 NULL, 1342 NULL, 1343 MatMultHermitianTranspose_MPIDense, 1344 MatMultHermitianTransposeAdd_MPIDense, 1345 /*114*/ NULL, 1346 NULL, 1347 MatGetColumnReductions_MPIDense, 1348 NULL, 1349 NULL, 1350 NULL, 1351 /*120*/ NULL, 1352 MatTransposeMatMultSymbolic_MPIDense_MPIDense, 1353 MatTransposeMatMultNumeric_MPIDense_MPIDense, 1354 NULL, 1355 /*124*/ NULL, 1356 NULL, 1357 NULL, 1358 NULL, 1359 NULL, 1360 /*129*/ NULL, 1361 NULL, 1362 MatCreateMPIMatConcatenateSeqMat_MPIDense, 1363 NULL, 1364 NULL, 1365 /*134*/ NULL, 1366 NULL, 1367 NULL, 1368 NULL, 1369 NULL, 1370 /*139*/ NULL, 1371 NULL, 1372 NULL, 1373 NULL, 1374 NULL}; 1375 1376 static PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat, PetscScalar *data) 1377 { 1378 Mat_MPIDense *a = (Mat_MPIDense *)mat->data; 1379 MatType mtype = MATSEQDENSE; 1380 1381 PetscFunctionBegin; 1382 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1383 PetscCall(PetscLayoutSetUp(mat->rmap)); 1384 PetscCall(PetscLayoutSetUp(mat->cmap)); 1385 if (!a->A) { 1386 PetscCall(MatCreate(PETSC_COMM_SELF, &a->A)); 1387 PetscCall(MatSetSizes(a->A, mat->rmap->n, mat->cmap->N, mat->rmap->n, mat->cmap->N)); 1388 } 1389 #if defined(PETSC_HAVE_CUDA) 1390 PetscBool iscuda; 1391 PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSECUDA, &iscuda)); 1392 if (iscuda) mtype = MATSEQDENSECUDA; 1393 #endif 1394 #if defined(PETSC_HAVE_HIP) 1395 PetscBool iship; 1396 PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSEHIP, &iship)); 1397 if (iship) mtype = MATSEQDENSEHIP; 1398 #endif 1399 PetscCall(MatSetType(a->A, mtype)); 1400 PetscCall(MatSeqDenseSetPreallocation(a->A, data)); 1401 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 1402 mat->offloadmask = a->A->offloadmask; 1403 #endif 1404 mat->preallocated = PETSC_TRUE; 1405 mat->assembled = PETSC_TRUE; 1406 PetscFunctionReturn(PETSC_SUCCESS); 1407 } 1408 1409 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIDense(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 1410 { 1411 Mat B, C; 1412 1413 PetscFunctionBegin; 1414 PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &C)); 1415 PetscCall(MatConvert_SeqAIJ_SeqDense(C, MATSEQDENSE, MAT_INITIAL_MATRIX, &B)); 1416 PetscCall(MatDestroy(&C)); 1417 if (reuse == MAT_REUSE_MATRIX) { 1418 C = *newmat; 1419 } else C = NULL; 1420 PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C)); 1421 PetscCall(MatDestroy(&B)); 1422 if (reuse == MAT_INPLACE_MATRIX) { 1423 PetscCall(MatHeaderReplace(A, &C)); 1424 } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C; 1425 PetscFunctionReturn(PETSC_SUCCESS); 1426 } 1427 1428 static PetscErrorCode MatConvert_MPIDense_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 1429 { 1430 Mat B, C; 1431 1432 PetscFunctionBegin; 1433 PetscCall(MatDenseGetLocalMatrix(A, &C)); 1434 PetscCall(MatConvert_SeqDense_SeqAIJ(C, MATSEQAIJ, MAT_INITIAL_MATRIX, &B)); 1435 if (reuse == MAT_REUSE_MATRIX) { 1436 C = *newmat; 1437 } else C = NULL; 1438 PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C)); 1439 PetscCall(MatDestroy(&B)); 1440 if (reuse == MAT_INPLACE_MATRIX) { 1441 PetscCall(MatHeaderReplace(A, &C)); 1442 } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C; 1443 PetscFunctionReturn(PETSC_SUCCESS); 1444 } 1445 1446 #if defined(PETSC_HAVE_ELEMENTAL) 1447 PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 1448 { 1449 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1450 Mat mat_elemental; 1451 PetscScalar *v; 1452 PetscInt m = A->rmap->n, N = A->cmap->N, rstart = A->rmap->rstart, i, *rows, *cols, lda; 1453 1454 PetscFunctionBegin; 1455 if (reuse == MAT_REUSE_MATRIX) { 1456 mat_elemental = *newmat; 1457 PetscCall(MatZeroEntries(*newmat)); 1458 } else { 1459 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental)); 1460 PetscCall(MatSetSizes(mat_elemental, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N)); 1461 PetscCall(MatSetType(mat_elemental, MATELEMENTAL)); 1462 PetscCall(MatSetUp(mat_elemental)); 1463 PetscCall(MatSetOption(mat_elemental, MAT_ROW_ORIENTED, PETSC_FALSE)); 1464 } 1465 1466 PetscCall(PetscMalloc2(m, &rows, N, &cols)); 1467 for (i = 0; i < N; i++) cols[i] = i; 1468 for (i = 0; i < m; i++) rows[i] = rstart + i; 1469 1470 /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */ 1471 PetscCall(MatDenseGetArray(A, &v)); 1472 PetscCall(MatDenseGetLDA(a->A, &lda)); 1473 if (lda == m) PetscCall(MatSetValues(mat_elemental, m, rows, N, cols, v, ADD_VALUES)); 1474 else { 1475 for (i = 0; i < N; i++) PetscCall(MatSetValues(mat_elemental, m, rows, 1, &i, v + lda * i, ADD_VALUES)); 1476 } 1477 PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY)); 1478 PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY)); 1479 PetscCall(MatDenseRestoreArray(A, &v)); 1480 PetscCall(PetscFree2(rows, cols)); 1481 1482 if (reuse == MAT_INPLACE_MATRIX) { 1483 PetscCall(MatHeaderReplace(A, &mat_elemental)); 1484 } else { 1485 *newmat = mat_elemental; 1486 } 1487 PetscFunctionReturn(PETSC_SUCCESS); 1488 } 1489 #endif 1490 1491 static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A, PetscInt col, PetscScalar **vals) 1492 { 1493 Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 1494 1495 PetscFunctionBegin; 1496 PetscCall(MatDenseGetColumn(mat->A, col, vals)); 1497 PetscFunctionReturn(PETSC_SUCCESS); 1498 } 1499 1500 static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A, PetscScalar **vals) 1501 { 1502 Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 1503 1504 PetscFunctionBegin; 1505 PetscCall(MatDenseRestoreColumn(mat->A, vals)); 1506 PetscFunctionReturn(PETSC_SUCCESS); 1507 } 1508 1509 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 1510 { 1511 Mat_MPIDense *mat; 1512 PetscInt m, nloc, N; 1513 1514 PetscFunctionBegin; 1515 PetscCall(MatGetSize(inmat, &m, &N)); 1516 PetscCall(MatGetLocalSize(inmat, NULL, &nloc)); 1517 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 1518 PetscInt sum; 1519 1520 if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N)); 1521 /* Check sum(n) = N */ 1522 PetscCallMPI(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm)); 1523 PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N); 1524 1525 PetscCall(MatCreateDense(comm, m, n, PETSC_DETERMINE, N, NULL, outmat)); 1526 PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 1527 } 1528 1529 /* numeric phase */ 1530 mat = (Mat_MPIDense *)(*outmat)->data; 1531 PetscCall(MatCopy(inmat, mat->A, SAME_NONZERO_PATTERN)); 1532 PetscFunctionReturn(PETSC_SUCCESS); 1533 } 1534 1535 PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A, PetscInt col, Vec *v) 1536 { 1537 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1538 PetscInt lda; 1539 1540 PetscFunctionBegin; 1541 PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1542 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1543 if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec)); 1544 a->vecinuse = col + 1; 1545 PetscCall(MatDenseGetLDA(a->A, &lda)); 1546 PetscCall(MatDenseGetArray(a->A, (PetscScalar **)&a->ptrinuse)); 1547 PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)lda))); 1548 *v = a->cvec; 1549 PetscFunctionReturn(PETSC_SUCCESS); 1550 } 1551 1552 PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A, PetscInt col, Vec *v) 1553 { 1554 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1555 1556 PetscFunctionBegin; 1557 PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first"); 1558 PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector"); 1559 VecCheckAssembled(a->cvec); 1560 a->vecinuse = 0; 1561 PetscCall(MatDenseRestoreArray(a->A, (PetscScalar **)&a->ptrinuse)); 1562 PetscCall(VecResetArray(a->cvec)); 1563 if (v) *v = NULL; 1564 PetscFunctionReturn(PETSC_SUCCESS); 1565 } 1566 1567 PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v) 1568 { 1569 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1570 PetscInt lda; 1571 1572 PetscFunctionBegin; 1573 PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1574 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1575 if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec)); 1576 a->vecinuse = col + 1; 1577 PetscCall(MatDenseGetLDA(a->A, &lda)); 1578 PetscCall(MatDenseGetArrayRead(a->A, &a->ptrinuse)); 1579 PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)lda))); 1580 PetscCall(VecLockReadPush(a->cvec)); 1581 *v = a->cvec; 1582 PetscFunctionReturn(PETSC_SUCCESS); 1583 } 1584 1585 PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v) 1586 { 1587 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1588 1589 PetscFunctionBegin; 1590 PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first"); 1591 PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector"); 1592 VecCheckAssembled(a->cvec); 1593 a->vecinuse = 0; 1594 PetscCall(MatDenseRestoreArrayRead(a->A, &a->ptrinuse)); 1595 PetscCall(VecLockReadPop(a->cvec)); 1596 PetscCall(VecResetArray(a->cvec)); 1597 if (v) *v = NULL; 1598 PetscFunctionReturn(PETSC_SUCCESS); 1599 } 1600 1601 PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v) 1602 { 1603 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1604 PetscInt lda; 1605 1606 PetscFunctionBegin; 1607 PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1608 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1609 if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec)); 1610 a->vecinuse = col + 1; 1611 PetscCall(MatDenseGetLDA(a->A, &lda)); 1612 PetscCall(MatDenseGetArrayWrite(a->A, (PetscScalar **)&a->ptrinuse)); 1613 PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)lda))); 1614 *v = a->cvec; 1615 PetscFunctionReturn(PETSC_SUCCESS); 1616 } 1617 1618 PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v) 1619 { 1620 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1621 1622 PetscFunctionBegin; 1623 PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first"); 1624 PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector"); 1625 VecCheckAssembled(a->cvec); 1626 a->vecinuse = 0; 1627 PetscCall(MatDenseRestoreArrayWrite(a->A, (PetscScalar **)&a->ptrinuse)); 1628 PetscCall(VecResetArray(a->cvec)); 1629 if (v) *v = NULL; 1630 PetscFunctionReturn(PETSC_SUCCESS); 1631 } 1632 1633 static PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A, PetscInt rbegin, PetscInt rend, PetscInt cbegin, PetscInt cend, Mat *v) 1634 { 1635 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1636 Mat_MPIDense *c; 1637 MPI_Comm comm; 1638 PetscInt prbegin, prend, pcbegin, pcend; 1639 1640 PetscFunctionBegin; 1641 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 1642 PetscCheck(!a->vecinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1643 PetscCheck(!a->matinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1644 prbegin = PetscMax(0, PetscMin(A->rmap->rend, rbegin) - A->rmap->rstart); 1645 prend = PetscMin(A->rmap->n, PetscMax(0, rend - A->rmap->rstart)); 1646 pcbegin = PetscMax(0, PetscMin(A->cmap->rend, cbegin) - A->cmap->rstart); 1647 pcend = PetscMin(A->cmap->n, PetscMax(0, cend - A->cmap->rstart)); 1648 if (!a->cmat) { 1649 PetscCall(MatCreate(comm, &a->cmat)); 1650 PetscCall(MatSetType(a->cmat, ((PetscObject)A)->type_name)); 1651 if (rend - rbegin == A->rmap->N) PetscCall(PetscLayoutReference(A->rmap, &a->cmat->rmap)); 1652 else { 1653 PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, prend - prbegin)); 1654 PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin)); 1655 PetscCall(PetscLayoutSetUp(a->cmat->rmap)); 1656 } 1657 if (cend - cbegin == A->cmap->N) PetscCall(PetscLayoutReference(A->cmap, &a->cmat->cmap)); 1658 else { 1659 PetscCall(PetscLayoutSetLocalSize(a->cmat->cmap, pcend - pcbegin)); 1660 PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin)); 1661 PetscCall(PetscLayoutSetUp(a->cmat->cmap)); 1662 } 1663 c = (Mat_MPIDense *)a->cmat->data; 1664 c->sub_rbegin = rbegin; 1665 c->sub_rend = rend; 1666 c->sub_cbegin = cbegin; 1667 c->sub_cend = cend; 1668 } 1669 c = (Mat_MPIDense *)a->cmat->data; 1670 if (c->sub_rbegin != rbegin || c->sub_rend != rend) { 1671 PetscCall(PetscLayoutDestroy(&a->cmat->rmap)); 1672 PetscCall(PetscLayoutCreate(comm, &a->cmat->rmap)); 1673 PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, prend - prbegin)); 1674 PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin)); 1675 PetscCall(PetscLayoutSetUp(a->cmat->rmap)); 1676 c->sub_rbegin = rbegin; 1677 c->sub_rend = rend; 1678 } 1679 if (c->sub_cbegin != cbegin || c->sub_cend != cend) { 1680 // special optimization: check if all columns are owned by rank 0, in which case no communication is necessary 1681 if ((cend - cbegin != a->cmat->cmap->N) || (A->cmap->range[1] != A->cmap->N)) { 1682 PetscCall(PetscLayoutDestroy(&a->cmat->cmap)); 1683 PetscCall(PetscLayoutCreate(comm, &a->cmat->cmap)); 1684 PetscCall(PetscLayoutSetLocalSize(a->cmat->cmap, pcend - pcbegin)); 1685 PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin)); 1686 PetscCall(PetscLayoutSetUp(a->cmat->cmap)); 1687 PetscCall(VecDestroy(&c->lvec)); 1688 PetscCall(PetscSFDestroy(&c->Mvctx)); 1689 } 1690 c->sub_cbegin = cbegin; 1691 c->sub_cend = cend; 1692 } 1693 PetscCheck(!c->A, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1694 PetscCall(MatDenseGetSubMatrix(a->A, prbegin, prend, cbegin, cend, &c->A)); 1695 1696 a->cmat->preallocated = PETSC_TRUE; 1697 a->cmat->assembled = PETSC_TRUE; 1698 #if defined(PETSC_HAVE_DEVICE) 1699 a->cmat->offloadmask = c->A->offloadmask; 1700 #endif 1701 a->matinuse = cbegin + 1; 1702 *v = a->cmat; 1703 PetscFunctionReturn(PETSC_SUCCESS); 1704 } 1705 1706 static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A, Mat *v) 1707 { 1708 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1709 Mat_MPIDense *c; 1710 1711 PetscFunctionBegin; 1712 PetscCheck(a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetSubMatrix() first"); 1713 PetscCheck(a->cmat, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal matrix"); 1714 PetscCheck(*v == a->cmat, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Not the matrix obtained from MatDenseGetSubMatrix()"); 1715 a->matinuse = 0; 1716 c = (Mat_MPIDense *)a->cmat->data; 1717 PetscCall(MatDenseRestoreSubMatrix(a->A, &c->A)); 1718 if (v) *v = NULL; 1719 #if defined(PETSC_HAVE_DEVICE) 1720 A->offloadmask = a->A->offloadmask; 1721 #endif 1722 PetscFunctionReturn(PETSC_SUCCESS); 1723 } 1724 1725 /*MC 1726 MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices. 1727 1728 Options Database Key: 1729 . -mat_type mpidense - sets the matrix type to `MATMPIDENSE` during a call to `MatSetFromOptions()` 1730 1731 Level: beginner 1732 1733 .seealso: [](ch_matrices), `Mat`, `MatCreateDense()`, `MATSEQDENSE`, `MATDENSE` 1734 M*/ 1735 PetscErrorCode MatCreate_MPIDense(Mat mat) 1736 { 1737 Mat_MPIDense *a; 1738 1739 PetscFunctionBegin; 1740 PetscCall(PetscNew(&a)); 1741 mat->data = (void *)a; 1742 mat->ops[0] = MatOps_Values; 1743 1744 mat->insertmode = NOT_SET_VALUES; 1745 1746 /* build cache for off array entries formed */ 1747 a->donotstash = PETSC_FALSE; 1748 1749 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)mat), 1, &mat->stash)); 1750 1751 /* stuff used for matrix vector multiply */ 1752 a->lvec = NULL; 1753 a->Mvctx = NULL; 1754 a->roworiented = PETSC_TRUE; 1755 1756 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", MatDenseGetLDA_MPIDense)); 1757 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", MatDenseSetLDA_MPIDense)); 1758 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", MatDenseGetArray_MPIDense)); 1759 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", MatDenseRestoreArray_MPIDense)); 1760 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", MatDenseGetArrayRead_MPIDense)); 1761 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", MatDenseRestoreArrayRead_MPIDense)); 1762 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", MatDenseGetArrayWrite_MPIDense)); 1763 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", MatDenseRestoreArrayWrite_MPIDense)); 1764 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", MatDensePlaceArray_MPIDense)); 1765 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", MatDenseResetArray_MPIDense)); 1766 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", MatDenseReplaceArray_MPIDense)); 1767 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDense)); 1768 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDense)); 1769 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDense)); 1770 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDense)); 1771 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDense)); 1772 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDense)); 1773 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", MatDenseGetSubMatrix_MPIDense)); 1774 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", MatDenseRestoreSubMatrix_MPIDense)); 1775 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense)); 1776 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", MatConvert_MPIDense_MPIAIJ)); 1777 #if defined(PETSC_HAVE_ELEMENTAL) 1778 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", MatConvert_MPIDense_Elemental)); 1779 #endif 1780 #if defined(PETSC_HAVE_SCALAPACK) 1781 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", MatConvert_Dense_ScaLAPACK)); 1782 #endif 1783 #if defined(PETSC_HAVE_CUDA) 1784 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", MatConvert_MPIDense_MPIDenseCUDA)); 1785 #endif 1786 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", MatMPIDenseSetPreallocation_MPIDense)); 1787 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense)); 1788 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ)); 1789 #if defined(PETSC_HAVE_CUDA) 1790 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense)); 1791 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ)); 1792 #endif 1793 #if defined(PETSC_HAVE_HIP) 1794 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", MatConvert_MPIDense_MPIDenseHIP)); 1795 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense)); 1796 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ)); 1797 #endif 1798 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", MatDenseGetColumn_MPIDense)); 1799 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", MatDenseRestoreColumn_MPIDense)); 1800 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultColumnRange_C", MatMultColumnRange_MPIDense)); 1801 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultAddColumnRange_C", MatMultAddColumnRange_MPIDense)); 1802 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeColumnRange_C", MatMultHermitianTransposeColumnRange_MPIDense)); 1803 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeAddColumnRange_C", MatMultHermitianTransposeAddColumnRange_MPIDense)); 1804 PetscCall(PetscObjectChangeTypeName((PetscObject)mat, MATMPIDENSE)); 1805 PetscFunctionReturn(PETSC_SUCCESS); 1806 } 1807 1808 /*MC 1809 MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices. 1810 1811 This matrix type is identical to `MATSEQDENSE` when constructed with a single process communicator, 1812 and `MATMPIDENSE` otherwise. 1813 1814 Options Database Key: 1815 . -mat_type dense - sets the matrix type to `MATDENSE` during a call to `MatSetFromOptions()` 1816 1817 Level: beginner 1818 1819 .seealso: [](ch_matrices), `Mat`, `MATSEQDENSE`, `MATMPIDENSE`, `MATDENSECUDA`, `MATDENSEHIP` 1820 M*/ 1821 1822 /*@ 1823 MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries 1824 1825 Collective 1826 1827 Input Parameters: 1828 + B - the matrix 1829 - data - optional location of matrix data. Set to `NULL` for PETSc 1830 to control all matrix memory allocation. 1831 1832 Level: intermediate 1833 1834 Notes: 1835 The dense format is fully compatible with standard Fortran 1836 storage by columns. 1837 1838 The data input variable is intended primarily for Fortran programmers 1839 who wish to allocate their own matrix memory space. Most users should 1840 set `data` to `NULL`. 1841 1842 .seealso: [](ch_matrices), `Mat`, `MATMPIDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()` 1843 @*/ 1844 PetscErrorCode MatMPIDenseSetPreallocation(Mat B, PetscScalar *data) 1845 { 1846 PetscFunctionBegin; 1847 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 1848 PetscTryMethod(B, "MatMPIDenseSetPreallocation_C", (Mat, PetscScalar *), (B, data)); 1849 PetscFunctionReturn(PETSC_SUCCESS); 1850 } 1851 1852 /*@ 1853 MatDensePlaceArray - Allows one to replace the array in a `MATDENSE` matrix with an 1854 array provided by the user. This is useful to avoid copying an array 1855 into a matrix 1856 1857 Not Collective 1858 1859 Input Parameters: 1860 + mat - the matrix 1861 - array - the array in column major order 1862 1863 Level: developer 1864 1865 Note: 1866 Adding `const` to `array` was an oversight, see notes in `VecPlaceArray()`. 1867 1868 You can return to the original array with a call to `MatDenseResetArray()`. The user is responsible for freeing this array; it will not be 1869 freed when the matrix is destroyed. 1870 1871 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDenseResetArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`, 1872 `MatDenseReplaceArray()` 1873 @*/ 1874 PetscErrorCode MatDensePlaceArray(Mat mat, const PetscScalar *array) 1875 { 1876 PetscFunctionBegin; 1877 PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 1878 PetscUseMethod(mat, "MatDensePlaceArray_C", (Mat, const PetscScalar *), (mat, array)); 1879 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 1880 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 1881 mat->offloadmask = PETSC_OFFLOAD_CPU; 1882 #endif 1883 PetscFunctionReturn(PETSC_SUCCESS); 1884 } 1885 1886 /*@ 1887 MatDenseResetArray - Resets the matrix array to that it previously had before the call to `MatDensePlaceArray()` 1888 1889 Not Collective 1890 1891 Input Parameter: 1892 . mat - the matrix 1893 1894 Level: developer 1895 1896 Note: 1897 You can only call this after a call to `MatDensePlaceArray()` 1898 1899 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDensePlaceArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()` 1900 @*/ 1901 PetscErrorCode MatDenseResetArray(Mat mat) 1902 { 1903 PetscFunctionBegin; 1904 PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 1905 PetscUseMethod(mat, "MatDenseResetArray_C", (Mat), (mat)); 1906 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 1907 PetscFunctionReturn(PETSC_SUCCESS); 1908 } 1909 1910 /*@ 1911 MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an 1912 array provided by the user. This is useful to avoid copying an array 1913 into a matrix 1914 1915 Not Collective 1916 1917 Input Parameters: 1918 + mat - the matrix 1919 - array - the array in column major order 1920 1921 Level: developer 1922 1923 Note: 1924 Adding `const` to `array` was an oversight, see notes in `VecPlaceArray()`. 1925 1926 The memory passed in MUST be obtained with `PetscMalloc()` and CANNOT be 1927 freed by the user. It will be freed when the matrix is destroyed. 1928 1929 .seealso: [](ch_matrices), `Mat`, `MatDensePlaceArray()`, `MatDenseGetArray()`, `VecReplaceArray()` 1930 @*/ 1931 PetscErrorCode MatDenseReplaceArray(Mat mat, const PetscScalar *array) 1932 { 1933 PetscFunctionBegin; 1934 PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 1935 PetscUseMethod(mat, "MatDenseReplaceArray_C", (Mat, const PetscScalar *), (mat, array)); 1936 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 1937 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 1938 mat->offloadmask = PETSC_OFFLOAD_CPU; 1939 #endif 1940 PetscFunctionReturn(PETSC_SUCCESS); 1941 } 1942 1943 /*@ 1944 MatCreateDense - Creates a matrix in `MATDENSE` format. 1945 1946 Collective 1947 1948 Input Parameters: 1949 + comm - MPI communicator 1950 . m - number of local rows (or `PETSC_DECIDE` to have calculated if `M` is given) 1951 . n - number of local columns (or `PETSC_DECIDE` to have calculated if `N` is given) 1952 . M - number of global rows (or `PETSC_DECIDE` to have calculated if `m` is given) 1953 . N - number of global columns (or `PETSC_DECIDE` to have calculated if `n` is given) 1954 - data - optional location of matrix data. Set data to `NULL` (`PETSC_NULL_SCALAR_ARRAY` for Fortran users) for PETSc 1955 to control all matrix memory allocation. 1956 1957 Output Parameter: 1958 . A - the matrix 1959 1960 Level: intermediate 1961 1962 Notes: 1963 The dense format is fully compatible with standard Fortran 1964 storage by columns. 1965 1966 Although local portions of the matrix are stored in column-major 1967 order, the matrix is partitioned across MPI ranks by row. 1968 1969 The data input variable is intended primarily for Fortran programmers 1970 who wish to allocate their own matrix memory space. Most users should 1971 set `data` to `NULL` (`PETSC_NULL_SCALAR_ARRAY` for Fortran users). 1972 1973 The user MUST specify either the local or global matrix dimensions 1974 (possibly both). 1975 1976 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()` 1977 @*/ 1978 PetscErrorCode MatCreateDense(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar data[], Mat *A) 1979 { 1980 PetscFunctionBegin; 1981 PetscCall(MatCreate(comm, A)); 1982 PetscCall(MatSetSizes(*A, m, n, M, N)); 1983 PetscCall(MatSetType(*A, MATDENSE)); 1984 PetscCall(MatSeqDenseSetPreallocation(*A, data)); 1985 PetscCall(MatMPIDenseSetPreallocation(*A, data)); 1986 PetscFunctionReturn(PETSC_SUCCESS); 1987 } 1988 1989 static PetscErrorCode MatDuplicate_MPIDense(Mat A, MatDuplicateOption cpvalues, Mat *newmat) 1990 { 1991 Mat mat; 1992 Mat_MPIDense *a, *oldmat = (Mat_MPIDense *)A->data; 1993 1994 PetscFunctionBegin; 1995 *newmat = NULL; 1996 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat)); 1997 PetscCall(MatSetSizes(mat, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 1998 PetscCall(MatSetType(mat, ((PetscObject)A)->type_name)); 1999 a = (Mat_MPIDense *)mat->data; 2000 2001 mat->factortype = A->factortype; 2002 mat->assembled = PETSC_TRUE; 2003 mat->preallocated = PETSC_TRUE; 2004 2005 mat->insertmode = NOT_SET_VALUES; 2006 a->donotstash = oldmat->donotstash; 2007 2008 PetscCall(PetscLayoutReference(A->rmap, &mat->rmap)); 2009 PetscCall(PetscLayoutReference(A->cmap, &mat->cmap)); 2010 2011 PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A)); 2012 2013 *newmat = mat; 2014 PetscFunctionReturn(PETSC_SUCCESS); 2015 } 2016 2017 static PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer) 2018 { 2019 PetscBool isbinary; 2020 #if defined(PETSC_HAVE_HDF5) 2021 PetscBool ishdf5; 2022 #endif 2023 2024 PetscFunctionBegin; 2025 PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1); 2026 PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2); 2027 /* force binary viewer to load .info file if it has not yet done so */ 2028 PetscCall(PetscViewerSetUp(viewer)); 2029 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 2030 #if defined(PETSC_HAVE_HDF5) 2031 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5)); 2032 #endif 2033 if (isbinary) { 2034 PetscCall(MatLoad_Dense_Binary(newMat, viewer)); 2035 #if defined(PETSC_HAVE_HDF5) 2036 } else if (ishdf5) { 2037 PetscCall(MatLoad_Dense_HDF5(newMat, viewer)); 2038 #endif 2039 } else SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name); 2040 PetscFunctionReturn(PETSC_SUCCESS); 2041 } 2042 2043 static PetscErrorCode MatEqual_MPIDense(Mat A, Mat B, PetscBool *flag) 2044 { 2045 Mat_MPIDense *matB = (Mat_MPIDense *)B->data, *matA = (Mat_MPIDense *)A->data; 2046 Mat a, b; 2047 2048 PetscFunctionBegin; 2049 a = matA->A; 2050 b = matB->A; 2051 PetscCall(MatEqual(a, b, flag)); 2052 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A))); 2053 PetscFunctionReturn(PETSC_SUCCESS); 2054 } 2055 2056 static PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data) 2057 { 2058 Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data; 2059 2060 PetscFunctionBegin; 2061 PetscCall(PetscFree2(atb->sendbuf, atb->recvcounts)); 2062 PetscCall(MatDestroy(&atb->atb)); 2063 PetscCall(PetscFree(atb)); 2064 PetscFunctionReturn(PETSC_SUCCESS); 2065 } 2066 2067 static PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data) 2068 { 2069 Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data; 2070 2071 PetscFunctionBegin; 2072 PetscCall(PetscFree2(abt->buf[0], abt->buf[1])); 2073 PetscCall(PetscFree2(abt->recvcounts, abt->recvdispls)); 2074 PetscCall(PetscFree(abt)); 2075 PetscFunctionReturn(PETSC_SUCCESS); 2076 } 2077 2078 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) 2079 { 2080 Mat_MPIDense *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data; 2081 Mat_TransMatMultDense *atb; 2082 MPI_Comm comm; 2083 PetscMPIInt size, *recvcounts; 2084 PetscScalar *carray, *sendbuf; 2085 const PetscScalar *atbarray; 2086 PetscInt i, cN = C->cmap->N, proc, k, j, lda; 2087 const PetscInt *ranges; 2088 2089 PetscFunctionBegin; 2090 MatCheckProduct(C, 3); 2091 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 2092 atb = (Mat_TransMatMultDense *)C->product->data; 2093 recvcounts = atb->recvcounts; 2094 sendbuf = atb->sendbuf; 2095 2096 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 2097 PetscCallMPI(MPI_Comm_size(comm, &size)); 2098 2099 /* compute atbarray = aseq^T * bseq */ 2100 PetscCall(MatTransposeMatMult(a->A, b->A, atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX, PETSC_DETERMINE, &atb->atb)); 2101 2102 PetscCall(MatGetOwnershipRanges(C, &ranges)); 2103 2104 if (ranges[1] == C->rmap->N) { 2105 /* all of the values are being reduced to rank 0: optimize this case to use MPI_Reduce and GPU aware MPI if available */ 2106 PetscInt atb_lda, c_lda; 2107 Mat atb_local = atb->atb; 2108 Mat atb_alloc = NULL; 2109 Mat c_local = c->A; 2110 Mat c_alloc = NULL; 2111 PetscMemType atb_memtype, c_memtype; 2112 const PetscScalar *atb_array = NULL; 2113 MPI_Datatype vector_type; 2114 PetscScalar *c_array = NULL; 2115 PetscMPIInt rank; 2116 2117 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 2118 2119 PetscCall(MatDenseGetLDA(atb_local, &atb_lda)); 2120 if (atb_lda != C->rmap->N) { 2121 // copy atb to a matrix that will have lda == the number of rows 2122 PetscCall(MatDuplicate(atb_local, MAT_DO_NOT_COPY_VALUES, &atb_alloc)); 2123 PetscCall(MatCopy(atb_local, atb_alloc, DIFFERENT_NONZERO_PATTERN)); 2124 atb_local = atb_alloc; 2125 } 2126 2127 if (rank == 0) { 2128 PetscCall(MatDenseGetLDA(c_local, &c_lda)); 2129 if (c_lda != C->rmap->N) { 2130 // copy c to a matrix that will have lda == the number of rows 2131 PetscCall(MatDuplicate(c_local, MAT_DO_NOT_COPY_VALUES, &c_alloc)); 2132 c_local = c_alloc; 2133 } 2134 PetscCall(MatZeroEntries(c_local)); 2135 } 2136 /* atb_local and c_local have nrows = lda = A->cmap->N and ncols = 2137 * B->cmap->N: use the a->Mvctx to use the best reduction method */ 2138 if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); 2139 vector_type = MPIU_SCALAR; 2140 if (B->cmap->N > 1) { 2141 PetscMPIInt mpi_N; 2142 2143 PetscCall(PetscMPIIntCast(B->cmap->N, &mpi_N)); 2144 PetscCallMPI(MPI_Type_contiguous(mpi_N, MPIU_SCALAR, &vector_type)); 2145 PetscCallMPI(MPI_Type_commit(&vector_type)); 2146 } 2147 PetscCall(MatDenseGetArrayReadAndMemType(atb_local, &atb_array, &atb_memtype)); 2148 PetscCall(MatDenseGetArrayWriteAndMemType(c_local, &c_array, &c_memtype)); 2149 PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, vector_type, atb_memtype, atb_array, c_memtype, c_array, MPIU_SUM)); 2150 PetscCall(PetscSFReduceEnd(a->Mvctx, vector_type, atb_array, c_array, MPIU_SUM)); 2151 PetscCall(MatDenseRestoreArrayWriteAndMemType(c_local, &c_array)); 2152 PetscCall(MatDenseRestoreArrayReadAndMemType(atb_local, &atb_array)); 2153 if (rank == 0 && c_local != c->A) PetscCall(MatCopy(c_local, c->A, DIFFERENT_NONZERO_PATTERN)); 2154 if (B->cmap->N > 1) PetscCallMPI(MPI_Type_free(&vector_type)); 2155 PetscCall(MatDestroy(&atb_alloc)); 2156 PetscCall(MatDestroy(&c_alloc)); 2157 PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 2158 PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 2159 PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 2160 PetscFunctionReturn(PETSC_SUCCESS); 2161 } 2162 2163 /* arrange atbarray into sendbuf */ 2164 PetscCall(MatDenseGetArrayRead(atb->atb, &atbarray)); 2165 PetscCall(MatDenseGetLDA(atb->atb, &lda)); 2166 for (proc = 0, k = 0; proc < size; proc++) { 2167 for (j = 0; j < cN; j++) { 2168 for (i = ranges[proc]; i < ranges[proc + 1]; i++) sendbuf[k++] = atbarray[i + j * lda]; 2169 } 2170 } 2171 PetscCall(MatDenseRestoreArrayRead(atb->atb, &atbarray)); 2172 2173 /* sum all atbarray to local values of C */ 2174 PetscCall(MatDenseGetArrayWrite(c->A, &carray)); 2175 PetscCallMPI(MPI_Reduce_scatter(sendbuf, carray, recvcounts, MPIU_SCALAR, MPIU_SUM, comm)); 2176 PetscCall(MatDenseRestoreArrayWrite(c->A, &carray)); 2177 PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 2178 PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 2179 PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 2180 PetscFunctionReturn(PETSC_SUCCESS); 2181 } 2182 2183 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C) 2184 { 2185 MPI_Comm comm; 2186 PetscMPIInt size; 2187 PetscInt cm = A->cmap->n, cM, cN = B->cmap->N; 2188 Mat_TransMatMultDense *atb; 2189 PetscBool cisdense = PETSC_FALSE; 2190 const PetscInt *ranges; 2191 2192 PetscFunctionBegin; 2193 MatCheckProduct(C, 4); 2194 PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 2195 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 2196 if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) { 2197 SETERRQ(comm, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend); 2198 } 2199 2200 /* create matrix product C */ 2201 PetscCall(MatSetSizes(C, cm, B->cmap->n, A->cmap->N, B->cmap->N)); 2202 #if defined(PETSC_HAVE_CUDA) 2203 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, "")); 2204 #endif 2205 #if defined(PETSC_HAVE_HIP) 2206 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSEHIP, "")); 2207 #endif 2208 if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 2209 PetscCall(MatSetUp(C)); 2210 2211 /* create data structure for reuse C */ 2212 PetscCallMPI(MPI_Comm_size(comm, &size)); 2213 PetscCall(PetscNew(&atb)); 2214 cM = C->rmap->N; 2215 PetscCall(PetscMalloc2(cM * cN, &atb->sendbuf, size, &atb->recvcounts)); 2216 PetscCall(MatGetOwnershipRanges(C, &ranges)); 2217 for (PetscMPIInt i = 0; i < size; i++) PetscCall(PetscMPIIntCast((ranges[i + 1] - ranges[i]) * cN, &atb->recvcounts[i])); 2218 C->product->data = atb; 2219 C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense; 2220 PetscFunctionReturn(PETSC_SUCCESS); 2221 } 2222 2223 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C) 2224 { 2225 MPI_Comm comm; 2226 PetscMPIInt i, size; 2227 PetscInt maxRows, bufsiz; 2228 PetscMPIInt tag; 2229 PetscInt alg; 2230 Mat_MatTransMultDense *abt; 2231 Mat_Product *product = C->product; 2232 PetscBool flg; 2233 2234 PetscFunctionBegin; 2235 MatCheckProduct(C, 4); 2236 PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 2237 /* check local size of A and B */ 2238 PetscCheck(A->cmap->n == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local column dimensions are incompatible, A (%" PetscInt_FMT ") != B (%" PetscInt_FMT ")", A->cmap->n, B->cmap->n); 2239 2240 PetscCall(PetscStrcmp(product->alg, "allgatherv", &flg)); 2241 alg = flg ? 0 : 1; 2242 2243 /* setup matrix product C */ 2244 PetscCall(MatSetSizes(C, A->rmap->n, B->rmap->n, A->rmap->N, B->rmap->N)); 2245 PetscCall(MatSetType(C, MATMPIDENSE)); 2246 PetscCall(MatSetUp(C)); 2247 PetscCall(PetscObjectGetNewTag((PetscObject)C, &tag)); 2248 2249 /* create data structure for reuse C */ 2250 PetscCall(PetscObjectGetComm((PetscObject)C, &comm)); 2251 PetscCallMPI(MPI_Comm_size(comm, &size)); 2252 PetscCall(PetscNew(&abt)); 2253 abt->tag = tag; 2254 abt->alg = alg; 2255 switch (alg) { 2256 case 1: /* alg: "cyclic" */ 2257 for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, B->rmap->range[i + 1] - B->rmap->range[i]); 2258 bufsiz = A->cmap->N * maxRows; 2259 PetscCall(PetscMalloc2(bufsiz, &abt->buf[0], bufsiz, &abt->buf[1])); 2260 break; 2261 default: /* alg: "allgatherv" */ 2262 PetscCall(PetscMalloc2(B->rmap->n * B->cmap->N, &abt->buf[0], B->rmap->N * B->cmap->N, &abt->buf[1])); 2263 PetscCall(PetscMalloc2(size, &abt->recvcounts, size + 1, &abt->recvdispls)); 2264 for (i = 0; i <= size; i++) PetscCall(PetscMPIIntCast(B->rmap->range[i] * A->cmap->N, &abt->recvdispls[i])); 2265 for (i = 0; i < size; i++) PetscCall(PetscMPIIntCast(abt->recvdispls[i + 1] - abt->recvdispls[i], &abt->recvcounts[i])); 2266 break; 2267 } 2268 2269 C->product->data = abt; 2270 C->product->destroy = MatDestroy_MatMatTransMult_MPIDense_MPIDense; 2271 C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense; 2272 PetscFunctionReturn(PETSC_SUCCESS); 2273 } 2274 2275 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C) 2276 { 2277 Mat_MPIDense *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data; 2278 Mat_MatTransMultDense *abt; 2279 MPI_Comm comm; 2280 PetscMPIInt rank, size, sendto, recvfrom, recvisfrom; 2281 PetscScalar *sendbuf, *recvbuf = NULL, *cv; 2282 PetscInt i, cK = A->cmap->N, sendsiz, recvsiz, k, j, bn; 2283 PetscScalar _DOne = 1.0, _DZero = 0.0; 2284 const PetscScalar *av, *bv; 2285 PetscBLASInt cm, cn, ck, alda, blda = 0, clda; 2286 MPI_Request reqs[2]; 2287 const PetscInt *ranges; 2288 2289 PetscFunctionBegin; 2290 MatCheckProduct(C, 3); 2291 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 2292 abt = (Mat_MatTransMultDense *)C->product->data; 2293 PetscCall(PetscObjectGetComm((PetscObject)C, &comm)); 2294 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 2295 PetscCallMPI(MPI_Comm_size(comm, &size)); 2296 PetscCall(MatDenseGetArrayRead(a->A, &av)); 2297 PetscCall(MatDenseGetArrayRead(b->A, &bv)); 2298 PetscCall(MatDenseGetArrayWrite(c->A, &cv)); 2299 PetscCall(MatDenseGetLDA(a->A, &i)); 2300 PetscCall(PetscBLASIntCast(i, &alda)); 2301 PetscCall(MatDenseGetLDA(b->A, &i)); 2302 PetscCall(PetscBLASIntCast(i, &blda)); 2303 PetscCall(MatDenseGetLDA(c->A, &i)); 2304 PetscCall(PetscBLASIntCast(i, &clda)); 2305 PetscCall(MatGetOwnershipRanges(B, &ranges)); 2306 bn = B->rmap->n; 2307 if (blda == bn) { 2308 sendbuf = (PetscScalar *)bv; 2309 } else { 2310 sendbuf = abt->buf[0]; 2311 for (k = 0, i = 0; i < cK; i++) { 2312 for (j = 0; j < bn; j++, k++) sendbuf[k] = bv[i * blda + j]; 2313 } 2314 } 2315 if (size > 1) { 2316 sendto = (rank + size - 1) % size; 2317 recvfrom = (rank + size + 1) % size; 2318 } else { 2319 sendto = recvfrom = 0; 2320 } 2321 PetscCall(PetscBLASIntCast(cK, &ck)); 2322 PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm)); 2323 recvisfrom = rank; 2324 for (i = 0; i < size; i++) { 2325 /* we have finished receiving in sending, bufs can be read/modified */ 2326 PetscMPIInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */ 2327 PetscInt nextbn = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom]; 2328 2329 if (nextrecvisfrom != rank) { 2330 /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */ 2331 sendsiz = cK * bn; 2332 recvsiz = cK * nextbn; 2333 recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1]; 2334 PetscCallMPI(MPIU_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0])); 2335 PetscCallMPI(MPIU_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1])); 2336 } 2337 2338 /* local aseq * sendbuf^T */ 2339 PetscCall(PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn)); 2340 if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &cm, &cn, &ck, &_DOne, av, &alda, sendbuf, &cn, &_DZero, cv + clda * ranges[recvisfrom], &clda)); 2341 2342 if (nextrecvisfrom != rank) { 2343 /* wait for the sends and receives to complete, swap sendbuf and recvbuf */ 2344 PetscCallMPI(MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE)); 2345 } 2346 bn = nextbn; 2347 recvisfrom = nextrecvisfrom; 2348 sendbuf = recvbuf; 2349 } 2350 PetscCall(MatDenseRestoreArrayRead(a->A, &av)); 2351 PetscCall(MatDenseRestoreArrayRead(b->A, &bv)); 2352 PetscCall(MatDenseRestoreArrayWrite(c->A, &cv)); 2353 PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 2354 PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 2355 PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 2356 PetscFunctionReturn(PETSC_SUCCESS); 2357 } 2358 2359 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C) 2360 { 2361 Mat_MPIDense *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data; 2362 Mat_MatTransMultDense *abt; 2363 MPI_Comm comm; 2364 PetscMPIInt size, ibn; 2365 PetscScalar *cv, *sendbuf, *recvbuf; 2366 const PetscScalar *av, *bv; 2367 PetscInt blda, i, cK = A->cmap->N, k, j, bn; 2368 PetscScalar _DOne = 1.0, _DZero = 0.0; 2369 PetscBLASInt cm, cn, ck, alda, clda; 2370 2371 PetscFunctionBegin; 2372 MatCheckProduct(C, 3); 2373 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 2374 abt = (Mat_MatTransMultDense *)C->product->data; 2375 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 2376 PetscCallMPI(MPI_Comm_size(comm, &size)); 2377 PetscCall(MatDenseGetArrayRead(a->A, &av)); 2378 PetscCall(MatDenseGetArrayRead(b->A, &bv)); 2379 PetscCall(MatDenseGetArrayWrite(c->A, &cv)); 2380 PetscCall(MatDenseGetLDA(a->A, &i)); 2381 PetscCall(PetscBLASIntCast(i, &alda)); 2382 PetscCall(MatDenseGetLDA(b->A, &blda)); 2383 PetscCall(MatDenseGetLDA(c->A, &i)); 2384 PetscCall(PetscBLASIntCast(i, &clda)); 2385 /* copy transpose of B into buf[0] */ 2386 bn = B->rmap->n; 2387 sendbuf = abt->buf[0]; 2388 recvbuf = abt->buf[1]; 2389 for (k = 0, j = 0; j < bn; j++) { 2390 for (i = 0; i < cK; i++, k++) sendbuf[k] = bv[i * blda + j]; 2391 } 2392 PetscCall(MatDenseRestoreArrayRead(b->A, &bv)); 2393 PetscCall(PetscMPIIntCast(bn * cK, &ibn)); 2394 PetscCallMPI(MPI_Allgatherv(sendbuf, ibn, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm)); 2395 PetscCall(PetscBLASIntCast(cK, &ck)); 2396 PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm)); 2397 PetscCall(PetscBLASIntCast(c->A->cmap->n, &cn)); 2398 if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &cm, &cn, &ck, &_DOne, av, &alda, recvbuf, &ck, &_DZero, cv, &clda)); 2399 PetscCall(MatDenseRestoreArrayRead(a->A, &av)); 2400 PetscCall(MatDenseRestoreArrayRead(b->A, &bv)); 2401 PetscCall(MatDenseRestoreArrayWrite(c->A, &cv)); 2402 PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 2403 PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 2404 PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 2405 PetscFunctionReturn(PETSC_SUCCESS); 2406 } 2407 2408 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) 2409 { 2410 Mat_MatTransMultDense *abt; 2411 2412 PetscFunctionBegin; 2413 MatCheckProduct(C, 3); 2414 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 2415 abt = (Mat_MatTransMultDense *)C->product->data; 2416 switch (abt->alg) { 2417 case 1: 2418 PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C)); 2419 break; 2420 default: 2421 PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C)); 2422 break; 2423 } 2424 PetscFunctionReturn(PETSC_SUCCESS); 2425 } 2426 2427 static PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data) 2428 { 2429 Mat_MatMultDense *ab = (Mat_MatMultDense *)data; 2430 2431 PetscFunctionBegin; 2432 PetscCall(MatDestroy(&ab->Ce)); 2433 PetscCall(MatDestroy(&ab->Ae)); 2434 PetscCall(MatDestroy(&ab->Be)); 2435 PetscCall(PetscFree(ab)); 2436 PetscFunctionReturn(PETSC_SUCCESS); 2437 } 2438 2439 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) 2440 { 2441 Mat_MatMultDense *ab; 2442 Mat_MPIDense *mdn = (Mat_MPIDense *)A->data; 2443 Mat_MPIDense *b = (Mat_MPIDense *)B->data; 2444 2445 PetscFunctionBegin; 2446 MatCheckProduct(C, 3); 2447 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Missing product data"); 2448 ab = (Mat_MatMultDense *)C->product->data; 2449 if (ab->Ae && ab->Ce) { 2450 #if PetscDefined(HAVE_ELEMENTAL) 2451 PetscCall(MatConvert_MPIDense_Elemental(A, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Ae)); 2452 PetscCall(MatConvert_MPIDense_Elemental(B, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Be)); 2453 PetscCall(MatMatMultNumeric_Elemental(ab->Ae, ab->Be, ab->Ce)); 2454 PetscCall(MatConvert(ab->Ce, MATMPIDENSE, MAT_REUSE_MATRIX, &C)); 2455 #else 2456 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "PETSC_HAVE_ELEMENTAL not defined"); 2457 #endif 2458 } else { 2459 MPI_Comm comm; 2460 const PetscScalar *read; 2461 PetscScalar *write; 2462 PetscInt lda; 2463 const PetscInt *ranges; 2464 PetscMPIInt size; 2465 2466 if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); /* cannot be done during the symbolic phase because of possible calls to MatProductReplaceMats() */ 2467 comm = PetscObjectComm((PetscObject)B); 2468 PetscCallMPI(MPI_Comm_size(comm, &size)); 2469 PetscCall(PetscLayoutGetRanges(B->rmap, &ranges)); 2470 if (ranges[1] == ranges[size]) { 2471 // optimize for the case where the B matrix is broadcast from rank 0 2472 PetscInt b_lda, be_lda; 2473 Mat b_local = b->A; 2474 Mat b_alloc = NULL; 2475 Mat be_local = ab->Be; 2476 Mat be_alloc = NULL; 2477 PetscMemType b_memtype, be_memtype; 2478 const PetscScalar *b_array = NULL; 2479 MPI_Datatype vector_type; 2480 PetscScalar *be_array = NULL; 2481 PetscMPIInt rank; 2482 2483 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 2484 PetscCall(MatDenseGetLDA(be_local, &be_lda)); 2485 if (be_lda != B->rmap->N) { 2486 PetscCall(MatDuplicate(be_local, MAT_DO_NOT_COPY_VALUES, &be_alloc)); 2487 be_local = be_alloc; 2488 } 2489 2490 if (rank == 0) { 2491 PetscCall(MatDenseGetLDA(b_local, &b_lda)); 2492 if (b_lda != B->rmap->N) { 2493 PetscCall(MatDuplicate(b_local, MAT_DO_NOT_COPY_VALUES, &b_alloc)); 2494 PetscCall(MatCopy(b_local, b_alloc, DIFFERENT_NONZERO_PATTERN)); 2495 b_local = b_alloc; 2496 } 2497 } 2498 vector_type = MPIU_SCALAR; 2499 if (B->cmap->N > 1) { 2500 PetscMPIInt mpi_N; 2501 2502 PetscCall(PetscMPIIntCast(B->cmap->N, &mpi_N)); 2503 PetscCallMPI(MPI_Type_contiguous(mpi_N, MPIU_SCALAR, &vector_type)); 2504 PetscCallMPI(MPI_Type_commit(&vector_type)); 2505 } 2506 PetscCall(MatDenseGetArrayReadAndMemType(b_local, &b_array, &b_memtype)); 2507 PetscCall(MatDenseGetArrayWriteAndMemType(be_local, &be_array, &be_memtype)); 2508 PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, vector_type, b_memtype, b_array, be_memtype, be_array, MPI_REPLACE)); 2509 PetscCall(PetscSFBcastEnd(mdn->Mvctx, vector_type, b_array, be_array, MPI_REPLACE)); 2510 PetscCall(MatDenseRestoreArrayWriteAndMemType(be_local, &be_array)); 2511 PetscCall(MatDenseRestoreArrayReadAndMemType(b_local, &b_array)); 2512 if (be_local != ab->Be) PetscCall(MatCopy(be_local, ab->Be, DIFFERENT_NONZERO_PATTERN)); 2513 if (B->cmap->N > 1) PetscCallMPI(MPI_Type_free(&vector_type)); 2514 PetscCall(MatDestroy(&be_alloc)); 2515 PetscCall(MatDestroy(&b_alloc)); 2516 } else { 2517 PetscCall(MatDenseGetLDA(B, &lda)); 2518 PetscCall(MatDenseGetArrayRead(B, &read)); 2519 PetscCall(MatDenseGetArrayWrite(ab->Be, &write)); 2520 for (PetscInt i = 0; i < C->cmap->N; ++i) { 2521 PetscCall(PetscSFBcastBegin(mdn->Mvctx, MPIU_SCALAR, read + i * lda, write + i * ab->Be->rmap->n, MPI_REPLACE)); 2522 PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, read + i * lda, write + i * ab->Be->rmap->n, MPI_REPLACE)); 2523 } 2524 PetscCall(MatDenseRestoreArrayWrite(ab->Be, &write)); 2525 PetscCall(MatDenseRestoreArrayRead(B, &read)); 2526 } 2527 PetscCall(MatMatMultNumeric_SeqDense_SeqDense(((Mat_MPIDense *)A->data)->A, ab->Be, ((Mat_MPIDense *)C->data)->A)); 2528 } 2529 PetscFunctionReturn(PETSC_SUCCESS); 2530 } 2531 2532 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C) 2533 { 2534 Mat_Product *product = C->product; 2535 PetscInt alg; 2536 Mat_MatMultDense *ab; 2537 PetscBool flg; 2538 2539 PetscFunctionBegin; 2540 MatCheckProduct(C, 4); 2541 PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 2542 /* check local size of A and B */ 2543 PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ", %" PetscInt_FMT ")", 2544 A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend); 2545 2546 PetscCall(PetscStrcmp(product->alg, "petsc", &flg)); 2547 alg = flg ? 0 : 1; 2548 2549 /* setup C */ 2550 PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N)); 2551 PetscCall(MatSetType(C, MATMPIDENSE)); 2552 PetscCall(MatSetUp(C)); 2553 2554 /* create data structure for reuse Cdense */ 2555 PetscCall(PetscNew(&ab)); 2556 2557 switch (alg) { 2558 case 1: /* alg: "elemental" */ 2559 #if PetscDefined(HAVE_ELEMENTAL) 2560 /* create elemental matrices Ae and Be */ 2561 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &ab->Ae)); 2562 PetscCall(MatSetSizes(ab->Ae, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N)); 2563 PetscCall(MatSetType(ab->Ae, MATELEMENTAL)); 2564 PetscCall(MatSetUp(ab->Ae)); 2565 PetscCall(MatSetOption(ab->Ae, MAT_ROW_ORIENTED, PETSC_FALSE)); 2566 2567 PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &ab->Be)); 2568 PetscCall(MatSetSizes(ab->Be, PETSC_DECIDE, PETSC_DECIDE, B->rmap->N, B->cmap->N)); 2569 PetscCall(MatSetType(ab->Be, MATELEMENTAL)); 2570 PetscCall(MatSetUp(ab->Be)); 2571 PetscCall(MatSetOption(ab->Be, MAT_ROW_ORIENTED, PETSC_FALSE)); 2572 2573 /* compute symbolic Ce = Ae*Be */ 2574 PetscCall(MatCreate(PetscObjectComm((PetscObject)C), &ab->Ce)); 2575 PetscCall(MatMatMultSymbolic_Elemental(ab->Ae, ab->Be, fill, ab->Ce)); 2576 #else 2577 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "PETSC_HAVE_ELEMENTAL not defined"); 2578 #endif 2579 break; 2580 default: /* alg: "petsc" */ 2581 ab->Ae = NULL; 2582 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, A->cmap->N, B->cmap->N, NULL, &ab->Be)); 2583 ab->Ce = NULL; 2584 break; 2585 } 2586 2587 C->product->data = ab; 2588 C->product->destroy = MatDestroy_MatMatMult_MPIDense_MPIDense; 2589 C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense; 2590 PetscFunctionReturn(PETSC_SUCCESS); 2591 } 2592 2593 static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C) 2594 { 2595 Mat_Product *product = C->product; 2596 const char *algTypes[2] = {"petsc", "elemental"}; 2597 PetscInt alg, nalg = PetscDefined(HAVE_ELEMENTAL) ? 2 : 1; 2598 PetscBool flg = PETSC_FALSE; 2599 2600 PetscFunctionBegin; 2601 /* Set default algorithm */ 2602 alg = 0; /* default is PETSc */ 2603 PetscCall(PetscStrcmp(product->alg, "default", &flg)); 2604 if (flg) PetscCall(MatProductSetAlgorithm(C, algTypes[alg])); 2605 2606 /* Get runtime option */ 2607 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat"); 2608 PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_AB", algTypes, nalg, algTypes[alg], &alg, &flg)); 2609 PetscOptionsEnd(); 2610 if (flg) PetscCall(MatProductSetAlgorithm(C, algTypes[alg])); 2611 2612 C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense; 2613 C->ops->productsymbolic = MatProductSymbolic_AB; 2614 PetscFunctionReturn(PETSC_SUCCESS); 2615 } 2616 2617 static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C) 2618 { 2619 Mat_Product *product = C->product; 2620 Mat A = product->A, B = product->B; 2621 2622 PetscFunctionBegin; 2623 PetscCheck(A->rmap->rstart == B->rmap->rstart && A->rmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", 2624 A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend); 2625 C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense; 2626 C->ops->productsymbolic = MatProductSymbolic_AtB; 2627 PetscFunctionReturn(PETSC_SUCCESS); 2628 } 2629 2630 static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C) 2631 { 2632 Mat_Product *product = C->product; 2633 const char *algTypes[2] = {"allgatherv", "cyclic"}; 2634 PetscInt alg, nalg = 2; 2635 PetscBool flg = PETSC_FALSE; 2636 2637 PetscFunctionBegin; 2638 /* Set default algorithm */ 2639 alg = 0; /* default is allgatherv */ 2640 PetscCall(PetscStrcmp(product->alg, "default", &flg)); 2641 if (flg) PetscCall(MatProductSetAlgorithm(C, algTypes[alg])); 2642 2643 /* Get runtime option */ 2644 if (product->api_user) { 2645 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatTransposeMult", "Mat"); 2646 PetscCall(PetscOptionsEList("-matmattransmult_mpidense_mpidense_via", "Algorithmic approach", "MatMatTransposeMult", algTypes, nalg, algTypes[alg], &alg, &flg)); 2647 PetscOptionsEnd(); 2648 } else { 2649 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_ABt", "Mat"); 2650 PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_ABt", algTypes, nalg, algTypes[alg], &alg, &flg)); 2651 PetscOptionsEnd(); 2652 } 2653 if (flg) PetscCall(MatProductSetAlgorithm(C, algTypes[alg])); 2654 2655 C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense; 2656 C->ops->productsymbolic = MatProductSymbolic_ABt; 2657 PetscFunctionReturn(PETSC_SUCCESS); 2658 } 2659 2660 static PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C) 2661 { 2662 Mat_Product *product = C->product; 2663 2664 PetscFunctionBegin; 2665 switch (product->type) { 2666 case MATPRODUCT_AB: 2667 PetscCall(MatProductSetFromOptions_MPIDense_AB(C)); 2668 break; 2669 case MATPRODUCT_AtB: 2670 PetscCall(MatProductSetFromOptions_MPIDense_AtB(C)); 2671 break; 2672 case MATPRODUCT_ABt: 2673 PetscCall(MatProductSetFromOptions_MPIDense_ABt(C)); 2674 break; 2675 default: 2676 break; 2677 } 2678 PetscFunctionReturn(PETSC_SUCCESS); 2679 } 2680