/*
   Basic functions for basic parallel dense matrices.
   Portions of this code are under:
   Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
*/

#include <../src/mat/impls/dense/mpi/mpidense.h> /*I "petscmat.h" I*/
#include <../src/mat/impls/aij/mpi/mpiaij.h>
#include <petscblaslapack.h>
#include <petsc/private/vecimpl.h>
#include <petscdevice.h>
#include <petsc/private/deviceimpl.h>

/*@
  MatDenseGetLocalMatrix - For a `MATMPIDENSE` or `MATSEQDENSE` matrix returns the sequential
  matrix that represents the operator. For sequential matrices it returns itself.

  Input Parameter:
. A - the sequential or MPI `MATDENSE` matrix

  Output Parameter:
. B - the inner matrix

  Level: intermediate

.seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATMPIDENSE`, `MATSEQDENSE`
@*/
PetscErrorCode MatDenseGetLocalMatrix(Mat A, Mat *B)
{
  Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
  PetscBool     flg;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscAssertPointer(B, 2);
  /* MPI dense stores its local rows in the inner sequential matrix mat->A;
     a sequential dense matrix is already its own "local" matrix */
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &flg));
  if (flg) *B = mat->A;
  else {
    PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQDENSE, &flg));
    PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)A)->type_name);
    *B = A;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Copy the values of A into B by delegating to the inner sequential matrices */
static PetscErrorCode MatCopy_MPIDense(Mat A, Mat B, MatStructure s)
{
  Mat_MPIDense *Amat = (Mat_MPIDense *)A->data;
  Mat_MPIDense *Bmat = (Mat_MPIDense *)B->data;

  PetscFunctionBegin;
  PetscCall(MatCopy(Amat->A, Bmat->A, s));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Add alpha to the diagonal entries of A; each rank updates only the diagonal
   entries that fall in its own row range */
PetscErrorCode MatShift_MPIDense(Mat A, PetscScalar alpha)
{
  Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
  PetscInt      j, lda, rstart = A->rmap->rstart, rend = A->rmap->rend, rend2;
  PetscScalar  *v;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArray(mat->A, &v));
  PetscCall(MatDenseGetLDA(mat->A, &lda));
  /* for rectangular matrices the diagonal ends at column N, which may be before rend */
  rend2 = PetscMin(rend, A->cmap->N);
  if (rend2 > rstart) {
    /* global diagonal entry (j,j) lives at local row j-rstart, column j of the inner matrix */
    for (j = rstart; j < rend2; j++) v[j - rstart + j * lda] += alpha;
    PetscCall(PetscLogFlops(rend2 - rstart));
  }
  PetscCall(MatDenseRestoreArray(mat->A, &v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Access one locally owned row; rows owned by other ranks are not supported */
static PetscErrorCode MatGetRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
  PetscInt      lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;

  PetscFunctionBegin;
  PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows");
  lrow = row - rstart;
  PetscCall(MatGetRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Release a row obtained with MatGetRow_MPIDense */
static PetscErrorCode MatRestoreRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
  PetscInt      lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;

  PetscFunctionBegin;
  PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows");
  lrow = row - rstart;
  PetscCall(MatRestoreRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Return the square diagonal block owned by this rank. The block is created once,
   shares storage with the inner matrix (preallocated at array + m*rstart, no copy),
   and is cached on A via PetscObjectCompose under the key "DiagonalBlock". */
static PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A, Mat *a)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)A->data;
  PetscInt      m = A->rmap->n, rstart = A->rmap->rstart;
  PetscScalar  *array;
  MPI_Comm      comm;
  PetscBool     flg;
  Mat           B;

  PetscFunctionBegin;
  PetscCall(MatHasCongruentLayouts(A, &flg));
  PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only square matrices supported.");
  PetscCall(PetscObjectQuery((PetscObject)A, "DiagonalBlock", (PetscObject *)&B));
  if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */
#if PetscDefined(HAVE_CUDA)
    PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSECUDA, &flg));
    PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSECUDA);
#elif PetscDefined(HAVE_HIP)
    PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSEHIP, &flg));
    PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSEHIP);
#endif
    PetscCall(PetscObjectGetComm((PetscObject)mdn->A, &comm));
    PetscCall(MatCreate(comm, &B));
    PetscCall(MatSetSizes(B, m, m, m, m));
    PetscCall(MatSetType(B, ((PetscObject)mdn->A)->type_name));
    /* alias the diagonal sub-block of the local storage; the cast drops const
       because MatSeqDenseSetPreallocation takes a non-const pointer */
    PetscCall(MatDenseGetArrayRead(mdn->A, (const PetscScalar **)&array));
    PetscCall(MatSeqDenseSetPreallocation(B, array + m * rstart));
    PetscCall(MatDenseRestoreArrayRead(mdn->A, (const PetscScalar **)&array));
    PetscCall(PetscObjectCompose((PetscObject)A, "DiagonalBlock", (PetscObject)B));
    *a = B;
    /* the compose above holds a reference; drop ours so A owns the cached block */
    PetscCall(MatDestroy(&B));
  } else *a = B;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Insert/add values: locally owned rows go straight into the inner matrix;
   off-process rows are stashed for communication during assembly (unless
   donotstash is set). Handles both row- and column-oriented input v. */
static PetscErrorCode MatSetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIDense *A = (Mat_MPIDense *)mat->data;
  PetscInt      i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;
  PetscBool     roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row index means skip */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        PetscCall(MatSetValues(A->A, 1, &row, n, idxn, PetscSafePointerPlusOffset(v, i * n), addv));
      } else {
        /* column-oriented v: entry (i,j) is at v[i + j*m], inserted one at a time */
        for (j = 0; j < n; j++) {
          if (idxn[j] < 0) continue;
          PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large");
          PetscCall(MatSetValues(A->A, 1, &row, 1, &idxn[j], PetscSafePointerPlusOffset(v, i + j * m), addv));
        }
      }
    } else if (!A->donotstash) {
      /* off-process entry: mark unassembled and stash for MatAssemblyBegin/End */
      mat->assembled = PETSC_FALSE;
      if (roworiented) {
        PetscCall(MatStashValuesRow_Private(&mat->stash, idxm[i], n, idxn, PetscSafePointerPlusOffset(v, i * n), PETSC_FALSE));
      } else {
        PetscCall(MatStashValuesCol_Private(&mat->stash, idxm[i], n, idxn, PetscSafePointerPlusOffset(v, i), m, PETSC_FALSE));
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Retrieve values; only locally owned rows are supported (no communication) */
static PetscErrorCode MatGetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
  PetscInt      i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j = 0; j < n; j++) {
        if (idxn[j] < 0) continue; /* negative column */
        PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large");
        PetscCall(MatGetValues(mdn->A, 1, &row, 1, &idxn[j], v + i * n + j));
      }
    } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Report the leading dimension of the inner sequential matrix */
static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A, PetscInt *lda)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseGetLDA(a->A, lda));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Set the leading dimension; lazily creates the inner sequential matrix if it
   does not exist yet, picking the matching GPU type when A is CUDA/HIP-backed */
static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A, PetscInt lda)
{
  Mat_MPIDense *a     = (Mat_MPIDense *)A->data;
  MatType       mtype = MATSEQDENSE;

  PetscFunctionBegin;
  if (!a->A) {
    PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
    PetscCall(PetscLayoutSetUp(A->rmap));
    PetscCall(PetscLayoutSetUp(A->cmap));
    PetscCall(MatCreate(PETSC_COMM_SELF, &a->A));
    /* local piece is all N global columns over this rank's rows */
    PetscCall(MatSetSizes(a->A, A->rmap->n, A->cmap->N, A->rmap->n, A->cmap->N));
#if PetscDefined(HAVE_CUDA)
    PetscBool iscuda;
    PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSECUDA, &iscuda));
    if (iscuda) mtype = MATSEQDENSECUDA;
#elif PetscDefined(HAVE_HIP)
    PetscBool iship;
    PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSEHIP, &iship));
    if (iship) mtype = MATSEQDENSEHIP;
#endif
    PetscCall(MatSetType(a->A, mtype));
  }
  PetscCall(MatDenseSetLDA(a->A, lda));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Read-write access to the local array; forbidden while a submatrix view is outstanding */
static PetscErrorCode MatDenseGetArray_MPIDense(Mat A, PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseGetArray(a->A, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Read-only access to the local array (non-const signature required by the
   function-table interface; constness is restored by the cast) */
static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A, PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseGetArrayRead(a->A, (const PetscScalar **)array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Write-only access to the local array */
static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A, PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseGetArrayWrite(a->A, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Temporarily replace the local storage with a user-provided array */
static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A, const PetscScalar *array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDensePlaceArray(a->A, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Undo MatDensePlaceArray, restoring the original storage */
static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseResetArray(a->A));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Permanently replace the local storage with a user-provided array */
static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A, const PetscScalar *array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseReplaceArray(a->A, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Extract a submatrix given row/column index sets. Columns are gathered
   globally (every rank gets all selected columns); row selection must respect
   the existing row partitioning - no parallel redistribution is performed. */
static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A, IS isrow, IS iscol, MatReuse scall, Mat *B)
{
  Mat_MPIDense      *mat = (Mat_MPIDense *)A->data, *newmatd;
  PetscInt           lda, i, j, rstart, rend, nrows, ncols, Ncols, nlrows, nlcols;
  const PetscInt    *irow, *icol;
  const PetscScalar *v;
  PetscScalar       *bv;
  Mat                newmat;
  IS                 iscol_local;
  MPI_Comm           comm_is, comm_mat;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm_mat));
  PetscCall(PetscObjectGetComm((PetscObject)iscol, &comm_is));
  PetscCheck(comm_mat == comm_is, PETSC_COMM_SELF, PETSC_ERR_ARG_NOTSAMECOMM, "IS communicator must match matrix communicator");

  PetscCall(ISAllGather(iscol, &iscol_local));
  PetscCall(ISGetIndices(isrow, &irow));
  PetscCall(ISGetIndices(iscol_local, &icol));
  PetscCall(ISGetLocalSize(isrow, &nrows));
  PetscCall(ISGetLocalSize(iscol, &ncols));
  PetscCall(ISGetSize(iscol, &Ncols)); /* global number of columns, size of iscol_local */

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK.  ... Currently supports only submatrix same partitioning as
     original matrix! */

  PetscCall(MatGetLocalSize(A, &nlrows, &nlcols));
  PetscCall(MatGetOwnershipRange(A, &rstart, &rend));

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &newmat));
    PetscCall(MatSetSizes(newmat, nrows, ncols, PETSC_DECIDE, Ncols));
    PetscCall(MatSetType(newmat, ((PetscObject)A)->type_name));
    PetscCall(MatMPIDenseSetPreallocation(newmat, NULL));
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense *)newmat->data;
  PetscCall(MatDenseGetArray(newmatd->A, &bv));
  PetscCall(MatDenseGetArrayRead(mat->A, &v));
  PetscCall(MatDenseGetLDA(mat->A, &lda));
  for (i = 0; i < Ncols; i++) {
    /* av points at the start of source column icol[i]; destination is filled
       contiguously column-by-column via bv++ */
    const PetscScalar *av = v + lda * icol[i];
    for (j = 0; j < nrows; j++) *bv++ = av[irow[j] - rstart];
  }
  PetscCall(MatDenseRestoreArrayRead(mat->A, &v));
  PetscCall(MatDenseRestoreArray(newmatd->A, &bv));

  /* Assemble the matrices so that the correct flags are set */
  PetscCall(MatAssemblyBegin(newmat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(newmat, MAT_FINAL_ASSEMBLY));

  /* Free work space */
  PetscCall(ISRestoreIndices(isrow, &irow));
  PetscCall(ISRestoreIndices(iscol_local, &icol));
  PetscCall(ISDestroy(&iscol_local));
  *B = newmat;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Release the array obtained with MatDenseGetArray_MPIDense */
static PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A, PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseRestoreArray(a->A, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Release the array obtained with MatDenseGetArrayRead_MPIDense */
static PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A, PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseRestoreArrayRead(a->A, (const PetscScalar **)array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Release the array obtained with MatDenseGetArrayWrite_MPIDense */
static PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A, PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseRestoreArrayWrite(a->A, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Begin assembly: start scattering stashed off-process entries to their owners */
static PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat, MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
  PetscInt      nstash, reallocs;

  PetscFunctionBegin;
  /* nothing to communicate if stashing was disabled */
  if (mdn->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
  PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
  PetscCall(PetscInfo(mdn->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Finish assembly: drain incoming stash messages, inserting each run of values
   that belongs to the same row with a single MatSetValues call, then assemble
   the inner sequential matrix */
static PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat, MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
  PetscInt      i, *row, *col, flg, j, rstart, ncols;
  PetscMPIInt   n;
  PetscScalar  *val;

  PetscFunctionBegin;
  if (!mdn->donotstash && !mat->nooffprocentries) {
    /* wait on receives */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break; /* no more messages */

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIDense(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
  }

  PetscCall(MatAssemblyBegin(mdn->A, mode));
  PetscCall(MatAssemblyEnd(mdn->A, mode));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Zero every entry of the local block */
static PetscErrorCode MatZeroEntries_MPIDense(Mat A)
{
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatZeroEntries(l->A));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Zero the listed (global) rows, optionally placing diag on the diagonal and
   fixing the right-hand side b so that b_i = diag * x_i for each zeroed row i */
static PetscErrorCode MatZeroRows_MPIDense(Mat A, PetscInt n, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;
  PetscInt      i, len, *lrows;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(PetscLayoutMapLocal(A->rmap, n, rows, &len, &lrows, NULL));
  /* fix right-hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArrayWrite(b, &bb));
    for (i = 0; i < len; ++i) bb[lrows[i]] = diag * xx[lrows[i]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArrayWrite(b, &bb));
  }
  /* zero the rows locally, then set the diagonal in a second pass if requested */
  PetscCall(MatZeroRows(l->A, len, lrows, 0.0, NULL, NULL));
  if (diag != 0.0) {
    Vec d;

    PetscCall(MatCreateVecs(A, NULL, &d));
    PetscCall(VecSet(d, diag));
    PetscCall(MatDiagonalSet(A, d, INSERT_VALUES));
    PetscCall(VecDestroy(&d));
  }
  PetscCall(PetscFree(lrows));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat, Vec, Vec);
PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat, Vec, Vec, Vec);
PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat, Vec, Vec);
PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat, Vec, Vec, Vec);

/* yy = A[:, c_start:c_end] * xx: broadcast xx into the local work vector lvec
   via the PetscSF (memtype-aware, works for host or device arrays), then call
   the inner matrix's column-range mult */
static PetscErrorCode MatMultColumnRange_MPIDense(Mat mat, Vec xx, Vec yy, PetscInt c_start, PetscInt c_end)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
  PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
  PetscCall(VecGetArrayWriteAndMemType(mdn->lvec, &ay, &aymtype));
  PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
  PetscCall(VecRestoreArrayWriteAndMemType(mdn->lvec, &ay));
  PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
  PetscUseMethod(mdn->A, "MatMultColumnRange_C", (Mat, Vec, Vec, PetscInt, PetscInt), (mdn->A, mdn->lvec, yy, c_start, c_end));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* yy = A * xx: gather the full xx into lvec with an SF broadcast, then apply
   the inner sequential mult */
static PetscErrorCode MatMult_MPIDense(Mat mat, Vec xx, Vec yy)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
  PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
  PetscCall(VecGetArrayWriteAndMemType(mdn->lvec, &ay, &aymtype));
  PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
  PetscCall(VecRestoreArrayWriteAndMemType(mdn->lvec, &ay));
  PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
  PetscCall((*mdn->A->ops->mult)(mdn->A, mdn->lvec, yy));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* zz = yy + A[:, c_start:c_end] * xx, same broadcast pattern as the plain mult */
static PetscErrorCode MatMultAddColumnRange_MPIDense(Mat mat, Vec xx, Vec yy, Vec zz, PetscInt c_start, PetscInt c_end)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
  PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
  PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
  PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
  PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
  PetscUseMethod(mdn->A, "MatMultAddColumnRange_C", (Mat, Vec, Vec, Vec, PetscInt, PetscInt), (mdn->A, mdn->lvec, yy, zz, c_start, c_end));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* zz = yy + A * xx */
static PetscErrorCode MatMultAdd_MPIDense(Mat mat, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
  PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
  PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
  PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
  PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
  PetscCall((*mdn->A->ops->multadd)(mdn->A, mdn->lvec, yy, zz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* yy[c_start:c_end] = A^H[c_start:c_end, :] * xx: local hermitian-transpose
   column-range mult into lvec, then SF reduce (sum) of contributions into yy.
   The portion of yy owned locally that overlaps the column range is zeroed
   first so the reduce accumulates from a clean state. */
static PetscErrorCode MatMultHermitianTransposeColumnRange_MPIDense(Mat A, Vec xx, Vec yy, PetscInt c_start, PetscInt c_end)
{
  Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;
  PetscInt           r_start, r_end;
  PetscInt           c_start_local, c_end_local;

  PetscFunctionBegin;
  if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
  PetscCall(VecZeroEntries(a->lvec));
  PetscCall(VecGetOwnershipRange(yy, &r_start, &r_end));
  /* intersect the requested column range with this rank's slice of yy */
  c_start_local = PetscMax(c_start, r_start);
  c_end_local   = PetscMin(c_end, r_end);
  PetscCall(VecGetArrayAndMemType(yy, &ay, &aymtype));
  if (c_end_local > c_start_local) {
    if (PetscMemTypeHost(aymtype)) {
      PetscCall(PetscArrayzero(&ay[c_start_local], (size_t)(c_end_local - c_start_local)));
    } else {
      /* device path: register the buffer so PetscDeviceArrayZero can operate on it */
      PetscCall(PetscDeviceRegisterMemory(ay, aymtype, sizeof(*ay) * ((size_t)(r_end - r_start))));
      PetscCall(PetscDeviceArrayZero(NULL, &ay[c_start_local], (size_t)(c_end_local - c_start_local)));
    }
  }
  PetscUseMethod(a->A, "MatMultHermitianTransposeColumnRange_C", (Mat, Vec, Vec, PetscInt, PetscInt), (a->A, xx, a->lvec, c_start, c_end));
  PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
  PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
  PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
  PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
  PetscCall(VecRestoreArrayAndMemType(yy, &ay));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* yy = A^T * xx (herm == PETSC_FALSE) or A^H * xx (herm == PETSC_TRUE):
   local (hermitian-)transpose mult into lvec, then SF reduce-sum into yy */
static PetscErrorCode MatMultTransposeKernel_MPIDense(Mat A, Vec xx, Vec yy, PetscBool herm)
{
  Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
  PetscCall(VecSet(yy, 0.0)); /* reduce accumulates, so start from zero */
  if (herm) PetscCall((*a->A->ops->multhermitiantranspose)(a->A, xx, a->lvec));
  else PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec));
  PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(yy, &ay, &aymtype));
  PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
  PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
  PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
  PetscCall(VecRestoreArrayAndMemType(yy, &ay));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* zz = yy + A^H[c_start:c_end, :] * xx: copy yy into zz, compute the local
   column-range contribution into lvec, then reduce-sum into zz */
static PetscErrorCode MatMultHermitianTransposeAddColumnRange_MPIDense(Mat A, Vec xx, Vec yy, Vec zz, PetscInt c_start, PetscInt c_end)
{
  Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
  PetscCall(VecCopy(yy, zz));
  PetscCall(VecZeroEntries(a->lvec));
  PetscUseMethod(a->A, "MatMultHermitianTransposeColumnRange_C", (Mat, Vec, Vec, PetscInt, PetscInt), (a->A, xx, a->lvec, c_start, c_end));
  PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(zz, &ay, &aymtype));
  PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
  PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
  PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
  PetscCall(VecRestoreArrayAndMemType(zz, &ay));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* zz = yy + A^T * xx (or A^H when herm): copy then local mult then reduce-sum */
static PetscErrorCode MatMultTransposeAddKernel_MPIDense(Mat A, Vec xx, Vec yy, Vec zz, PetscBool herm)
{
  Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
  PetscCall(VecCopy(yy, zz));
  if (herm) PetscCall((*a->A->ops->multhermitiantranspose)(a->A, xx, a->lvec));
  else PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec));
  PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(zz, &ay, &aymtype));
  PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
  PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
  PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
  PetscCall(VecRestoreArrayAndMemType(zz, &ay));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* yy = A^T * xx */
static PetscErrorCode MatMultTranspose_MPIDense(Mat A, Vec xx, Vec yy)
{
  PetscFunctionBegin;
  PetscCall(MatMultTransposeKernel_MPIDense(A, xx, yy, PETSC_FALSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* zz = yy + A^T * xx */
static PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A, Vec xx, Vec yy, Vec zz)
{
  PetscFunctionBegin;
  PetscCall(MatMultTransposeAddKernel_MPIDense(A, xx, yy, zz, PETSC_FALSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* yy = A^H * xx */
static PetscErrorCode MatMultHermitianTranspose_MPIDense(Mat A, Vec xx, Vec yy)
{
  PetscFunctionBegin;
  PetscCall(MatMultTransposeKernel_MPIDense(A, xx, yy, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* zz = yy + A^H * xx */
static PetscErrorCode MatMultHermitianTransposeAdd_MPIDense(Mat A, Vec xx, Vec yy, Vec zz)
{
  PetscFunctionBegin;
  PetscCall(MatMultTransposeAddKernel_MPIDense(A, xx, yy, zz, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Extract the diagonal of A into v; entries of v beyond the local diagonal
   length are zeroed */
PetscErrorCode MatGetDiagonal_MPIDense(Mat A, Vec v)
{
  Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
  PetscInt           lda, len, i, nl, ng, m = A->rmap->n, radd;
  PetscScalar       *x;
  const PetscScalar *av;

  PetscFunctionBegin;
  PetscCall(VecGetArray(v, &x));
  PetscCall(VecGetSize(v, &ng));
  PetscCheck(ng == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming mat and vec");
  PetscCall(VecGetLocalSize(v, &nl));
  len  = PetscMin(a->A->rmap->n, a->A->cmap->n);
  /* NOTE(review): radd uses m as the column stride while the loop below strides by
     lda - presumably these agree (lda == m) for the layouts this is called with;
     confirm against how the inner matrix is preallocated */
  radd = A->rmap->rstart * m;
  PetscCall(MatDenseGetArrayRead(a->A, &av));
  PetscCall(MatDenseGetLDA(a->A, &lda));
  for (i = 0; i < len; i++) x[i] = av[radd + i * lda + i];
  PetscCall(MatDenseRestoreArrayRead(a->A, &av));
  if (nl - i > 0) PetscCall(PetscArrayzero(x + i, nl - i));
  PetscCall(VecRestoreArray(v, &x));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Destroy the MPI dense matrix: free inner objects, then clear every composed
   method/function pointer so the object can be retyped cleanly */
static PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;

  PetscFunctionBegin;
  PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
  PetscCall(MatStashDestroy_Private(&mat->stash));
  PetscCheck(!mdn->vecinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!mdn->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDestroy(&mdn->A));
  PetscCall(VecDestroy(&mdn->lvec));
  PetscCall(PetscSFDestroy(&mdn->Mvctx));
  PetscCall(VecDestroy(&mdn->cvec));
  PetscCall(MatDestroy(&mdn->cmat));

  PetscCall(PetscFree(mat->data));
  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));

  /* reset all composed function pointers installed at creation time */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", NULL));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", NULL));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", NULL));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensecuda_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAPlaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAResetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAReplaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDASetPreallocation_C", NULL));
#endif
#if defined(PETSC_HAVE_HIP)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensehip_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensehip_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidensehip_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaijhipsparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPPlaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPResetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPReplaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPSetPreallocation_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultColumnRange_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultAddColumnRange_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeColumnRange_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeAddColumnRange_C", NULL));

  /* drop the cached diagonal block, if any (see MatGetDiagonalBlock_MPIDense) */
  PetscCall(PetscObjectCompose((PetscObject)mat, "DiagonalBlock", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

#include <petscdraw.h>
/* viewer for ASCII/draw/socket output; body continues beyond this chunk */
static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
  PetscMPIInt   rank;
PetscViewerType vtype; 802 PetscBool iascii, isdraw; 803 PetscViewer sviewer; 804 PetscViewerFormat format; 805 806 PetscFunctionBegin; 807 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 808 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 809 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 810 if (iascii) { 811 PetscCall(PetscViewerGetType(viewer, &vtype)); 812 PetscCall(PetscViewerGetFormat(viewer, &format)); 813 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 814 MatInfo info; 815 PetscCall(MatGetInfo(mat, MAT_LOCAL, &info)); 816 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 817 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %" PetscInt_FMT " \n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, 818 (PetscInt)info.memory)); 819 PetscCall(PetscViewerFlush(viewer)); 820 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 821 if (mdn->Mvctx) PetscCall(PetscSFView(mdn->Mvctx, viewer)); 822 PetscFunctionReturn(PETSC_SUCCESS); 823 } else if (format == PETSC_VIEWER_ASCII_INFO) { 824 PetscFunctionReturn(PETSC_SUCCESS); 825 } 826 } else if (isdraw) { 827 PetscDraw draw; 828 PetscBool isnull; 829 830 PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw)); 831 PetscCall(PetscDrawIsNull(draw, &isnull)); 832 if (isnull) PetscFunctionReturn(PETSC_SUCCESS); 833 } 834 835 { 836 /* assemble the entire matrix onto first processor. */ 837 Mat A; 838 PetscInt M = mat->rmap->N, N = mat->cmap->N, m, row, i, nz; 839 PetscInt *cols; 840 PetscScalar *vals; 841 842 PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A)); 843 if (rank == 0) { 844 PetscCall(MatSetSizes(A, M, N, M, N)); 845 } else { 846 PetscCall(MatSetSizes(A, 0, 0, M, N)); 847 } 848 /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. 
*/ 849 PetscCall(MatSetType(A, MATMPIDENSE)); 850 PetscCall(MatMPIDenseSetPreallocation(A, NULL)); 851 852 /* Copy the matrix ... This isn't the most efficient means, 853 but it's quick for now */ 854 A->insertmode = INSERT_VALUES; 855 856 row = mat->rmap->rstart; 857 m = mdn->A->rmap->n; 858 for (i = 0; i < m; i++) { 859 PetscCall(MatGetRow_MPIDense(mat, row, &nz, &cols, &vals)); 860 PetscCall(MatSetValues_MPIDense(A, 1, &row, nz, cols, vals, INSERT_VALUES)); 861 PetscCall(MatRestoreRow_MPIDense(mat, row, &nz, &cols, &vals)); 862 row++; 863 } 864 865 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 866 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 867 PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 868 if (rank == 0) { 869 PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIDense *)A->data)->A, ((PetscObject)mat)->name)); 870 PetscCall(MatView_SeqDense(((Mat_MPIDense *)A->data)->A, sviewer)); 871 } 872 PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 873 PetscCall(MatDestroy(&A)); 874 } 875 PetscFunctionReturn(PETSC_SUCCESS); 876 } 877 878 static PetscErrorCode MatView_MPIDense(Mat mat, PetscViewer viewer) 879 { 880 PetscBool iascii, isbinary, isdraw, issocket; 881 882 PetscFunctionBegin; 883 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 884 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 885 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket)); 886 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 887 888 if (iascii || issocket || isdraw) { 889 PetscCall(MatView_MPIDense_ASCIIorDraworSocket(mat, viewer)); 890 } else if (isbinary) PetscCall(MatView_Dense_Binary(mat, viewer)); 891 PetscFunctionReturn(PETSC_SUCCESS); 892 } 893 894 static PetscErrorCode MatGetInfo_MPIDense(Mat A, MatInfoType flag, MatInfo *info) 895 { 896 Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 897 Mat 
mdn = mat->A; 898 PetscLogDouble isend[5], irecv[5]; 899 900 PetscFunctionBegin; 901 info->block_size = 1.0; 902 903 PetscCall(MatGetInfo(mdn, MAT_LOCAL, info)); 904 905 isend[0] = info->nz_used; 906 isend[1] = info->nz_allocated; 907 isend[2] = info->nz_unneeded; 908 isend[3] = info->memory; 909 isend[4] = info->mallocs; 910 if (flag == MAT_LOCAL) { 911 info->nz_used = isend[0]; 912 info->nz_allocated = isend[1]; 913 info->nz_unneeded = isend[2]; 914 info->memory = isend[3]; 915 info->mallocs = isend[4]; 916 } else if (flag == MAT_GLOBAL_MAX) { 917 PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)A))); 918 919 info->nz_used = irecv[0]; 920 info->nz_allocated = irecv[1]; 921 info->nz_unneeded = irecv[2]; 922 info->memory = irecv[3]; 923 info->mallocs = irecv[4]; 924 } else if (flag == MAT_GLOBAL_SUM) { 925 PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)A))); 926 927 info->nz_used = irecv[0]; 928 info->nz_allocated = irecv[1]; 929 info->nz_unneeded = irecv[2]; 930 info->memory = irecv[3]; 931 info->mallocs = irecv[4]; 932 } 933 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 934 info->fill_ratio_needed = 0; 935 info->factor_mallocs = 0; 936 PetscFunctionReturn(PETSC_SUCCESS); 937 } 938 939 static PetscErrorCode MatSetOption_MPIDense(Mat A, MatOption op, PetscBool flg) 940 { 941 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 942 943 PetscFunctionBegin; 944 switch (op) { 945 case MAT_NEW_NONZERO_LOCATIONS: 946 case MAT_NEW_NONZERO_LOCATION_ERR: 947 case MAT_NEW_NONZERO_ALLOCATION_ERR: 948 MatCheckPreallocated(A, 1); 949 PetscCall(MatSetOption(a->A, op, flg)); 950 break; 951 case MAT_ROW_ORIENTED: 952 MatCheckPreallocated(A, 1); 953 a->roworiented = flg; 954 PetscCall(MatSetOption(a->A, op, flg)); 955 break; 956 case MAT_IGNORE_OFF_PROC_ENTRIES: 957 a->donotstash = flg; 958 break; 959 case MAT_SYMMETRIC: 960 case MAT_STRUCTURALLY_SYMMETRIC: 961 case 
MAT_HERMITIAN: 962 case MAT_SYMMETRY_ETERNAL: 963 case MAT_STRUCTURAL_SYMMETRY_ETERNAL: 964 case MAT_SPD: 965 case MAT_SPD_ETERNAL: 966 /* if the diagonal matrix is square it inherits some of the properties above */ 967 if (a->A && A->rmap->n == A->cmap->n) PetscCall(MatSetOption(a->A, op, flg)); 968 break; 969 default: 970 break; 971 } 972 PetscFunctionReturn(PETSC_SUCCESS); 973 } 974 975 static PetscErrorCode MatDiagonalScale_MPIDense(Mat A, Vec ll, Vec rr) 976 { 977 Mat_MPIDense *mdn = (Mat_MPIDense *)A->data; 978 const PetscScalar *l; 979 PetscScalar x, *v, *vv, *r; 980 PetscInt i, j, s2a, s3a, s2, s3, m = mdn->A->rmap->n, n = mdn->A->cmap->n, lda; 981 982 PetscFunctionBegin; 983 PetscCall(MatDenseGetArray(mdn->A, &vv)); 984 PetscCall(MatDenseGetLDA(mdn->A, &lda)); 985 PetscCall(MatGetLocalSize(A, &s2, &s3)); 986 if (ll) { 987 PetscCall(VecGetLocalSize(ll, &s2a)); 988 PetscCheck(s2a == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left scaling vector non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT, s2a, s2); 989 PetscCall(VecGetArrayRead(ll, &l)); 990 for (i = 0; i < m; i++) { 991 x = l[i]; 992 v = vv + i; 993 for (j = 0; j < n; j++) { 994 (*v) *= x; 995 v += lda; 996 } 997 } 998 PetscCall(VecRestoreArrayRead(ll, &l)); 999 PetscCall(PetscLogFlops(1.0 * n * m)); 1000 } 1001 if (rr) { 1002 const PetscScalar *ar; 1003 1004 PetscCall(VecGetLocalSize(rr, &s3a)); 1005 PetscCheck(s3a == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Right scaling vec non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT ".", s3a, s3); 1006 PetscCall(VecGetArrayRead(rr, &ar)); 1007 if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); 1008 PetscCall(VecGetArray(mdn->lvec, &r)); 1009 PetscCall(PetscSFBcastBegin(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE)); 1010 PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE)); 1011 PetscCall(VecRestoreArrayRead(rr, &ar)); 1012 for (i = 0; i < n; i++) { 1013 x = r[i]; 1014 v = vv + i * lda; 1015 for (j = 0; j < 
m; j++) (*v++) *= x; 1016 } 1017 PetscCall(VecRestoreArray(mdn->lvec, &r)); 1018 PetscCall(PetscLogFlops(1.0 * n * m)); 1019 } 1020 PetscCall(MatDenseRestoreArray(mdn->A, &vv)); 1021 PetscFunctionReturn(PETSC_SUCCESS); 1022 } 1023 1024 static PetscErrorCode MatNorm_MPIDense(Mat A, NormType type, PetscReal *nrm) 1025 { 1026 Mat_MPIDense *mdn = (Mat_MPIDense *)A->data; 1027 PetscInt i, j; 1028 PetscMPIInt size; 1029 PetscReal sum = 0.0; 1030 const PetscScalar *av, *v; 1031 1032 PetscFunctionBegin; 1033 PetscCall(MatDenseGetArrayRead(mdn->A, &av)); 1034 v = av; 1035 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 1036 if (size == 1) { 1037 PetscCall(MatNorm(mdn->A, type, nrm)); 1038 } else { 1039 if (type == NORM_FROBENIUS) { 1040 for (i = 0; i < mdn->A->cmap->n * mdn->A->rmap->n; i++) { 1041 sum += PetscRealPart(PetscConj(*v) * (*v)); 1042 v++; 1043 } 1044 PetscCallMPI(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A))); 1045 *nrm = PetscSqrtReal(*nrm); 1046 PetscCall(PetscLogFlops(2.0 * mdn->A->cmap->n * mdn->A->rmap->n)); 1047 } else if (type == NORM_1) { 1048 PetscReal *tmp; 1049 1050 PetscCall(PetscCalloc1(A->cmap->N, &tmp)); 1051 *nrm = 0.0; 1052 v = av; 1053 for (j = 0; j < mdn->A->cmap->n; j++) { 1054 for (i = 0; i < mdn->A->rmap->n; i++) { 1055 tmp[j] += PetscAbsScalar(*v); 1056 v++; 1057 } 1058 } 1059 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, tmp, A->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A))); 1060 for (j = 0; j < A->cmap->N; j++) { 1061 if (tmp[j] > *nrm) *nrm = tmp[j]; 1062 } 1063 PetscCall(PetscFree(tmp)); 1064 PetscCall(PetscLogFlops(A->cmap->n * A->rmap->n)); 1065 } else if (type == NORM_INFINITY) { /* max row norm */ 1066 PetscCall(MatNorm(mdn->A, type, nrm)); 1067 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A))); 1068 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for two norm"); 1069 } 1070 
PetscCall(MatDenseRestoreArrayRead(mdn->A, &av)); 1071 PetscFunctionReturn(PETSC_SUCCESS); 1072 } 1073 1074 static PetscErrorCode MatTranspose_MPIDense(Mat A, MatReuse reuse, Mat *matout) 1075 { 1076 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1077 Mat B; 1078 PetscInt M = A->rmap->N, N = A->cmap->N, m, n, *rwork, rstart = A->rmap->rstart; 1079 PetscInt j, i, lda; 1080 PetscScalar *v; 1081 1082 PetscFunctionBegin; 1083 if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout)); 1084 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 1085 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 1086 PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M)); 1087 PetscCall(MatSetType(B, ((PetscObject)A)->type_name)); 1088 PetscCall(MatMPIDenseSetPreallocation(B, NULL)); 1089 } else B = *matout; 1090 1091 m = a->A->rmap->n; 1092 n = a->A->cmap->n; 1093 PetscCall(MatDenseGetArrayRead(a->A, (const PetscScalar **)&v)); 1094 PetscCall(MatDenseGetLDA(a->A, &lda)); 1095 PetscCall(PetscMalloc1(m, &rwork)); 1096 for (i = 0; i < m; i++) rwork[i] = rstart + i; 1097 for (j = 0; j < n; j++) { 1098 PetscCall(MatSetValues(B, 1, &j, m, rwork, v, INSERT_VALUES)); 1099 v = PetscSafePointerPlusOffset(v, lda); 1100 } 1101 PetscCall(MatDenseRestoreArrayRead(a->A, (const PetscScalar **)&v)); 1102 PetscCall(PetscFree(rwork)); 1103 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 1104 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 1105 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 1106 *matout = B; 1107 } else { 1108 PetscCall(MatHeaderMerge(A, &B)); 1109 } 1110 PetscFunctionReturn(PETSC_SUCCESS); 1111 } 1112 1113 static PetscErrorCode MatDuplicate_MPIDense(Mat, MatDuplicateOption, Mat *); 1114 PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat, PetscScalar); 1115 1116 static PetscErrorCode MatSetUp_MPIDense(Mat A) 1117 { 1118 PetscFunctionBegin; 1119 PetscCall(PetscLayoutSetUp(A->rmap)); 1120 
PetscCall(PetscLayoutSetUp(A->cmap)); 1121 if (!A->preallocated) PetscCall(MatMPIDenseSetPreallocation(A, NULL)); 1122 PetscFunctionReturn(PETSC_SUCCESS); 1123 } 1124 1125 static PetscErrorCode MatAXPY_MPIDense(Mat Y, PetscScalar alpha, Mat X, MatStructure str) 1126 { 1127 Mat_MPIDense *A = (Mat_MPIDense *)Y->data, *B = (Mat_MPIDense *)X->data; 1128 1129 PetscFunctionBegin; 1130 PetscCall(MatAXPY(A->A, alpha, B->A, str)); 1131 PetscFunctionReturn(PETSC_SUCCESS); 1132 } 1133 1134 static PetscErrorCode MatConjugate_MPIDense(Mat mat) 1135 { 1136 Mat_MPIDense *a = (Mat_MPIDense *)mat->data; 1137 1138 PetscFunctionBegin; 1139 PetscCall(MatConjugate(a->A)); 1140 PetscFunctionReturn(PETSC_SUCCESS); 1141 } 1142 1143 static PetscErrorCode MatRealPart_MPIDense(Mat A) 1144 { 1145 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1146 1147 PetscFunctionBegin; 1148 PetscCall(MatRealPart(a->A)); 1149 PetscFunctionReturn(PETSC_SUCCESS); 1150 } 1151 1152 static PetscErrorCode MatImaginaryPart_MPIDense(Mat A) 1153 { 1154 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1155 1156 PetscFunctionBegin; 1157 PetscCall(MatImaginaryPart(a->A)); 1158 PetscFunctionReturn(PETSC_SUCCESS); 1159 } 1160 1161 static PetscErrorCode MatGetColumnVector_MPIDense(Mat A, Vec v, PetscInt col) 1162 { 1163 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1164 1165 PetscFunctionBegin; 1166 PetscCheck(a->A, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing local matrix"); 1167 PetscCheck(a->A->ops->getcolumnvector, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing get column operation"); 1168 PetscCall((*a->A->ops->getcolumnvector)(a->A, v, col)); 1169 PetscFunctionReturn(PETSC_SUCCESS); 1170 } 1171 1172 PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat, PetscInt, PetscReal *); 1173 1174 static PetscErrorCode MatGetColumnReductions_MPIDense(Mat A, PetscInt type, PetscReal *reductions) 1175 { 1176 PetscInt i, m, n; 1177 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1178 1179 PetscFunctionBegin; 1180 
PetscCall(MatGetSize(A, &m, &n)); 1181 if (type == REDUCTION_MEAN_REALPART) { 1182 PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_REALPART, reductions)); 1183 } else if (type == REDUCTION_MEAN_IMAGINARYPART) { 1184 PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_IMAGINARYPART, reductions)); 1185 } else { 1186 PetscCall(MatGetColumnReductions_SeqDense(a->A, type, reductions)); 1187 } 1188 if (type == NORM_2) { 1189 for (i = 0; i < n; i++) reductions[i] *= reductions[i]; 1190 } 1191 if (type == NORM_INFINITY) { 1192 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, reductions, n, MPIU_REAL, MPIU_MAX, A->hdr.comm)); 1193 } else { 1194 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, reductions, n, MPIU_REAL, MPIU_SUM, A->hdr.comm)); 1195 } 1196 if (type == NORM_2) { 1197 for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]); 1198 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 1199 for (i = 0; i < n; i++) reductions[i] /= m; 1200 } 1201 PetscFunctionReturn(PETSC_SUCCESS); 1202 } 1203 1204 static PetscErrorCode MatSetRandom_MPIDense(Mat x, PetscRandom rctx) 1205 { 1206 Mat_MPIDense *d = (Mat_MPIDense *)x->data; 1207 1208 PetscFunctionBegin; 1209 PetscCall(MatSetRandom(d->A, rctx)); 1210 #if defined(PETSC_HAVE_DEVICE) 1211 x->offloadmask = d->A->offloadmask; 1212 #endif 1213 PetscFunctionReturn(PETSC_SUCCESS); 1214 } 1215 1216 static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A, PetscBool *missing, PetscInt *d) 1217 { 1218 PetscFunctionBegin; 1219 *missing = PETSC_FALSE; 1220 PetscFunctionReturn(PETSC_SUCCESS); 1221 } 1222 1223 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat); 1224 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat); 1225 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat); 1226 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat, 
Mat, Mat); 1227 static PetscErrorCode MatEqual_MPIDense(Mat, Mat, PetscBool *); 1228 static PetscErrorCode MatLoad_MPIDense(Mat, PetscViewer); 1229 static PetscErrorCode MatProductSetFromOptions_MPIDense(Mat); 1230 1231 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense, 1232 MatGetRow_MPIDense, 1233 MatRestoreRow_MPIDense, 1234 MatMult_MPIDense, 1235 /* 4*/ MatMultAdd_MPIDense, 1236 MatMultTranspose_MPIDense, 1237 MatMultTransposeAdd_MPIDense, 1238 NULL, 1239 NULL, 1240 NULL, 1241 /* 10*/ NULL, 1242 NULL, 1243 NULL, 1244 NULL, 1245 MatTranspose_MPIDense, 1246 /* 15*/ MatGetInfo_MPIDense, 1247 MatEqual_MPIDense, 1248 MatGetDiagonal_MPIDense, 1249 MatDiagonalScale_MPIDense, 1250 MatNorm_MPIDense, 1251 /* 20*/ MatAssemblyBegin_MPIDense, 1252 MatAssemblyEnd_MPIDense, 1253 MatSetOption_MPIDense, 1254 MatZeroEntries_MPIDense, 1255 /* 24*/ MatZeroRows_MPIDense, 1256 NULL, 1257 NULL, 1258 NULL, 1259 NULL, 1260 /* 29*/ MatSetUp_MPIDense, 1261 NULL, 1262 NULL, 1263 MatGetDiagonalBlock_MPIDense, 1264 NULL, 1265 /* 34*/ MatDuplicate_MPIDense, 1266 NULL, 1267 NULL, 1268 NULL, 1269 NULL, 1270 /* 39*/ MatAXPY_MPIDense, 1271 MatCreateSubMatrices_MPIDense, 1272 NULL, 1273 MatGetValues_MPIDense, 1274 MatCopy_MPIDense, 1275 /* 44*/ NULL, 1276 MatScale_MPIDense, 1277 MatShift_MPIDense, 1278 NULL, 1279 NULL, 1280 /* 49*/ MatSetRandom_MPIDense, 1281 NULL, 1282 NULL, 1283 NULL, 1284 NULL, 1285 /* 54*/ NULL, 1286 NULL, 1287 NULL, 1288 NULL, 1289 NULL, 1290 /* 59*/ MatCreateSubMatrix_MPIDense, 1291 MatDestroy_MPIDense, 1292 MatView_MPIDense, 1293 NULL, 1294 NULL, 1295 /* 64*/ NULL, 1296 NULL, 1297 NULL, 1298 NULL, 1299 NULL, 1300 /* 69*/ NULL, 1301 NULL, 1302 NULL, 1303 NULL, 1304 NULL, 1305 /* 74*/ NULL, 1306 NULL, 1307 NULL, 1308 NULL, 1309 NULL, 1310 /* 79*/ NULL, 1311 NULL, 1312 NULL, 1313 NULL, 1314 /* 83*/ MatLoad_MPIDense, 1315 NULL, 1316 NULL, 1317 NULL, 1318 NULL, 1319 NULL, 1320 /* 89*/ NULL, 1321 NULL, 1322 NULL, 1323 NULL, 1324 NULL, 1325 /* 94*/ NULL, 1326 NULL, 
1327 MatMatTransposeMultSymbolic_MPIDense_MPIDense, 1328 MatMatTransposeMultNumeric_MPIDense_MPIDense, 1329 NULL, 1330 /* 99*/ MatProductSetFromOptions_MPIDense, 1331 NULL, 1332 NULL, 1333 MatConjugate_MPIDense, 1334 NULL, 1335 /*104*/ NULL, 1336 MatRealPart_MPIDense, 1337 MatImaginaryPart_MPIDense, 1338 NULL, 1339 NULL, 1340 /*109*/ NULL, 1341 NULL, 1342 NULL, 1343 MatGetColumnVector_MPIDense, 1344 MatMissingDiagonal_MPIDense, 1345 /*114*/ NULL, 1346 NULL, 1347 NULL, 1348 NULL, 1349 NULL, 1350 /*119*/ NULL, 1351 NULL, 1352 MatMultHermitianTranspose_MPIDense, 1353 MatMultHermitianTransposeAdd_MPIDense, 1354 NULL, 1355 /*124*/ NULL, 1356 MatGetColumnReductions_MPIDense, 1357 NULL, 1358 NULL, 1359 NULL, 1360 /*129*/ NULL, 1361 NULL, 1362 MatTransposeMatMultSymbolic_MPIDense_MPIDense, 1363 MatTransposeMatMultNumeric_MPIDense_MPIDense, 1364 NULL, 1365 /*134*/ NULL, 1366 NULL, 1367 NULL, 1368 NULL, 1369 NULL, 1370 /*139*/ NULL, 1371 NULL, 1372 NULL, 1373 NULL, 1374 NULL, 1375 MatCreateMPIMatConcatenateSeqMat_MPIDense, 1376 /*145*/ NULL, 1377 NULL, 1378 NULL, 1379 NULL, 1380 NULL, 1381 /*150*/ NULL, 1382 NULL, 1383 NULL, 1384 NULL, 1385 NULL, 1386 /*155*/ NULL, 1387 NULL}; 1388 1389 static PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat, PetscScalar *data) 1390 { 1391 Mat_MPIDense *a = (Mat_MPIDense *)mat->data; 1392 MatType mtype = MATSEQDENSE; 1393 1394 PetscFunctionBegin; 1395 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1396 PetscCall(PetscLayoutSetUp(mat->rmap)); 1397 PetscCall(PetscLayoutSetUp(mat->cmap)); 1398 if (!a->A) { 1399 PetscCall(MatCreate(PETSC_COMM_SELF, &a->A)); 1400 PetscCall(MatSetSizes(a->A, mat->rmap->n, mat->cmap->N, mat->rmap->n, mat->cmap->N)); 1401 } 1402 #if defined(PETSC_HAVE_CUDA) 1403 PetscBool iscuda; 1404 PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSECUDA, &iscuda)); 1405 if (iscuda) mtype = MATSEQDENSECUDA; 1406 #endif 1407 #if 
defined(PETSC_HAVE_HIP) 1408 PetscBool iship; 1409 PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSEHIP, &iship)); 1410 if (iship) mtype = MATSEQDENSEHIP; 1411 #endif 1412 PetscCall(MatSetType(a->A, mtype)); 1413 PetscCall(MatSeqDenseSetPreallocation(a->A, data)); 1414 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 1415 mat->offloadmask = a->A->offloadmask; 1416 #endif 1417 mat->preallocated = PETSC_TRUE; 1418 mat->assembled = PETSC_TRUE; 1419 PetscFunctionReturn(PETSC_SUCCESS); 1420 } 1421 1422 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIDense(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 1423 { 1424 Mat B, C; 1425 1426 PetscFunctionBegin; 1427 PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &C)); 1428 PetscCall(MatConvert_SeqAIJ_SeqDense(C, MATSEQDENSE, MAT_INITIAL_MATRIX, &B)); 1429 PetscCall(MatDestroy(&C)); 1430 if (reuse == MAT_REUSE_MATRIX) { 1431 C = *newmat; 1432 } else C = NULL; 1433 PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C)); 1434 PetscCall(MatDestroy(&B)); 1435 if (reuse == MAT_INPLACE_MATRIX) { 1436 PetscCall(MatHeaderReplace(A, &C)); 1437 } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C; 1438 PetscFunctionReturn(PETSC_SUCCESS); 1439 } 1440 1441 static PetscErrorCode MatConvert_MPIDense_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 1442 { 1443 Mat B, C; 1444 1445 PetscFunctionBegin; 1446 PetscCall(MatDenseGetLocalMatrix(A, &C)); 1447 PetscCall(MatConvert_SeqDense_SeqAIJ(C, MATSEQAIJ, MAT_INITIAL_MATRIX, &B)); 1448 if (reuse == MAT_REUSE_MATRIX) { 1449 C = *newmat; 1450 } else C = NULL; 1451 PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? 
MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C)); 1452 PetscCall(MatDestroy(&B)); 1453 if (reuse == MAT_INPLACE_MATRIX) { 1454 PetscCall(MatHeaderReplace(A, &C)); 1455 } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C; 1456 PetscFunctionReturn(PETSC_SUCCESS); 1457 } 1458 1459 #if defined(PETSC_HAVE_ELEMENTAL) 1460 PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 1461 { 1462 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1463 Mat mat_elemental; 1464 PetscScalar *v; 1465 PetscInt m = A->rmap->n, N = A->cmap->N, rstart = A->rmap->rstart, i, *rows, *cols, lda; 1466 1467 PetscFunctionBegin; 1468 if (reuse == MAT_REUSE_MATRIX) { 1469 mat_elemental = *newmat; 1470 PetscCall(MatZeroEntries(*newmat)); 1471 } else { 1472 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental)); 1473 PetscCall(MatSetSizes(mat_elemental, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N)); 1474 PetscCall(MatSetType(mat_elemental, MATELEMENTAL)); 1475 PetscCall(MatSetUp(mat_elemental)); 1476 PetscCall(MatSetOption(mat_elemental, MAT_ROW_ORIENTED, PETSC_FALSE)); 1477 } 1478 1479 PetscCall(PetscMalloc2(m, &rows, N, &cols)); 1480 for (i = 0; i < N; i++) cols[i] = i; 1481 for (i = 0; i < m; i++) rows[i] = rstart + i; 1482 1483 /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */ 1484 PetscCall(MatDenseGetArray(A, &v)); 1485 PetscCall(MatDenseGetLDA(a->A, &lda)); 1486 if (lda == m) PetscCall(MatSetValues(mat_elemental, m, rows, N, cols, v, ADD_VALUES)); 1487 else { 1488 for (i = 0; i < N; i++) PetscCall(MatSetValues(mat_elemental, m, rows, 1, &i, v + lda * i, ADD_VALUES)); 1489 } 1490 PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY)); 1491 PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY)); 1492 PetscCall(MatDenseRestoreArray(A, &v)); 1493 PetscCall(PetscFree2(rows, cols)); 1494 1495 if (reuse == MAT_INPLACE_MATRIX) { 1496 PetscCall(MatHeaderReplace(A, 
&mat_elemental)); 1497 } else { 1498 *newmat = mat_elemental; 1499 } 1500 PetscFunctionReturn(PETSC_SUCCESS); 1501 } 1502 #endif 1503 1504 static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A, PetscInt col, PetscScalar **vals) 1505 { 1506 Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 1507 1508 PetscFunctionBegin; 1509 PetscCall(MatDenseGetColumn(mat->A, col, vals)); 1510 PetscFunctionReturn(PETSC_SUCCESS); 1511 } 1512 1513 static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A, PetscScalar **vals) 1514 { 1515 Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 1516 1517 PetscFunctionBegin; 1518 PetscCall(MatDenseRestoreColumn(mat->A, vals)); 1519 PetscFunctionReturn(PETSC_SUCCESS); 1520 } 1521 1522 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 1523 { 1524 Mat_MPIDense *mat; 1525 PetscInt m, nloc, N; 1526 1527 PetscFunctionBegin; 1528 PetscCall(MatGetSize(inmat, &m, &N)); 1529 PetscCall(MatGetLocalSize(inmat, NULL, &nloc)); 1530 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 1531 PetscInt sum; 1532 1533 if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N)); 1534 /* Check sum(n) = N */ 1535 PetscCallMPI(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm)); 1536 PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N); 1537 1538 PetscCall(MatCreateDense(comm, m, n, PETSC_DETERMINE, N, NULL, outmat)); 1539 PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 1540 } 1541 1542 /* numeric phase */ 1543 mat = (Mat_MPIDense *)(*outmat)->data; 1544 PetscCall(MatCopy(inmat, mat->A, SAME_NONZERO_PATTERN)); 1545 PetscFunctionReturn(PETSC_SUCCESS); 1546 } 1547 1548 PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A, PetscInt col, Vec *v) 1549 { 1550 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1551 PetscInt lda; 1552 1553 PetscFunctionBegin; 1554 PetscCheck(!a->vecinuse, 
PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1555 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1556 if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec)); 1557 a->vecinuse = col + 1; 1558 PetscCall(MatDenseGetLDA(a->A, &lda)); 1559 PetscCall(MatDenseGetArray(a->A, (PetscScalar **)&a->ptrinuse)); 1560 PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)lda))); 1561 *v = a->cvec; 1562 PetscFunctionReturn(PETSC_SUCCESS); 1563 } 1564 1565 PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A, PetscInt col, Vec *v) 1566 { 1567 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1568 1569 PetscFunctionBegin; 1570 PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first"); 1571 PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector"); 1572 VecCheckAssembled(a->cvec); 1573 a->vecinuse = 0; 1574 PetscCall(MatDenseRestoreArray(a->A, (PetscScalar **)&a->ptrinuse)); 1575 PetscCall(VecResetArray(a->cvec)); 1576 if (v) *v = NULL; 1577 PetscFunctionReturn(PETSC_SUCCESS); 1578 } 1579 1580 PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v) 1581 { 1582 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1583 PetscInt lda; 1584 1585 PetscFunctionBegin; 1586 PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1587 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1588 if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec)); 1589 a->vecinuse = col + 1; 1590 PetscCall(MatDenseGetLDA(a->A, &lda)); 1591 PetscCall(MatDenseGetArrayRead(a->A, &a->ptrinuse)); 1592 PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * 
(size_t)lda))); 1593 PetscCall(VecLockReadPush(a->cvec)); 1594 *v = a->cvec; 1595 PetscFunctionReturn(PETSC_SUCCESS); 1596 } 1597 1598 PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v) 1599 { 1600 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1601 1602 PetscFunctionBegin; 1603 PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first"); 1604 PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector"); 1605 VecCheckAssembled(a->cvec); 1606 a->vecinuse = 0; 1607 PetscCall(MatDenseRestoreArrayRead(a->A, &a->ptrinuse)); 1608 PetscCall(VecLockReadPop(a->cvec)); 1609 PetscCall(VecResetArray(a->cvec)); 1610 if (v) *v = NULL; 1611 PetscFunctionReturn(PETSC_SUCCESS); 1612 } 1613 1614 PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v) 1615 { 1616 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1617 PetscInt lda; 1618 1619 PetscFunctionBegin; 1620 PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1621 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1622 if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec)); 1623 a->vecinuse = col + 1; 1624 PetscCall(MatDenseGetLDA(a->A, &lda)); 1625 PetscCall(MatDenseGetArrayWrite(a->A, (PetscScalar **)&a->ptrinuse)); 1626 PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)lda))); 1627 *v = a->cvec; 1628 PetscFunctionReturn(PETSC_SUCCESS); 1629 } 1630 1631 PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v) 1632 { 1633 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1634 1635 PetscFunctionBegin; 1636 PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first"); 1637 PetscCheck(a->cvec, 
PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
  VecCheckAssembled(a->cvec);
  a->vecinuse = 0;
  PetscCall(MatDenseRestoreArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
  PetscCall(VecResetArray(a->cvec));
  if (v) *v = NULL;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Exposes the global submatrix [rbegin,rend) x [cbegin,cend) of A as a (cached) MATMPIDENSE
   matrix a->cmat that aliases A's storage. The cached matrix's layouts are rebuilt only when
   the requested row/column window changes; the local window [prbegin,prend) x [pcbegin,pcend)
   is the intersection of the request with this rank's ownership range. */
static PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A, PetscInt rbegin, PetscInt rend, PetscInt cbegin, PetscInt cend, Mat *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat_MPIDense *c;
  MPI_Comm      comm;
  PetscInt      prbegin, prend, pcbegin, pcend;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCheck(!a->vecinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  /* clamp the global request to this rank's local ownership window */
  prbegin = PetscMax(0, PetscMin(A->rmap->rend, rbegin) - A->rmap->rstart);
  prend   = PetscMin(A->rmap->n, PetscMax(0, rend - A->rmap->rstart));
  pcbegin = PetscMax(0, PetscMin(A->cmap->rend, cbegin) - A->cmap->rstart);
  pcend   = PetscMin(A->cmap->n, PetscMax(0, cend - A->cmap->rstart));
  if (!a->cmat) {
    /* first call: create the cached submatrix shell with matching type and layouts */
    PetscCall(MatCreate(comm, &a->cmat));
    PetscCall(MatSetType(a->cmat, ((PetscObject)A)->type_name));
    if (rend - rbegin == A->rmap->N) PetscCall(PetscLayoutReference(A->rmap, &a->cmat->rmap)); /* full row range: share A's layout */
    else {
      PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, prend - prbegin));
      PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin));
      PetscCall(PetscLayoutSetUp(a->cmat->rmap));
    }
    if (cend - cbegin == A->cmap->N) PetscCall(PetscLayoutReference(A->cmap, &a->cmat->cmap));
    else {
      PetscCall(PetscLayoutSetLocalSize(a->cmat->cmap, pcend - pcbegin));
      PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin));
      PetscCall(PetscLayoutSetUp(a->cmat->cmap));
    }
    c = (Mat_MPIDense *)a->cmat->data;
    c->sub_rbegin = rbegin;
    c->sub_rend   = rend;
    c->sub_cbegin = cbegin;
    c->sub_cend   = cend;
  }
  c = (Mat_MPIDense *)a->cmat->data;
  if (c->sub_rbegin != rbegin || c->sub_rend != rend) {
    /* row window changed since last call: rebuild the cached row layout */
    PetscCall(PetscLayoutDestroy(&a->cmat->rmap));
    PetscCall(PetscLayoutCreate(comm, &a->cmat->rmap));
    PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, prend - prbegin));
    PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin));
    PetscCall(PetscLayoutSetUp(a->cmat->rmap));
    c->sub_rbegin = rbegin;
    c->sub_rend   = rend;
  }
  if (c->sub_cbegin != cbegin || c->sub_cend != cend) {
    // special optimization: check if all columns are owned by rank 0, in which case no communication is necessary
    if ((cend - cbegin != a->cmat->cmap->N) || (A->cmap->range[1] != A->cmap->N)) {
      PetscCall(PetscLayoutDestroy(&a->cmat->cmap));
      PetscCall(PetscLayoutCreate(comm, &a->cmat->cmap));
      PetscCall(PetscLayoutSetLocalSize(a->cmat->cmap, pcend - pcbegin));
      PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin));
      PetscCall(PetscLayoutSetUp(a->cmat->cmap));
      /* the scatter context depends on the column layout; force it to be rebuilt */
      PetscCall(VecDestroy(&c->lvec));
      PetscCall(PetscSFDestroy(&c->Mvctx));
    }
    c->sub_cbegin = cbegin;
    c->sub_cend   = cend;
  }
  PetscCheck(!c->A, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  /* note: column indices passed to the inner (sequential) submatrix are the global cbegin/cend */
  PetscCall(MatDenseGetSubMatrix(a->A, prbegin, prend, cbegin, cend, &c->A));

  a->cmat->preallocated = PETSC_TRUE;
  a->cmat->assembled    = PETSC_TRUE;
#if defined(PETSC_HAVE_DEVICE)
  a->cmat->offloadmask = c->A->offloadmask;
#endif
  a->matinuse = cbegin + 1; /* cbegin+1 so that 0 still means "not in use" */
  *v          = a->cmat;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Returns the cached submatrix obtained with MatDenseGetSubMatrix_MPIDense() and
   releases the inner sequential submatrix view. */
static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A, Mat *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat_MPIDense *c;

  PetscFunctionBegin;
  PetscCheck(a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetSubMatrix() first");
  PetscCheck(a->cmat, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal matrix");
  PetscCheck(*v == a->cmat, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Not the matrix obtained from MatDenseGetSubMatrix()");
  a->matinuse = 0;
  c = (Mat_MPIDense *)a->cmat->data;
  PetscCall(MatDenseRestoreSubMatrix(a->A, &c->A));
  if (v) *v = NULL;
#if defined(PETSC_HAVE_DEVICE)
  A->offloadmask = a->A->offloadmask;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*MC
   MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.

   Options Database Key:
. -mat_type mpidense - sets the matrix type to `MATMPIDENSE` during a call to `MatSetFromOptions()`

  Level: beginner

.seealso: [](ch_matrices), `Mat`, `MatCreateDense()`, `MATSEQDENSE`, `MATDENSE`
M*/
/* Constructor for MATMPIDENSE: allocates the type-specific data structure, sets up the
   value stash for off-process entries, and composes all type-specific method implementations. */
PetscErrorCode MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense *a;

  PetscFunctionBegin;
  PetscCall(PetscNew(&a));
  mat->data   = (void *)a;
  mat->ops[0] = MatOps_Values;

  mat->insertmode = NOT_SET_VALUES;

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;

  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)mat), 1, &mat->stash));

  /* stuff used for matrix vector multiply */
  a->lvec        = NULL;
  a->Mvctx       = NULL;
  a->roworiented = PETSC_TRUE;

  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", MatDenseGetLDA_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", MatDenseSetLDA_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", MatDenseGetArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", MatDenseRestoreArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, 
"MatDenseGetArrayRead_C", MatDenseGetArrayRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", MatDenseRestoreArrayRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", MatDenseGetArrayWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", MatDenseRestoreArrayWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", MatDensePlaceArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", MatDenseResetArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", MatDenseReplaceArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", MatDenseGetSubMatrix_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", MatDenseRestoreSubMatrix_MPIDense));
  /* conversions to/from other matrix types (the optional ones are compile-time gated) */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", MatConvert_MPIDense_MPIAIJ));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", MatConvert_MPIDense_Elemental));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", MatConvert_Dense_ScaLAPACK));
#endif
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", MatConvert_MPIDense_MPIDenseCUDA));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", MatMPIDenseSetPreallocation_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
#endif
#if defined(PETSC_HAVE_HIP)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", MatConvert_MPIDense_MPIDenseHIP));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", MatDenseGetColumn_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", MatDenseRestoreColumn_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultColumnRange_C", MatMultColumnRange_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultAddColumnRange_C", MatMultAddColumnRange_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeColumnRange_C", MatMultHermitianTransposeColumnRange_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeAddColumnRange_C", MatMultHermitianTransposeAddColumnRange_MPIDense));
  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, MATMPIDENSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*MC
   MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.

   This matrix type is identical to `MATSEQDENSE` when constructed with a single process communicator,
   and `MATMPIDENSE` otherwise.

   Options Database Key:
. -mat_type dense - sets the matrix type to `MATDENSE` during a call to `MatSetFromOptions()`

  Level: beginner

.seealso: [](ch_matrices), `Mat`, `MATSEQDENSE`, `MATMPIDENSE`, `MATDENSECUDA`, `MATDENSEHIP`
M*/

/*@
  MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

  Collective

  Input Parameters:
+ B    - the matrix
- data - optional location of matrix data.  Set to `NULL` for PETSc
         to control all matrix memory allocation.

  Level: intermediate

  Notes:
  The dense format is fully compatible with standard Fortran
  storage by columns.

  The data input variable is intended primarily for Fortran programmers
  who wish to allocate their own matrix memory space.  Most users should
  set `data` to `NULL`.
.seealso: [](ch_matrices), `Mat`, `MATMPIDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
@*/
PetscErrorCode MatMPIDenseSetPreallocation(Mat B, PetscScalar *data)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  /* dispatch to the type-specific implementation; silently a no-op for non-MPIDense types */
  PetscTryMethod(B, "MatMPIDenseSetPreallocation_C", (Mat, PetscScalar *), (B, data));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatDensePlaceArray - Allows one to replace the array in a `MATDENSE` matrix with an
  array provided by the user. This is useful to avoid copying an array
  into a matrix

  Not Collective

  Input Parameters:
+ mat   - the matrix
- array - the array in column major order

  Level: developer

  Note:
  Adding `const` to `array` was an oversight, see notes in `VecPlaceArray()`.

  You can return to the original array with a call to `MatDenseResetArray()`. The user is responsible for freeing this array; it will not be
  freed when the matrix is destroyed.

.seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDenseResetArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`,
          `MatDenseReplaceArray()`
@*/
PetscErrorCode MatDensePlaceArray(Mat mat, const PetscScalar *array)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  PetscUseMethod(mat, "MatDensePlaceArray_C", (Mat, const PetscScalar *), (mat, array));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
  /* the user-supplied array lives on the host, so host now holds the valid copy */
  mat->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatDenseResetArray - Resets the matrix array to that it previously had before the call to `MatDensePlaceArray()`

  Not Collective

  Input Parameter:
. 
mat - the matrix

  Level: developer

  Note:
  You can only call this after a call to `MatDensePlaceArray()`

.seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDensePlaceArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`
@*/
PetscErrorCode MatDenseResetArray(Mat mat)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  PetscUseMethod(mat, "MatDenseResetArray_C", (Mat), (mat));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an
  array provided by the user. This is useful to avoid copying an array
  into a matrix

  Not Collective

  Input Parameters:
+ mat   - the matrix
- array - the array in column major order

  Level: developer

  Note:
  Adding `const` to `array` was an oversight, see notes in `VecPlaceArray()`.

  The memory passed in MUST be obtained with `PetscMalloc()` and CANNOT be
  freed by the user. It will be freed when the matrix is destroyed.

.seealso: [](ch_matrices), `Mat`, `MatDensePlaceArray()`, `MatDenseGetArray()`, `VecReplaceArray()`
@*/
PetscErrorCode MatDenseReplaceArray(Mat mat, const PetscScalar *array)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  PetscUseMethod(mat, "MatDenseReplaceArray_C", (Mat, const PetscScalar *), (mat, array));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
  /* replacement array is host memory; mark host as the valid copy */
  mat->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatCreateDense - Creates a matrix in `MATDENSE` format.

  Collective

  Input Parameters:
+ comm - MPI communicator
. 
m - number of local rows (or `PETSC_DECIDE` to have calculated if `M` is given) 1964 . n - number of local columns (or `PETSC_DECIDE` to have calculated if `N` is given) 1965 . M - number of global rows (or `PETSC_DECIDE` to have calculated if `m` is given) 1966 . N - number of global columns (or `PETSC_DECIDE` to have calculated if `n` is given) 1967 - data - optional location of matrix data. Set data to `NULL` (`PETSC_NULL_SCALAR` for Fortran users) for PETSc 1968 to control all matrix memory allocation. 1969 1970 Output Parameter: 1971 . A - the matrix 1972 1973 Level: intermediate 1974 1975 Notes: 1976 The dense format is fully compatible with standard Fortran 1977 storage by columns. 1978 1979 Although local portions of the matrix are stored in column-major 1980 order, the matrix is partitioned across MPI ranks by row. 1981 1982 The data input variable is intended primarily for Fortran programmers 1983 who wish to allocate their own matrix memory space. Most users should 1984 set `data` to `NULL` (`PETSC_NULL_SCALAR` for Fortran users). 1985 1986 The user MUST specify either the local or global matrix dimensions 1987 (possibly both). 
.seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
@*/
PetscErrorCode MatCreateDense(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar data[], Mat *A)
{
  PetscFunctionBegin;
  PetscCall(MatCreate(comm, A));
  PetscCall(MatSetSizes(*A, m, n, M, N));
  PetscCall(MatSetType(*A, MATDENSE));
  /* exactly one of the two preallocations fires, depending on whether the type
     resolved to seq or mpi dense (PetscTryMethod no-ops for the other) */
  PetscCall(MatSeqDenseSetPreallocation(*A, data));
  PetscCall(MatMPIDenseSetPreallocation(*A, data));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Duplicates an MPIDense matrix: clones layouts and flags, then duplicates the
   inner sequential matrix (copying values only if cpvalues requests it). */
static PetscErrorCode MatDuplicate_MPIDense(Mat A, MatDuplicateOption cpvalues, Mat *newmat)
{
  Mat           mat;
  Mat_MPIDense *a, *oldmat = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  *newmat = NULL;
  PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat));
  PetscCall(MatSetSizes(mat, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
  PetscCall(MatSetType(mat, ((PetscObject)A)->type_name));
  a = (Mat_MPIDense *)mat->data;

  mat->factortype   = A->factortype;
  mat->assembled    = PETSC_TRUE;
  mat->preallocated = PETSC_TRUE;

  mat->insertmode = NOT_SET_VALUES;
  a->donotstash   = oldmat->donotstash;

  PetscCall(PetscLayoutReference(A->rmap, &mat->rmap));
  PetscCall(PetscLayoutReference(A->cmap, &mat->cmap));

  PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));

  *newmat = mat;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Loads an MPIDense matrix from a binary or (optionally) HDF5 viewer. */
static PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
{
  PetscBool isbinary;
#if defined(PETSC_HAVE_HDF5)
  PetscBool ishdf5;
#endif

  PetscFunctionBegin;
  PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
  PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
  /* force binary viewer to load .info file if it has not yet done so */
  PetscCall(PetscViewerSetUp(viewer));
PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
#if defined(PETSC_HAVE_HDF5)
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
#endif
  if (isbinary) {
    PetscCall(MatLoad_Dense_Binary(newMat, viewer));
#if defined(PETSC_HAVE_HDF5)
  } else if (ishdf5) {
    PetscCall(MatLoad_Dense_HDF5(newMat, viewer));
#endif
  } else SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Two MPIDense matrices are equal iff every rank's local block is equal;
   the local results are combined with a logical-AND reduction. */
static PetscErrorCode MatEqual_MPIDense(Mat A, Mat B, PetscBool *flag)
{
  Mat_MPIDense *matB = (Mat_MPIDense *)B->data, *matA = (Mat_MPIDense *)A->data;
  Mat           a, b;

  PetscFunctionBegin;
  a = matA->A;
  b = matB->A;
  PetscCall(MatEqual(a, b, flag));
  PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Destructor for the C=A^T*B product data attached to C by the symbolic phase. */
static PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data)
{
  Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data;

  PetscFunctionBegin;
  PetscCall(PetscFree2(atb->sendbuf, atb->recvcounts));
  PetscCall(MatDestroy(&atb->atb));
  PetscCall(PetscFree(atb));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Destructor for the C=A*B^T product data attached to C by the symbolic phase. */
static PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data)
{
  Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data;

  PetscFunctionBegin;
  PetscCall(PetscFree2(abt->buf[0], abt->buf[1]));
  PetscCall(PetscFree2(abt->recvcounts, abt->recvdispls));
  PetscCall(PetscFree(abt));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Numeric phase of C = A^T * B for MPIDense operands: each rank computes its local
   contribution aseq^T * bseq, then the contributions are summed and redistributed. */
static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
{
  Mat_MPIDense 
*a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
  Mat_TransMatMultDense *atb;
  MPI_Comm               comm;
  PetscMPIInt            size, *recvcounts;
  PetscScalar           *carray, *sendbuf;
  const PetscScalar     *atbarray;
  PetscInt               i, cN = C->cmap->N, proc, k, j, lda;
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C, 3);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
  atb        = (Mat_TransMatMultDense *)C->product->data;
  recvcounts = atb->recvcounts;
  sendbuf    = atb->sendbuf;

  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));

  /* compute atbarray = aseq^T * bseq */
  PetscCall(MatTransposeMatMult(a->A, b->A, atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX, PETSC_DETERMINE, &atb->atb));

  PetscCall(MatGetOwnershipRanges(C, &ranges));

  if (ranges[1] == C->rmap->N) {
    /* all of the values are being reduced to rank 0: optimize this case to use MPI_Reduce and GPU aware MPI if available */
    PetscInt           atb_lda, c_lda;
    Mat                atb_local = atb->atb;
    Mat                atb_alloc = NULL; /* non-NULL only when a contiguous copy of atb had to be made */
    Mat                c_local   = c->A;
    Mat                c_alloc   = NULL; /* non-NULL only when a contiguous copy of c had to be made (rank 0) */
    PetscMemType       atb_memtype, c_memtype;
    const PetscScalar *atb_array = NULL;
    MPI_Datatype       vector_type;
    PetscScalar       *c_array = NULL;
    PetscMPIInt        rank;

    PetscCallMPI(MPI_Comm_rank(comm, &rank));

    PetscCall(MatDenseGetLDA(atb_local, &atb_lda));
    if (atb_lda != C->rmap->N) {
      // copy atb to a matrix that will have lda == the number of rows
      PetscCall(MatDuplicate(atb_local, MAT_DO_NOT_COPY_VALUES, &atb_alloc));
      PetscCall(MatCopy(atb_local, atb_alloc, DIFFERENT_NONZERO_PATTERN));
      atb_local = atb_alloc;
    }

    if (rank == 0) {
      PetscCall(MatDenseGetLDA(c_local, &c_lda));
      if (c_lda != C->rmap->N) {
        // copy c to a matrix that will have lda == the number of rows
        PetscCall(MatDuplicate(c_local, MAT_DO_NOT_COPY_VALUES, &c_alloc));
        c_local = c_alloc;
      }
      PetscCall(MatZeroEntries(c_local));
    }
    /* atb_local and c_local have nrows = lda = A->cmap->N and ncols =
     * B->cmap->N: use the a->Mvctx to use the best reduction method */
    if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
    vector_type = MPIU_SCALAR;
    if (B->cmap->N > 1) {
      /* reduce whole rows (B->cmap->N scalars) at a time */
      PetscCallMPI(MPI_Type_contiguous(B->cmap->N, MPIU_SCALAR, &vector_type));
      PetscCallMPI(MPI_Type_commit(&vector_type));
    }
    PetscCall(MatDenseGetArrayReadAndMemType(atb_local, &atb_array, &atb_memtype));
    PetscCall(MatDenseGetArrayWriteAndMemType(c_local, &c_array, &c_memtype));
    PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, vector_type, atb_memtype, atb_array, c_memtype, c_array, MPIU_SUM));
    PetscCall(PetscSFReduceEnd(a->Mvctx, vector_type, atb_array, c_array, MPIU_SUM));
    PetscCall(MatDenseRestoreArrayWriteAndMemType(c_local, &c_array));
    PetscCall(MatDenseRestoreArrayReadAndMemType(atb_local, &atb_array));
    if (rank == 0 && c_local != c->A) PetscCall(MatCopy(c_local, c->A, DIFFERENT_NONZERO_PATTERN));
    if (B->cmap->N > 1) PetscCallMPI(MPI_Type_free(&vector_type));
    PetscCall(MatDestroy(&atb_alloc));
    PetscCall(MatDestroy(&c_alloc));
    PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  /* arrange atbarray into sendbuf: pack, per destination rank, that rank's rows of every column */
  PetscCall(MatDenseGetArrayRead(atb->atb, &atbarray));
  PetscCall(MatDenseGetLDA(atb->atb, &lda));
  for (proc = 0, k = 0; proc < size; proc++) {
    for (j = 0; j < cN; j++) {
      for (i = ranges[proc]; i < ranges[proc + 1]; i++) sendbuf[k++] = atbarray[i + j * lda];
    }
  }
  PetscCall(MatDenseRestoreArrayRead(atb->atb, &atbarray));

  /* sum all atbarray to local values of C */
  PetscCall(MatDenseGetArrayWrite(c->A, &carray));
  PetscCallMPI(MPI_Reduce_scatter(sendbuf, carray, recvcounts, MPIU_SCALAR, MPIU_SUM, comm));
  PetscCall(MatDenseRestoreArrayWrite(c->A, &carray));
  PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Symbolic phase of C = A^T * B: sizes C, verifies that A and B share a row layout,
   and allocates the pack buffer / receive counts reused by every numeric call. */
static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
{
  MPI_Comm               comm;
  PetscMPIInt            size;
  PetscInt               cm = A->cmap->n, cM, cN = B->cmap->N;
  Mat_TransMatMultDense *atb;
  PetscBool              cisdense = PETSC_FALSE;
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C, 4);
  PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
    SETERRQ(comm, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
  }

  /* create matrix product C */
  PetscCall(MatSetSizes(C, cm, B->cmap->n, A->cmap->N, B->cmap->N));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, ""));
#endif
#if defined(PETSC_HAVE_HIP)
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSEHIP, ""));
#endif
  if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
  PetscCall(MatSetUp(C));

  /* create data structure for reuse C */
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCall(PetscNew(&atb));
  cM = C->rmap->N;
PetscCall(PetscMalloc2(cM * cN, &atb->sendbuf, size, &atb->recvcounts));
  PetscCall(MatGetOwnershipRanges(C, &ranges));
  /* rank i receives its owned rows of C, i.e. (ranges[i+1]-ranges[i]) * cN scalars */
  for (PetscMPIInt i = 0; i < size; i++) PetscCall(PetscMPIIntCast((ranges[i + 1] - ranges[i]) * cN, &atb->recvcounts[i]));
  C->product->data    = atb;
  C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Symbolic phase of C = A * B^T: sizes C, picks the communication algorithm
   ("allgatherv" or "cyclic") and preallocates its buffers for reuse by the numeric phase. */
static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
{
  MPI_Comm               comm;
  PetscMPIInt            i, size;
  PetscInt               maxRows, bufsiz;
  PetscMPIInt            tag;
  PetscInt               alg; /* 0: allgatherv, 1: cyclic */
  Mat_MatTransMultDense *abt;
  Mat_Product           *product = C->product;
  PetscBool              flg;

  PetscFunctionBegin;
  MatCheckProduct(C, 4);
  PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
  /* check local size of A and B */
  PetscCheck(A->cmap->n == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local column dimensions are incompatible, A (%" PetscInt_FMT ") != B (%" PetscInt_FMT ")", A->cmap->n, B->cmap->n);

  PetscCall(PetscStrcmp(product->alg, "allgatherv", &flg));
  alg = flg ? 
0 : 1;

  /* setup matrix product C */
  PetscCall(MatSetSizes(C, A->rmap->n, B->rmap->n, A->rmap->N, B->rmap->N));
  PetscCall(MatSetType(C, MATMPIDENSE));
  PetscCall(MatSetUp(C));
  PetscCall(PetscObjectGetNewTag((PetscObject)C, &tag));

  /* create data structure for reuse C */
  PetscCall(PetscObjectGetComm((PetscObject)C, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCall(PetscNew(&abt));
  abt->tag = tag;
  abt->alg = alg;
  switch (alg) {
  case 1: /* alg: "cyclic" */
    /* double buffers sized for the largest row block of B that will circulate */
    for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, B->rmap->range[i + 1] - B->rmap->range[i]);
    bufsiz = A->cmap->N * maxRows;
    PetscCall(PetscMalloc2(bufsiz, &abt->buf[0], bufsiz, &abt->buf[1]));
    break;
  default: /* alg: "allgatherv" */
    /* buf[0] packs the local block of B, buf[1] holds the fully gathered B */
    PetscCall(PetscMalloc2(B->rmap->n * B->cmap->N, &abt->buf[0], B->rmap->N * B->cmap->N, &abt->buf[1]));
    PetscCall(PetscMalloc2(size, &abt->recvcounts, size + 1, &abt->recvdispls));
    for (i = 0; i <= size; i++) PetscCall(PetscMPIIntCast(B->rmap->range[i] * A->cmap->N, &abt->recvdispls[i]));
    for (i = 0; i < size; i++) PetscCall(PetscMPIIntCast(abt->recvdispls[i + 1] - abt->recvdispls[i], &abt->recvcounts[i]));
    break;
  }

  C->product->data                = abt;
  C->product->destroy             = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
  C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Numeric phase of C = A * B^T using the "cyclic" algorithm: each rank's block of B
   is circulated around the ring while local GEMMs fill the corresponding column
   blocks of C, overlapping communication with computation via double buffering. */
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
{
  Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
  Mat_MatTransMultDense *abt;
  MPI_Comm               comm;
  PetscMPIInt            rank, size, sendto, recvfrom, recvisfrom;
  PetscScalar           *sendbuf, *recvbuf = NULL, *cv;
  PetscInt               i, cK = A->cmap->N, sendsiz, recvsiz, k, j, bn;
  PetscScalar            _DOne = 1.0, _DZero = 0.0;
const PetscScalar *av, *bv; 2295 PetscBLASInt cm, cn, ck, alda, blda = 0, clda; 2296 MPI_Request reqs[2]; 2297 const PetscInt *ranges; 2298 2299 PetscFunctionBegin; 2300 MatCheckProduct(C, 3); 2301 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 2302 abt = (Mat_MatTransMultDense *)C->product->data; 2303 PetscCall(PetscObjectGetComm((PetscObject)C, &comm)); 2304 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 2305 PetscCallMPI(MPI_Comm_size(comm, &size)); 2306 PetscCall(MatDenseGetArrayRead(a->A, &av)); 2307 PetscCall(MatDenseGetArrayRead(b->A, &bv)); 2308 PetscCall(MatDenseGetArrayWrite(c->A, &cv)); 2309 PetscCall(MatDenseGetLDA(a->A, &i)); 2310 PetscCall(PetscBLASIntCast(i, &alda)); 2311 PetscCall(MatDenseGetLDA(b->A, &i)); 2312 PetscCall(PetscBLASIntCast(i, &blda)); 2313 PetscCall(MatDenseGetLDA(c->A, &i)); 2314 PetscCall(PetscBLASIntCast(i, &clda)); 2315 PetscCall(MatGetOwnershipRanges(B, &ranges)); 2316 bn = B->rmap->n; 2317 if (blda == bn) { 2318 sendbuf = (PetscScalar *)bv; 2319 } else { 2320 sendbuf = abt->buf[0]; 2321 for (k = 0, i = 0; i < cK; i++) { 2322 for (j = 0; j < bn; j++, k++) sendbuf[k] = bv[i * blda + j]; 2323 } 2324 } 2325 if (size > 1) { 2326 sendto = (rank + size - 1) % size; 2327 recvfrom = (rank + size + 1) % size; 2328 } else { 2329 sendto = recvfrom = 0; 2330 } 2331 PetscCall(PetscBLASIntCast(cK, &ck)); 2332 PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm)); 2333 recvisfrom = rank; 2334 for (i = 0; i < size; i++) { 2335 /* we have finished receiving in sending, bufs can be read/modified */ 2336 PetscMPIInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */ 2337 PetscInt nextbn = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom]; 2338 2339 if (nextrecvisfrom != rank) { 2340 /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */ 2341 sendsiz = cK * bn; 2342 recvsiz = cK * nextbn; 2343 recvbuf = (i & 1) ? 
abt->buf[0] : abt->buf[1]; 2344 PetscCallMPI(MPIU_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0])); 2345 PetscCallMPI(MPIU_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1])); 2346 } 2347 2348 /* local aseq * sendbuf^T */ 2349 PetscCall(PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn)); 2350 if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &cm, &cn, &ck, &_DOne, av, &alda, sendbuf, &cn, &_DZero, cv + clda * ranges[recvisfrom], &clda)); 2351 2352 if (nextrecvisfrom != rank) { 2353 /* wait for the sends and receives to complete, swap sendbuf and recvbuf */ 2354 PetscCallMPI(MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE)); 2355 } 2356 bn = nextbn; 2357 recvisfrom = nextrecvisfrom; 2358 sendbuf = recvbuf; 2359 } 2360 PetscCall(MatDenseRestoreArrayRead(a->A, &av)); 2361 PetscCall(MatDenseRestoreArrayRead(b->A, &bv)); 2362 PetscCall(MatDenseRestoreArrayWrite(c->A, &cv)); 2363 PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 2364 PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 2365 PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 2366 PetscFunctionReturn(PETSC_SUCCESS); 2367 } 2368 2369 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C) 2370 { 2371 Mat_MPIDense *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data; 2372 Mat_MatTransMultDense *abt; 2373 MPI_Comm comm; 2374 PetscMPIInt size, ibn; 2375 PetscScalar *cv, *sendbuf, *recvbuf; 2376 const PetscScalar *av, *bv; 2377 PetscInt blda, i, cK = A->cmap->N, k, j, bn; 2378 PetscScalar _DOne = 1.0, _DZero = 0.0; 2379 PetscBLASInt cm, cn, ck, alda, clda; 2380 2381 PetscFunctionBegin; 2382 MatCheckProduct(C, 3); 2383 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 2384 abt = (Mat_MatTransMultDense *)C->product->data; 2385 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 2386 
PetscCallMPI(MPI_Comm_size(comm, &size)); 2387 PetscCall(MatDenseGetArrayRead(a->A, &av)); 2388 PetscCall(MatDenseGetArrayRead(b->A, &bv)); 2389 PetscCall(MatDenseGetArrayWrite(c->A, &cv)); 2390 PetscCall(MatDenseGetLDA(a->A, &i)); 2391 PetscCall(PetscBLASIntCast(i, &alda)); 2392 PetscCall(MatDenseGetLDA(b->A, &blda)); 2393 PetscCall(MatDenseGetLDA(c->A, &i)); 2394 PetscCall(PetscBLASIntCast(i, &clda)); 2395 /* copy transpose of B into buf[0] */ 2396 bn = B->rmap->n; 2397 sendbuf = abt->buf[0]; 2398 recvbuf = abt->buf[1]; 2399 for (k = 0, j = 0; j < bn; j++) { 2400 for (i = 0; i < cK; i++, k++) sendbuf[k] = bv[i * blda + j]; 2401 } 2402 PetscCall(MatDenseRestoreArrayRead(b->A, &bv)); 2403 PetscCall(PetscMPIIntCast(bn * cK, &ibn)); 2404 PetscCallMPI(MPI_Allgatherv(sendbuf, ibn, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm)); 2405 PetscCall(PetscBLASIntCast(cK, &ck)); 2406 PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm)); 2407 PetscCall(PetscBLASIntCast(c->A->cmap->n, &cn)); 2408 if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &cm, &cn, &ck, &_DOne, av, &alda, recvbuf, &ck, &_DZero, cv, &clda)); 2409 PetscCall(MatDenseRestoreArrayRead(a->A, &av)); 2410 PetscCall(MatDenseRestoreArrayRead(b->A, &bv)); 2411 PetscCall(MatDenseRestoreArrayWrite(c->A, &cv)); 2412 PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 2413 PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 2414 PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 2415 PetscFunctionReturn(PETSC_SUCCESS); 2416 } 2417 2418 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) 2419 { 2420 Mat_MatTransMultDense *abt; 2421 2422 PetscFunctionBegin; 2423 MatCheckProduct(C, 3); 2424 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 2425 abt = (Mat_MatTransMultDense *)C->product->data; 2426 switch (abt->alg) { 2427 case 1: 2428 
PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C)); 2429 break; 2430 default: 2431 PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C)); 2432 break; 2433 } 2434 PetscFunctionReturn(PETSC_SUCCESS); 2435 } 2436 2437 static PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data) 2438 { 2439 Mat_MatMultDense *ab = (Mat_MatMultDense *)data; 2440 2441 PetscFunctionBegin; 2442 PetscCall(MatDestroy(&ab->Ce)); 2443 PetscCall(MatDestroy(&ab->Ae)); 2444 PetscCall(MatDestroy(&ab->Be)); 2445 PetscCall(PetscFree(ab)); 2446 PetscFunctionReturn(PETSC_SUCCESS); 2447 } 2448 2449 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) 2450 { 2451 Mat_MatMultDense *ab; 2452 Mat_MPIDense *mdn = (Mat_MPIDense *)A->data; 2453 Mat_MPIDense *b = (Mat_MPIDense *)B->data; 2454 2455 PetscFunctionBegin; 2456 MatCheckProduct(C, 3); 2457 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Missing product data"); 2458 ab = (Mat_MatMultDense *)C->product->data; 2459 if (ab->Ae && ab->Ce) { 2460 #if PetscDefined(HAVE_ELEMENTAL) 2461 PetscCall(MatConvert_MPIDense_Elemental(A, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Ae)); 2462 PetscCall(MatConvert_MPIDense_Elemental(B, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Be)); 2463 PetscCall(MatMatMultNumeric_Elemental(ab->Ae, ab->Be, ab->Ce)); 2464 PetscCall(MatConvert(ab->Ce, MATMPIDENSE, MAT_REUSE_MATRIX, &C)); 2465 #else 2466 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "PETSC_HAVE_ELEMENTAL not defined"); 2467 #endif 2468 } else { 2469 MPI_Comm comm; 2470 const PetscScalar *read; 2471 PetscScalar *write; 2472 PetscInt lda; 2473 const PetscInt *ranges; 2474 PetscMPIInt size; 2475 2476 if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); /* cannot be done during the symbolic phase because of possible calls to MatProductReplaceMats() */ 2477 comm = PetscObjectComm((PetscObject)B); 2478 PetscCallMPI(MPI_Comm_size(comm, &size)); 2479 
PetscCall(PetscLayoutGetRanges(B->rmap, &ranges)); 2480 if (ranges[1] == ranges[size]) { 2481 // optimize for the case where the B matrix is broadcast from rank 0 2482 PetscInt b_lda, be_lda; 2483 Mat b_local = b->A; 2484 Mat b_alloc = NULL; 2485 Mat be_local = ab->Be; 2486 Mat be_alloc = NULL; 2487 PetscMemType b_memtype, be_memtype; 2488 const PetscScalar *b_array = NULL; 2489 MPI_Datatype vector_type; 2490 PetscScalar *be_array = NULL; 2491 PetscMPIInt rank; 2492 2493 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 2494 PetscCall(MatDenseGetLDA(be_local, &be_lda)); 2495 if (be_lda != B->rmap->N) { 2496 PetscCall(MatDuplicate(be_local, MAT_DO_NOT_COPY_VALUES, &be_alloc)); 2497 be_local = be_alloc; 2498 } 2499 2500 if (rank == 0) { 2501 PetscCall(MatDenseGetLDA(b_local, &b_lda)); 2502 if (b_lda != B->rmap->N) { 2503 PetscCall(MatDuplicate(b_local, MAT_DO_NOT_COPY_VALUES, &b_alloc)); 2504 PetscCall(MatCopy(b_local, b_alloc, DIFFERENT_NONZERO_PATTERN)); 2505 b_local = b_alloc; 2506 } 2507 } 2508 PetscCallMPI(MPI_Type_contiguous(B->cmap->N, MPIU_SCALAR, &vector_type)); 2509 PetscCallMPI(MPI_Type_commit(&vector_type)); 2510 PetscCall(MatDenseGetArrayReadAndMemType(b_local, &b_array, &b_memtype)); 2511 PetscCall(MatDenseGetArrayWriteAndMemType(be_local, &be_array, &be_memtype)); 2512 PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, vector_type, b_memtype, b_array, be_memtype, be_array, MPI_REPLACE)); 2513 PetscCall(PetscSFBcastEnd(mdn->Mvctx, vector_type, b_array, be_array, MPI_REPLACE)); 2514 PetscCall(MatDenseRestoreArrayWriteAndMemType(be_local, &be_array)); 2515 PetscCall(MatDenseRestoreArrayReadAndMemType(b_local, &b_array)); 2516 if (be_local != ab->Be) PetscCall(MatCopy(be_local, ab->Be, DIFFERENT_NONZERO_PATTERN)); 2517 PetscCallMPI(MPI_Type_free(&vector_type)); 2518 PetscCall(MatDestroy(&be_alloc)); 2519 PetscCall(MatDestroy(&b_alloc)); 2520 } else { 2521 PetscCall(MatDenseGetLDA(B, &lda)); 2522 PetscCall(MatDenseGetArrayRead(B, &read)); 2523 
PetscCall(MatDenseGetArrayWrite(ab->Be, &write)); 2524 for (PetscInt i = 0; i < C->cmap->N; ++i) { 2525 PetscCall(PetscSFBcastBegin(mdn->Mvctx, MPIU_SCALAR, read + i * lda, write + i * ab->Be->rmap->n, MPI_REPLACE)); 2526 PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, read + i * lda, write + i * ab->Be->rmap->n, MPI_REPLACE)); 2527 } 2528 PetscCall(MatDenseRestoreArrayWrite(ab->Be, &write)); 2529 PetscCall(MatDenseRestoreArrayRead(B, &read)); 2530 } 2531 PetscCall(MatMatMultNumeric_SeqDense_SeqDense(((Mat_MPIDense *)A->data)->A, ab->Be, ((Mat_MPIDense *)C->data)->A)); 2532 } 2533 PetscFunctionReturn(PETSC_SUCCESS); 2534 } 2535 2536 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C) 2537 { 2538 Mat_Product *product = C->product; 2539 PetscInt alg; 2540 Mat_MatMultDense *ab; 2541 PetscBool flg; 2542 2543 PetscFunctionBegin; 2544 MatCheckProduct(C, 4); 2545 PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 2546 /* check local size of A and B */ 2547 PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ", %" PetscInt_FMT ")", 2548 A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend); 2549 2550 PetscCall(PetscStrcmp(product->alg, "petsc", &flg)); 2551 alg = flg ? 
0 : 1; 2552 2553 /* setup C */ 2554 PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N)); 2555 PetscCall(MatSetType(C, MATMPIDENSE)); 2556 PetscCall(MatSetUp(C)); 2557 2558 /* create data structure for reuse Cdense */ 2559 PetscCall(PetscNew(&ab)); 2560 2561 switch (alg) { 2562 case 1: /* alg: "elemental" */ 2563 #if PetscDefined(HAVE_ELEMENTAL) 2564 /* create elemental matrices Ae and Be */ 2565 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &ab->Ae)); 2566 PetscCall(MatSetSizes(ab->Ae, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N)); 2567 PetscCall(MatSetType(ab->Ae, MATELEMENTAL)); 2568 PetscCall(MatSetUp(ab->Ae)); 2569 PetscCall(MatSetOption(ab->Ae, MAT_ROW_ORIENTED, PETSC_FALSE)); 2570 2571 PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &ab->Be)); 2572 PetscCall(MatSetSizes(ab->Be, PETSC_DECIDE, PETSC_DECIDE, B->rmap->N, B->cmap->N)); 2573 PetscCall(MatSetType(ab->Be, MATELEMENTAL)); 2574 PetscCall(MatSetUp(ab->Be)); 2575 PetscCall(MatSetOption(ab->Be, MAT_ROW_ORIENTED, PETSC_FALSE)); 2576 2577 /* compute symbolic Ce = Ae*Be */ 2578 PetscCall(MatCreate(PetscObjectComm((PetscObject)C), &ab->Ce)); 2579 PetscCall(MatMatMultSymbolic_Elemental(ab->Ae, ab->Be, fill, ab->Ce)); 2580 #else 2581 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "PETSC_HAVE_ELEMENTAL not defined"); 2582 #endif 2583 break; 2584 default: /* alg: "petsc" */ 2585 ab->Ae = NULL; 2586 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, A->cmap->N, B->cmap->N, NULL, &ab->Be)); 2587 ab->Ce = NULL; 2588 break; 2589 } 2590 2591 C->product->data = ab; 2592 C->product->destroy = MatDestroy_MatMatMult_MPIDense_MPIDense; 2593 C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense; 2594 PetscFunctionReturn(PETSC_SUCCESS); 2595 } 2596 2597 static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C) 2598 { 2599 Mat_Product *product = C->product; 2600 const char *algTypes[2] = {"petsc", "elemental"}; 2601 PetscInt alg, nalg = PetscDefined(HAVE_ELEMENTAL) ? 
2 : 1; 2602 PetscBool flg = PETSC_FALSE; 2603 2604 PetscFunctionBegin; 2605 /* Set default algorithm */ 2606 alg = 0; /* default is PETSc */ 2607 PetscCall(PetscStrcmp(product->alg, "default", &flg)); 2608 if (flg) PetscCall(MatProductSetAlgorithm(C, algTypes[alg])); 2609 2610 /* Get runtime option */ 2611 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat"); 2612 PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_AB", algTypes, nalg, algTypes[alg], &alg, &flg)); 2613 PetscOptionsEnd(); 2614 if (flg) PetscCall(MatProductSetAlgorithm(C, algTypes[alg])); 2615 2616 C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense; 2617 C->ops->productsymbolic = MatProductSymbolic_AB; 2618 PetscFunctionReturn(PETSC_SUCCESS); 2619 } 2620 2621 static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C) 2622 { 2623 Mat_Product *product = C->product; 2624 Mat A = product->A, B = product->B; 2625 2626 PetscFunctionBegin; 2627 PetscCheck(A->rmap->rstart == B->rmap->rstart && A->rmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", 2628 A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend); 2629 C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense; 2630 C->ops->productsymbolic = MatProductSymbolic_AtB; 2631 PetscFunctionReturn(PETSC_SUCCESS); 2632 } 2633 2634 static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C) 2635 { 2636 Mat_Product *product = C->product; 2637 const char *algTypes[2] = {"allgatherv", "cyclic"}; 2638 PetscInt alg, nalg = 2; 2639 PetscBool flg = PETSC_FALSE; 2640 2641 PetscFunctionBegin; 2642 /* Set default algorithm */ 2643 alg = 0; /* default is allgatherv */ 2644 PetscCall(PetscStrcmp(product->alg, "default", &flg)); 2645 if (flg) PetscCall(MatProductSetAlgorithm(C, algTypes[alg])); 
2646 2647 /* Get runtime option */ 2648 if (product->api_user) { 2649 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatTransposeMult", "Mat"); 2650 PetscCall(PetscOptionsEList("-matmattransmult_mpidense_mpidense_via", "Algorithmic approach", "MatMatTransposeMult", algTypes, nalg, algTypes[alg], &alg, &flg)); 2651 PetscOptionsEnd(); 2652 } else { 2653 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_ABt", "Mat"); 2654 PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_ABt", algTypes, nalg, algTypes[alg], &alg, &flg)); 2655 PetscOptionsEnd(); 2656 } 2657 if (flg) PetscCall(MatProductSetAlgorithm(C, algTypes[alg])); 2658 2659 C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense; 2660 C->ops->productsymbolic = MatProductSymbolic_ABt; 2661 PetscFunctionReturn(PETSC_SUCCESS); 2662 } 2663 2664 static PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C) 2665 { 2666 Mat_Product *product = C->product; 2667 2668 PetscFunctionBegin; 2669 switch (product->type) { 2670 case MATPRODUCT_AB: 2671 PetscCall(MatProductSetFromOptions_MPIDense_AB(C)); 2672 break; 2673 case MATPRODUCT_AtB: 2674 PetscCall(MatProductSetFromOptions_MPIDense_AtB(C)); 2675 break; 2676 case MATPRODUCT_ABt: 2677 PetscCall(MatProductSetFromOptions_MPIDense_ABt(C)); 2678 break; 2679 default: 2680 break; 2681 } 2682 PetscFunctionReturn(PETSC_SUCCESS); 2683 } 2684