1 /* 2 Basic functions for basic parallel dense matrices. 3 Portions of this code are under: 4 Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. 5 */ 6 7 #include <../src/mat/impls/dense/mpi/mpidense.h> /*I "petscmat.h" I*/ 8 #include <../src/mat/impls/aij/mpi/mpiaij.h> 9 #include <petscblaslapack.h> 10 #include <petsc/private/vecimpl.h> 11 12 /*@ 13 MatDenseGetLocalMatrix - For a `MATMPIDENSE` or `MATSEQDENSE` matrix returns the sequential 14 matrix that represents the operator. For sequential matrices it returns itself. 15 16 Input Parameter: 17 . A - the sequential or MPI `MATDENSE` matrix 18 19 Output Parameter: 20 . B - the inner matrix 21 22 Level: intermediate 23 24 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATMPIDENSE`, `MATSEQDENSE` 25 @*/ 26 PetscErrorCode MatDenseGetLocalMatrix(Mat A, Mat *B) 27 { 28 Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 29 PetscBool flg; 30 31 PetscFunctionBegin; 32 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 33 PetscAssertPointer(B, 2); 34 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &flg)); 35 if (flg) *B = mat->A; 36 else { 37 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQDENSE, &flg)); 38 PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)A)->type_name); 39 *B = A; 40 } 41 PetscFunctionReturn(PETSC_SUCCESS); 42 } 43 44 static PetscErrorCode MatCopy_MPIDense(Mat A, Mat B, MatStructure s) 45 { 46 Mat_MPIDense *Amat = (Mat_MPIDense *)A->data; 47 Mat_MPIDense *Bmat = (Mat_MPIDense *)B->data; 48 49 PetscFunctionBegin; 50 PetscCall(MatCopy(Amat->A, Bmat->A, s)); 51 PetscFunctionReturn(PETSC_SUCCESS); 52 } 53 54 PetscErrorCode MatShift_MPIDense(Mat A, PetscScalar alpha) 55 { 56 Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 57 PetscInt j, lda, rstart = A->rmap->rstart, rend = A->rmap->rend, rend2; 58 PetscScalar *v; 59 60 PetscFunctionBegin; 61 PetscCall(MatDenseGetArray(mat->A, &v)); 62 PetscCall(MatDenseGetLDA(mat->A, &lda)); 63 rend2 = PetscMin(rend, A->cmap->N); 64 if (rend2 > rstart) { 65 for (j = rstart; j < rend2; j++) v[j - rstart + j * lda] += alpha; 66 PetscCall(PetscLogFlops(rend2 - rstart)); 67 } 68 PetscCall(MatDenseRestoreArray(mat->A, &v)); 69 PetscFunctionReturn(PETSC_SUCCESS); 70 } 71 72 static PetscErrorCode MatGetRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 73 { 74 Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 75 PetscInt lrow, rstart = A->rmap->rstart, rend = A->rmap->rend; 76 77 PetscFunctionBegin; 78 PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows"); 79 lrow = row - rstart; 80 PetscCall(MatGetRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v)); 81 PetscFunctionReturn(PETSC_SUCCESS); 82 } 83 84 static PetscErrorCode MatRestoreRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 85 { 86 Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 87 PetscInt lrow, rstart = A->rmap->rstart, rend = A->rmap->rend; 88 89 PetscFunctionBegin; 90 PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows"); 91 lrow = row - rstart; 92 PetscCall(MatRestoreRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v)); 93 PetscFunctionReturn(PETSC_SUCCESS); 94 } 95 96 static PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A, Mat *a) 97 { 98 Mat_MPIDense *mdn = (Mat_MPIDense *)A->data; 99 PetscInt m = A->rmap->n, rstart = A->rmap->rstart; 100 PetscScalar *array; 101 MPI_Comm comm; 102 PetscBool 
flg; 103 Mat B; 104 105 PetscFunctionBegin; 106 PetscCall(MatHasCongruentLayouts(A, &flg)); 107 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only square matrices supported."); 108 PetscCall(PetscObjectQuery((PetscObject)A, "DiagonalBlock", (PetscObject *)&B)); 109 if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */ 110 #if PetscDefined(HAVE_CUDA) 111 PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSECUDA, &flg)); 112 PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSECUDA); 113 #elif PetscDefined(HAVE_HIP) 114 PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSEHIP, &flg)); 115 PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSEHIP); 116 #endif 117 PetscCall(PetscObjectGetComm((PetscObject)mdn->A, &comm)); 118 PetscCall(MatCreate(comm, &B)); 119 PetscCall(MatSetSizes(B, m, m, m, m)); 120 PetscCall(MatSetType(B, ((PetscObject)mdn->A)->type_name)); 121 PetscCall(MatDenseGetArrayRead(mdn->A, (const PetscScalar **)&array)); 122 PetscCall(MatSeqDenseSetPreallocation(B, array + m * rstart)); 123 PetscCall(MatDenseRestoreArrayRead(mdn->A, (const PetscScalar **)&array)); 124 PetscCall(PetscObjectCompose((PetscObject)A, "DiagonalBlock", (PetscObject)B)); 125 *a = B; 126 PetscCall(MatDestroy(&B)); 127 } else *a = B; 128 PetscFunctionReturn(PETSC_SUCCESS); 129 } 130 131 static PetscErrorCode MatSetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], const PetscScalar v[], InsertMode addv) 132 { 133 Mat_MPIDense *A = (Mat_MPIDense *)mat->data; 134 PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row; 135 PetscBool roworiented = A->roworiented; 136 137 PetscFunctionBegin; 138 for (i = 0; i < m; i++) { 139 if (idxm[i] < 0) continue; 140 PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large"); 141 if (idxm[i] >= rstart && idxm[i] < rend) { 142 row = idxm[i] - rstart; 143 if (roworiented) { 144 PetscCall(MatSetValues(A->A, 1, &row, n, idxn, PetscSafePointerPlusOffset(v, i * n), addv)); 145 } else { 146 for (j = 0; j < n; j++) { 147 if (idxn[j] < 0) continue; 148 PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large"); 149 PetscCall(MatSetValues(A->A, 1, &row, 1, &idxn[j], PetscSafePointerPlusOffset(v, i + j * m), addv)); 150 } 151 } 152 } else if (!A->donotstash) { 153 mat->assembled = PETSC_FALSE; 154 if (roworiented) { 155 PetscCall(MatStashValuesRow_Private(&mat->stash, idxm[i], n, idxn, PetscSafePointerPlusOffset(v, i * n), PETSC_FALSE)); 156 } else { 157 PetscCall(MatStashValuesCol_Private(&mat->stash, idxm[i], n, idxn, PetscSafePointerPlusOffset(v, i), m, PETSC_FALSE)); 158 } 159 } 160 } 161 PetscFunctionReturn(PETSC_SUCCESS); 162 } 163 164 static PetscErrorCode MatGetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[]) 165 { 166 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 167 PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row; 168 169 PetscFunctionBegin; 170 for (i = 0; i < m; i++) { 171 if (idxm[i] < 0) continue; /* negative row */ 172 PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large"); 173 if (idxm[i] >= rstart && idxm[i] < rend) { 174 row = idxm[i] - 
rstart; 175 for (j = 0; j < n; j++) { 176 if (idxn[j] < 0) continue; /* negative column */ 177 PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large"); 178 PetscCall(MatGetValues(mdn->A, 1, &row, 1, &idxn[j], v + i * n + j)); 179 } 180 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported"); 181 } 182 PetscFunctionReturn(PETSC_SUCCESS); 183 } 184 185 static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A, PetscInt *lda) 186 { 187 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 188 189 PetscFunctionBegin; 190 PetscCall(MatDenseGetLDA(a->A, lda)); 191 PetscFunctionReturn(PETSC_SUCCESS); 192 } 193 194 static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A, PetscInt lda) 195 { 196 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 197 MatType mtype = MATSEQDENSE; 198 199 PetscFunctionBegin; 200 if (!a->A) { 201 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 202 PetscCall(PetscLayoutSetUp(A->rmap)); 203 PetscCall(PetscLayoutSetUp(A->cmap)); 204 PetscCall(MatCreate(PETSC_COMM_SELF, &a->A)); 205 PetscCall(MatSetSizes(a->A, A->rmap->n, A->cmap->N, A->rmap->n, A->cmap->N)); 206 #if PetscDefined(HAVE_CUDA) 207 PetscBool iscuda; 208 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSECUDA, &iscuda)); 209 if (iscuda) mtype = MATSEQDENSECUDA; 210 #elif PetscDefined(HAVE_HIP) 211 PetscBool iship; 212 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSEHIP, &iship)); 213 if (iship) mtype = MATSEQDENSEHIP; 214 #endif 215 PetscCall(MatSetType(a->A, mtype)); 216 } 217 PetscCall(MatDenseSetLDA(a->A, lda)); 218 PetscFunctionReturn(PETSC_SUCCESS); 219 } 220 221 static PetscErrorCode MatDenseGetArray_MPIDense(Mat A, PetscScalar **array) 222 { 223 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 224 225 PetscFunctionBegin; 226 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 227 PetscCall(MatDenseGetArray(a->A, array)); 228 PetscFunctionReturn(PETSC_SUCCESS); 229 } 230 231 static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A, PetscScalar **array) 232 { 233 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 234 235 PetscFunctionBegin; 236 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 237 PetscCall(MatDenseGetArrayRead(a->A, (const PetscScalar **)array)); 238 PetscFunctionReturn(PETSC_SUCCESS); 239 } 240 241 static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A, PetscScalar **array) 242 { 243 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 244 245 PetscFunctionBegin; 246 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 247 PetscCall(MatDenseGetArrayWrite(a->A, array)); 248 PetscFunctionReturn(PETSC_SUCCESS); 249 } 250 251 static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A, const PetscScalar *array) 252 { 253 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 254 255 PetscFunctionBegin; 256 PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 257 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 258 PetscCall(MatDensePlaceArray(a->A, array)); 259 PetscFunctionReturn(PETSC_SUCCESS); 260 } 261 262 static PetscErrorCode MatDenseResetArray_MPIDense(Mat A) 263 { 264 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 265 
266 PetscFunctionBegin; 267 PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 268 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 269 PetscCall(MatDenseResetArray(a->A)); 270 PetscFunctionReturn(PETSC_SUCCESS); 271 } 272 273 static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A, const PetscScalar *array) 274 { 275 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 276 277 PetscFunctionBegin; 278 PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 279 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 280 PetscCall(MatDenseReplaceArray(a->A, array)); 281 PetscFunctionReturn(PETSC_SUCCESS); 282 } 283 284 static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A, IS isrow, IS iscol, MatReuse scall, Mat *B) 285 { 286 Mat_MPIDense *mat = (Mat_MPIDense *)A->data, *newmatd; 287 PetscInt lda, i, j, rstart, rend, nrows, ncols, Ncols, nlrows, nlcols; 288 const PetscInt *irow, *icol; 289 const PetscScalar *v; 290 PetscScalar *bv; 291 Mat newmat; 292 IS iscol_local; 293 MPI_Comm comm_is, comm_mat; 294 295 PetscFunctionBegin; 296 PetscCall(PetscObjectGetComm((PetscObject)A, &comm_mat)); 297 PetscCall(PetscObjectGetComm((PetscObject)iscol, &comm_is)); 298 PetscCheck(comm_mat == comm_is, PETSC_COMM_SELF, PETSC_ERR_ARG_NOTSAMECOMM, "IS communicator must match matrix communicator"); 299 300 PetscCall(ISAllGather(iscol, &iscol_local)); 301 PetscCall(ISGetIndices(isrow, &irow)); 302 PetscCall(ISGetIndices(iscol_local, &icol)); 303 PetscCall(ISGetLocalSize(isrow, &nrows)); 304 PetscCall(ISGetLocalSize(iscol, &ncols)); 305 PetscCall(ISGetSize(iscol, &Ncols)); /* global number of columns, size of iscol_local */ 306 307 /* No parallel redistribution currently supported! Should really check each index set 308 to confirm that it is OK. ... Currently supports only submatrix same partitioning as 309 original matrix! */ 310 311 PetscCall(MatGetLocalSize(A, &nlrows, &nlcols)); 312 PetscCall(MatGetOwnershipRange(A, &rstart, &rend)); 313 314 /* Check submatrix call */ 315 if (scall == MAT_REUSE_MATRIX) { 316 /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */ 317 /* Really need to test rows and column sizes! 
*/ 318 newmat = *B; 319 } else { 320 /* Create and fill new matrix */ 321 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &newmat)); 322 PetscCall(MatSetSizes(newmat, nrows, ncols, PETSC_DECIDE, Ncols)); 323 PetscCall(MatSetType(newmat, ((PetscObject)A)->type_name)); 324 PetscCall(MatMPIDenseSetPreallocation(newmat, NULL)); 325 } 326 327 /* Now extract the data pointers and do the copy, column at a time */ 328 newmatd = (Mat_MPIDense *)newmat->data; 329 PetscCall(MatDenseGetArray(newmatd->A, &bv)); 330 PetscCall(MatDenseGetArrayRead(mat->A, &v)); 331 PetscCall(MatDenseGetLDA(mat->A, &lda)); 332 for (i = 0; i < Ncols; i++) { 333 const PetscScalar *av = v + lda * icol[i]; 334 for (j = 0; j < nrows; j++) *bv++ = av[irow[j] - rstart]; 335 } 336 PetscCall(MatDenseRestoreArrayRead(mat->A, &v)); 337 PetscCall(MatDenseRestoreArray(newmatd->A, &bv)); 338 339 /* Assemble the matrices so that the correct flags are set */ 340 PetscCall(MatAssemblyBegin(newmat, MAT_FINAL_ASSEMBLY)); 341 PetscCall(MatAssemblyEnd(newmat, MAT_FINAL_ASSEMBLY)); 342 343 /* Free work space */ 344 PetscCall(ISRestoreIndices(isrow, &irow)); 345 PetscCall(ISRestoreIndices(iscol_local, &icol)); 346 PetscCall(ISDestroy(&iscol_local)); 347 *B = newmat; 348 PetscFunctionReturn(PETSC_SUCCESS); 349 } 350 351 static PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A, PetscScalar **array) 352 { 353 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 354 355 PetscFunctionBegin; 356 PetscCall(MatDenseRestoreArray(a->A, array)); 357 PetscFunctionReturn(PETSC_SUCCESS); 358 } 359 360 static PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A, PetscScalar **array) 361 { 362 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 363 364 PetscFunctionBegin; 365 PetscCall(MatDenseRestoreArrayRead(a->A, (const PetscScalar **)array)); 366 PetscFunctionReturn(PETSC_SUCCESS); 367 } 368 369 static PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A, PetscScalar **array) 370 { 371 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 372 373 PetscFunctionBegin; 374 PetscCall(MatDenseRestoreArrayWrite(a->A, array)); 375 PetscFunctionReturn(PETSC_SUCCESS); 376 } 377 378 static PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat, MatAssemblyType mode) 379 { 380 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 381 PetscInt nstash, reallocs; 382 383 PetscFunctionBegin; 384 if (mdn->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS); 385 386 PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range)); 387 PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs)); 388 PetscCall(PetscInfo(mdn->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs)); 389 PetscFunctionReturn(PETSC_SUCCESS); 390 } 391 392 static PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat, MatAssemblyType mode) 393 { 394 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 395 PetscInt i, *row, *col, flg, j, rstart, ncols; 396 PetscMPIInt n; 397 PetscScalar *val; 398 399 PetscFunctionBegin; 400 if (!mdn->donotstash && !mat->nooffprocentries) { 401 /* wait on receives */ 402 while (1) { 403 PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg)); 404 if (!flg) break; 405 406 for (i = 0; i < n;) { 407 /* Now identify the consecutive vals belonging to the same row */ 408 for (j = i, rstart = row[j]; j < n; j++) { 409 if (row[j] != rstart) break; 410 } 411 if (j < n) ncols = j - i; 412 else ncols = n - i; 413 /* Now assemble all these values with a single function call */ 414 PetscCall(MatSetValues_MPIDense(mat, 
1, row + i, ncols, col + i, val + i, mat->insertmode)); 415 i = j; 416 } 417 } 418 PetscCall(MatStashScatterEnd_Private(&mat->stash)); 419 } 420 421 PetscCall(MatAssemblyBegin(mdn->A, mode)); 422 PetscCall(MatAssemblyEnd(mdn->A, mode)); 423 PetscFunctionReturn(PETSC_SUCCESS); 424 } 425 426 static PetscErrorCode MatZeroEntries_MPIDense(Mat A) 427 { 428 Mat_MPIDense *l = (Mat_MPIDense *)A->data; 429 430 PetscFunctionBegin; 431 PetscCall(MatZeroEntries(l->A)); 432 PetscFunctionReturn(PETSC_SUCCESS); 433 } 434 435 static PetscErrorCode MatZeroRows_MPIDense(Mat A, PetscInt n, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) 436 { 437 Mat_MPIDense *l = (Mat_MPIDense *)A->data; 438 PetscInt i, len, *lrows; 439 440 PetscFunctionBegin; 441 /* get locally owned rows */ 442 PetscCall(PetscLayoutMapLocal(A->rmap, n, rows, &len, &lrows, NULL)); 443 /* fix right-hand side if needed */ 444 if (x && b) { 445 const PetscScalar *xx; 446 PetscScalar *bb; 447 448 PetscCall(VecGetArrayRead(x, &xx)); 449 PetscCall(VecGetArrayWrite(b, &bb)); 450 for (i = 0; i < len; ++i) bb[lrows[i]] = diag * xx[lrows[i]]; 451 PetscCall(VecRestoreArrayRead(x, &xx)); 452 PetscCall(VecRestoreArrayWrite(b, &bb)); 453 } 454 PetscCall(MatZeroRows(l->A, len, lrows, 0.0, NULL, NULL)); 455 if (diag != 0.0) { 456 Vec d; 457 458 PetscCall(MatCreateVecs(A, NULL, &d)); 459 PetscCall(VecSet(d, diag)); 460 PetscCall(MatDiagonalSet(A, d, INSERT_VALUES)); 461 PetscCall(VecDestroy(&d)); 462 } 463 PetscCall(PetscFree(lrows)); 464 PetscFunctionReturn(PETSC_SUCCESS); 465 } 466 467 PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat, Vec, Vec); 468 PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat, Vec, Vec, Vec); 469 PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat, Vec, Vec); 470 PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat, Vec, Vec, Vec); 471 472 static PetscErrorCode MatMult_MPIDense(Mat mat, Vec xx, Vec yy) 473 { 474 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 475 const PetscScalar *ax; 476 PetscScalar *ay; 477 PetscMemType axmtype, aymtype; 478 479 PetscFunctionBegin; 480 if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat)); 481 PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype)); 482 PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype)); 483 PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE)); 484 PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE)); 485 PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay)); 486 PetscCall(VecRestoreArrayReadAndMemType(xx, &ax)); 487 PetscCall((*mdn->A->ops->mult)(mdn->A, mdn->lvec, yy)); 488 PetscFunctionReturn(PETSC_SUCCESS); 489 } 490 491 static PetscErrorCode MatMultAddColumnRange_MPIDense(Mat mat, Vec xx, Vec yy, Vec zz, PetscInt c_start, PetscInt c_end) 492 { 493 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 494 const PetscScalar *ax; 495 PetscScalar *ay; 496 PetscMemType axmtype, aymtype; 497 498 PetscFunctionBegin; 499 if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat)); 500 PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype)); 501 PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype)); 502 PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE)); 503 PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE)); 504 PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay)); 505 PetscCall(VecRestoreArrayReadAndMemType(xx, &ax)); 506 PetscUseMethod(mdn->A, "MatMultAddColumnRange_C", (Mat, Vec, Vec, Vec, PetscInt, 
PetscInt), (mdn->A, mdn->lvec, yy, zz, c_start, c_end)); 507 PetscFunctionReturn(PETSC_SUCCESS); 508 } 509 510 static PetscErrorCode MatMultAdd_MPIDense(Mat mat, Vec xx, Vec yy, Vec zz) 511 { 512 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 513 const PetscScalar *ax; 514 PetscScalar *ay; 515 PetscMemType axmtype, aymtype; 516 517 PetscFunctionBegin; 518 if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat)); 519 PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype)); 520 PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype)); 521 PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE)); 522 PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE)); 523 PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay)); 524 PetscCall(VecRestoreArrayReadAndMemType(xx, &ax)); 525 PetscCall((*mdn->A->ops->multadd)(mdn->A, mdn->lvec, yy, zz)); 526 PetscFunctionReturn(PETSC_SUCCESS); 527 } 528 529 static PetscErrorCode MatMultHermitianTransposeColumnRange_MPIDense(Mat A, Vec xx, Vec yy, PetscInt c_start, PetscInt c_end) 530 { 531 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 532 const PetscScalar *ax; 533 PetscScalar *ay; 534 PetscMemType axmtype, aymtype; 535 536 PetscFunctionBegin; 537 if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); 538 PetscCall(VecSet(yy, 0.0)); 539 PetscUseMethod(a->A, "MatMultHermitianTransposeColumnRange_C", (Mat, Vec, Vec, PetscInt, PetscInt), (a->A, xx, a->lvec, c_start, c_end)); 540 PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype)); 541 PetscCall(VecGetArrayAndMemType(yy, &ay, &aymtype)); 542 PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM)); 543 PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM)); 544 PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax)); 545 PetscCall(VecRestoreArrayAndMemType(yy, &ay)); 546 PetscFunctionReturn(PETSC_SUCCESS); 547 } 548 549 static PetscErrorCode MatMultTransposeKernel_MPIDense(Mat A, Vec xx, Vec yy, PetscBool herm) 550 { 551 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 552 const PetscScalar *ax; 553 PetscScalar *ay; 554 PetscMemType axmtype, aymtype; 555 556 PetscFunctionBegin; 557 if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); 558 PetscCall(VecSet(yy, 0.0)); 559 if (herm) PetscCall((*a->A->ops->multhermitiantranspose)(a->A, xx, a->lvec)); 560 else PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec)); 561 PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype)); 562 PetscCall(VecGetArrayAndMemType(yy, &ay, &aymtype)); 563 PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM)); 564 PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM)); 565 PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax)); 566 PetscCall(VecRestoreArrayAndMemType(yy, &ay)); 567 PetscFunctionReturn(PETSC_SUCCESS); 568 } 569 570 static PetscErrorCode MatMultHermitianTransposeAddColumnRange_MPIDense(Mat A, Vec xx, Vec yy, Vec zz, PetscInt c_start, PetscInt c_end) 571 { 572 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 573 const PetscScalar *ax; 574 PetscScalar *ay; 575 PetscMemType axmtype, aymtype; 576 577 PetscFunctionBegin; 578 if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); 579 PetscCall(VecCopy(yy, zz)); 580 PetscMPIInt rank; 581 PetscCallMPI(MPI_Comm_rank(MPI_COMM_WORLD, &rank)); 582 PetscUseMethod(a->A, "MatMultHermitianTransposeColumnRange_C", (Mat, Vec, Vec, PetscInt, PetscInt), (a->A, xx, a->lvec, c_start, c_end)); 583 
PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype)); 584 PetscCall(VecGetArrayAndMemType(zz, &ay, &aymtype)); 585 PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM)); 586 PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM)); 587 PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax)); 588 PetscCall(VecRestoreArrayAndMemType(zz, &ay)); 589 PetscFunctionReturn(PETSC_SUCCESS); 590 } 591 592 static PetscErrorCode MatMultTransposeAddKernel_MPIDense(Mat A, Vec xx, Vec yy, Vec zz, PetscBool herm) 593 { 594 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 595 const PetscScalar *ax; 596 PetscScalar *ay; 597 PetscMemType axmtype, aymtype; 598 599 PetscFunctionBegin; 600 if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); 601 PetscCall(VecCopy(yy, zz)); 602 if (herm) PetscCall((*a->A->ops->multhermitiantranspose)(a->A, xx, a->lvec)); 603 else PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec)); 604 PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype)); 605 PetscCall(VecGetArrayAndMemType(zz, &ay, &aymtype)); 606 PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM)); 607 PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM)); 608 PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax)); 609 PetscCall(VecRestoreArrayAndMemType(zz, &ay)); 610 PetscFunctionReturn(PETSC_SUCCESS); 611 } 612 613 static PetscErrorCode MatMultTranspose_MPIDense(Mat A, Vec xx, Vec yy) 614 { 615 PetscFunctionBegin; 616 PetscCall(MatMultTransposeKernel_MPIDense(A, xx, yy, PETSC_FALSE)); 617 PetscFunctionReturn(PETSC_SUCCESS); 618 } 619 620 static PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A, Vec xx, Vec yy, Vec zz) 621 { 622 PetscFunctionBegin; 623 PetscCall(MatMultTransposeAddKernel_MPIDense(A, xx, yy, zz, PETSC_FALSE)); 624 PetscFunctionReturn(PETSC_SUCCESS); 625 } 626 627 static PetscErrorCode MatMultHermitianTranspose_MPIDense(Mat A, Vec xx, Vec yy) 628 { 629 PetscFunctionBegin; 630 PetscCall(MatMultTransposeKernel_MPIDense(A, xx, yy, PETSC_TRUE)); 631 PetscFunctionReturn(PETSC_SUCCESS); 632 } 633 634 static PetscErrorCode MatMultHermitianTransposeAdd_MPIDense(Mat A, Vec xx, Vec yy, Vec zz) 635 { 636 PetscFunctionBegin; 637 PetscCall(MatMultTransposeAddKernel_MPIDense(A, xx, yy, zz, PETSC_TRUE)); 638 PetscFunctionReturn(PETSC_SUCCESS); 639 } 640 641 PetscErrorCode MatGetDiagonal_MPIDense(Mat A, Vec v) 642 { 643 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 644 PetscInt lda, len, i, nl, ng, m = A->rmap->n, radd; 645 PetscScalar *x; 646 const PetscScalar *av; 647 648 PetscFunctionBegin; 649 PetscCall(VecGetArray(v, &x)); 650 PetscCall(VecGetSize(v, &ng)); 651 PetscCheck(ng == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming mat and vec"); 652 PetscCall(VecGetLocalSize(v, &nl)); 653 len = PetscMin(a->A->rmap->n, a->A->cmap->n); 654 radd = A->rmap->rstart * m; 655 PetscCall(MatDenseGetArrayRead(a->A, &av)); 656 PetscCall(MatDenseGetLDA(a->A, &lda)); 657 for (i = 0; i < len; i++) x[i] = av[radd + i * lda + i]; 658 PetscCall(MatDenseRestoreArrayRead(a->A, &av)); 659 if (nl - i > 0) PetscCall(PetscArrayzero(x + i, nl - i)); 660 PetscCall(VecRestoreArray(v, &x)); 661 PetscFunctionReturn(PETSC_SUCCESS); 662 } 663 664 static PetscErrorCode MatDestroy_MPIDense(Mat mat) 665 { 666 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 667 668 PetscFunctionBegin; 669 PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N)); 
670 PetscCall(MatStashDestroy_Private(&mat->stash)); 671 PetscCheck(!mdn->vecinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 672 PetscCheck(!mdn->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 673 PetscCall(MatDestroy(&mdn->A)); 674 PetscCall(VecDestroy(&mdn->lvec)); 675 PetscCall(PetscSFDestroy(&mdn->Mvctx)); 676 PetscCall(VecDestroy(&mdn->cvec)); 677 PetscCall(MatDestroy(&mdn->cmat)); 678 679 PetscCall(PetscFree(mat->data)); 680 PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL)); 681 682 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", NULL)); 683 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", NULL)); 684 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", NULL)); 685 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", NULL)); 686 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", NULL)); 687 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", NULL)); 688 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", NULL)); 689 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", NULL)); 690 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", NULL)); 691 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", NULL)); 692 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", NULL)); 693 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL)); 694 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", NULL)); 695 #if defined(PETSC_HAVE_ELEMENTAL) 696 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", NULL)); 697 #endif 698 #if defined(PETSC_HAVE_SCALAPACK) 699 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", NULL)); 700 #endif 701 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", NULL)); 702 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", NULL)); 703 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", NULL)); 704 #if defined(PETSC_HAVE_CUDA) 705 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", NULL)); 706 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", NULL)); 707 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", NULL)); 708 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensecuda_mpidense_C", NULL)); 709 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", NULL)); 710 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", NULL)); 711 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", NULL)); 712 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", NULL)); 713 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArray_C", NULL)); 714 
PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayRead_C", NULL)); 715 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayWrite_C", NULL)); 716 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArray_C", NULL)); 717 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayRead_C", NULL)); 718 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayWrite_C", NULL)); 719 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAPlaceArray_C", NULL)); 720 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAResetArray_C", NULL)); 721 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAReplaceArray_C", NULL)); 722 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDASetPreallocation_C", NULL)); 723 #endif 724 #if defined(PETSC_HAVE_HIP) 725 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", NULL)); 726 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", NULL)); 727 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", NULL)); 728 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensehip_mpidense_C", NULL)); 729 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensehip_C", NULL)); 730 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidensehip_C", NULL)); 731 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaij_C", NULL)); 732 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaijhipsparse_C", NULL)); 733 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArray_C", NULL)); 734 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayRead_C", NULL)); 735 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayWrite_C", NULL)); 736 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArray_C", NULL)); 737 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayRead_C", NULL)); 738 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayWrite_C", NULL)); 739 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPPlaceArray_C", NULL)); 740 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPResetArray_C", NULL)); 741 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPReplaceArray_C", NULL)); 742 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPSetPreallocation_C", NULL)); 743 #endif 744 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", NULL)); 745 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", NULL)); 746 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", NULL)); 747 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", NULL)); 748 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", NULL)); 749 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", NULL)); 750 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", NULL)); 751 
PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", NULL)); 752 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", NULL)); 753 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", NULL)); 754 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultAddColumnRange_C", NULL)); 755 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeColumnRange_C", NULL)); 756 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeAddColumnRange_C", NULL)); 757 758 PetscCall(PetscObjectCompose((PetscObject)mat, "DiagonalBlock", NULL)); 759 PetscFunctionReturn(PETSC_SUCCESS); 760 } 761 762 #include <petscdraw.h> 763 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat, PetscViewer viewer) 764 { 765 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 766 PetscMPIInt rank; 767 PetscViewerType vtype; 768 PetscBool iascii, isdraw; 769 PetscViewer sviewer; 770 PetscViewerFormat format; 771 772 PetscFunctionBegin; 773 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 774 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 775 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 776 if (iascii) { 777 PetscCall(PetscViewerGetType(viewer, &vtype)); 778 PetscCall(PetscViewerGetFormat(viewer, &format)); 779 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 780 MatInfo info; 781 PetscCall(MatGetInfo(mat, MAT_LOCAL, &info)); 782 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 783 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %" PetscInt_FMT " \n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, 784 (PetscInt)info.memory)); 785 PetscCall(PetscViewerFlush(viewer)); 786 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 787 if (mdn->Mvctx) PetscCall(PetscSFView(mdn->Mvctx, viewer)); 788 PetscFunctionReturn(PETSC_SUCCESS); 789 } else if (format == PETSC_VIEWER_ASCII_INFO) { 790 PetscFunctionReturn(PETSC_SUCCESS); 791 } 792 } else if (isdraw) { 793 PetscDraw draw; 794 PetscBool isnull; 795 796 PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw)); 797 PetscCall(PetscDrawIsNull(draw, &isnull)); 798 if (isnull) PetscFunctionReturn(PETSC_SUCCESS); 799 } 800 801 { 802 /* assemble the entire matrix onto first processor. */ 803 Mat A; 804 PetscInt M = mat->rmap->N, N = mat->cmap->N, m, row, i, nz; 805 PetscInt *cols; 806 PetscScalar *vals; 807 808 PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A)); 809 if (rank == 0) { 810 PetscCall(MatSetSizes(A, M, N, M, N)); 811 } else { 812 PetscCall(MatSetSizes(A, 0, 0, M, N)); 813 } 814 /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */ 815 PetscCall(MatSetType(A, MATMPIDENSE)); 816 PetscCall(MatMPIDenseSetPreallocation(A, NULL)); 817 818 /* Copy the matrix ... 
This isn't the most efficient means, 819 but it's quick for now */ 820 A->insertmode = INSERT_VALUES; 821 822 row = mat->rmap->rstart; 823 m = mdn->A->rmap->n; 824 for (i = 0; i < m; i++) { 825 PetscCall(MatGetRow_MPIDense(mat, row, &nz, &cols, &vals)); 826 PetscCall(MatSetValues_MPIDense(A, 1, &row, nz, cols, vals, INSERT_VALUES)); 827 PetscCall(MatRestoreRow_MPIDense(mat, row, &nz, &cols, &vals)); 828 row++; 829 } 830 831 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 832 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 833 PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 834 if (rank == 0) { 835 PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIDense *)A->data)->A, ((PetscObject)mat)->name)); 836 PetscCall(MatView_SeqDense(((Mat_MPIDense *)A->data)->A, sviewer)); 837 } 838 PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 839 PetscCall(MatDestroy(&A)); 840 } 841 PetscFunctionReturn(PETSC_SUCCESS); 842 } 843 844 static PetscErrorCode MatView_MPIDense(Mat mat, PetscViewer viewer) 845 { 846 PetscBool iascii, isbinary, isdraw, issocket; 847 848 PetscFunctionBegin; 849 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 850 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 851 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket)); 852 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 853 854 if (iascii || issocket || isdraw) { 855 PetscCall(MatView_MPIDense_ASCIIorDraworSocket(mat, viewer)); 856 } else if (isbinary) PetscCall(MatView_Dense_Binary(mat, viewer)); 857 PetscFunctionReturn(PETSC_SUCCESS); 858 } 859 860 static PetscErrorCode MatGetInfo_MPIDense(Mat A, MatInfoType flag, MatInfo *info) 861 { 862 Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 863 Mat mdn = mat->A; 864 PetscLogDouble isend[5], irecv[5]; 865 866 PetscFunctionBegin; 867 info->block_size = 1.0; 868 869 PetscCall(MatGetInfo(mdn, MAT_LOCAL, info)); 870 871 isend[0] = info->nz_used; 872 isend[1] = info->nz_allocated; 873 isend[2] = info->nz_unneeded; 874 isend[3] = info->memory; 875 isend[4] = info->mallocs; 876 if (flag == MAT_LOCAL) { 877 info->nz_used = isend[0]; 878 info->nz_allocated = isend[1]; 879 info->nz_unneeded = isend[2]; 880 info->memory = isend[3]; 881 info->mallocs = isend[4]; 882 } else if (flag == MAT_GLOBAL_MAX) { 883 PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)A))); 884 885 info->nz_used = irecv[0]; 886 info->nz_allocated = irecv[1]; 887 info->nz_unneeded = irecv[2]; 888 info->memory = irecv[3]; 889 info->mallocs = irecv[4]; 890 } else if (flag == MAT_GLOBAL_SUM) { 891 PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)A))); 892 893 info->nz_used = irecv[0]; 894 info->nz_allocated = irecv[1]; 895 info->nz_unneeded = irecv[2]; 896 info->memory = irecv[3]; 897 info->mallocs = irecv[4]; 898 } 899 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 900 info->fill_ratio_needed = 0; 901 info->factor_mallocs = 0; 902 PetscFunctionReturn(PETSC_SUCCESS); 903 } 904 905 static PetscErrorCode MatSetOption_MPIDense(Mat A, MatOption op, PetscBool flg) 906 { 907 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 908 909 PetscFunctionBegin; 910 switch (op) { 911 case MAT_NEW_NONZERO_LOCATIONS: 912 case MAT_NEW_NONZERO_LOCATION_ERR: 913 case MAT_NEW_NONZERO_ALLOCATION_ERR: 914 MatCheckPreallocated(A, 1); 915 
PetscCall(MatSetOption(a->A, op, flg)); 916 break; 917 case MAT_ROW_ORIENTED: 918 MatCheckPreallocated(A, 1); 919 a->roworiented = flg; 920 PetscCall(MatSetOption(a->A, op, flg)); 921 break; 922 case MAT_FORCE_DIAGONAL_ENTRIES: 923 case MAT_KEEP_NONZERO_PATTERN: 924 case MAT_USE_HASH_TABLE: 925 case MAT_SORTED_FULL: 926 case MAT_IGNORE_LOWER_TRIANGULAR: 927 case MAT_IGNORE_ZERO_ENTRIES: 928 case MAT_SUBMAT_SINGLEIS: 929 PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); 930 break; 931 case MAT_IGNORE_OFF_PROC_ENTRIES: 932 a->donotstash = flg; 933 break; 934 case MAT_SYMMETRIC: 935 case MAT_STRUCTURALLY_SYMMETRIC: 936 case MAT_HERMITIAN: 937 case MAT_SYMMETRY_ETERNAL: 938 case MAT_STRUCTURAL_SYMMETRY_ETERNAL: 939 case MAT_SPD: 940 case MAT_SPD_ETERNAL: 941 /* if the diagonal matrix is square it inherits some of the properties above */ 942 if (a->A && A->rmap->n == A->cmap->n) PetscCall(MatSetOption(a->A, op, flg)); 943 break; 944 default: 945 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %s", MatOptions[op]); 946 } 947 PetscFunctionReturn(PETSC_SUCCESS); 948 } 949 950 static PetscErrorCode MatDiagonalScale_MPIDense(Mat A, Vec ll, Vec rr) 951 { 952 Mat_MPIDense *mdn = (Mat_MPIDense *)A->data; 953 const PetscScalar *l; 954 PetscScalar x, *v, *vv, *r; 955 PetscInt i, j, s2a, s3a, s2, s3, m = mdn->A->rmap->n, n = mdn->A->cmap->n, lda; 956 957 PetscFunctionBegin; 958 PetscCall(MatDenseGetArray(mdn->A, &vv)); 959 PetscCall(MatDenseGetLDA(mdn->A, &lda)); 960 PetscCall(MatGetLocalSize(A, &s2, &s3)); 961 if (ll) { 962 PetscCall(VecGetLocalSize(ll, &s2a)); 963 PetscCheck(s2a == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left scaling vector non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT, s2a, s2); 964 PetscCall(VecGetArrayRead(ll, &l)); 965 for (i = 0; i < m; i++) { 966 x = l[i]; 967 v = vv + i; 968 for (j = 0; j < n; j++) { 969 (*v) *= x; 970 v += lda; 971 } 972 } 973 PetscCall(VecRestoreArrayRead(ll, &l)); 974 PetscCall(PetscLogFlops(1.0 * n * m)); 975 } 976 if (rr) { 977 const PetscScalar *ar; 978 979 PetscCall(VecGetLocalSize(rr, &s3a)); 980 PetscCheck(s3a == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Right scaling vec non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT ".", s3a, s3); 981 PetscCall(VecGetArrayRead(rr, &ar)); 982 if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); 983 PetscCall(VecGetArray(mdn->lvec, &r)); 984 PetscCall(PetscSFBcastBegin(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE)); 985 PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE)); 986 PetscCall(VecRestoreArrayRead(rr, &ar)); 987 for (i = 0; i < n; i++) { 988 x = r[i]; 989 v = vv + i * lda; 990 for (j = 0; j < m; j++) (*v++) *= x; 991 } 992 PetscCall(VecRestoreArray(mdn->lvec, &r)); 993 PetscCall(PetscLogFlops(1.0 * n * m)); 994 } 995 PetscCall(MatDenseRestoreArray(mdn->A, &vv)); 996 PetscFunctionReturn(PETSC_SUCCESS); 997 } 998 999 static PetscErrorCode MatNorm_MPIDense(Mat A, NormType type, PetscReal *nrm) 1000 { 1001 Mat_MPIDense *mdn = (Mat_MPIDense *)A->data; 1002 PetscInt i, j; 1003 PetscMPIInt size, iN; 1004 PetscReal sum = 0.0; 1005 const PetscScalar *av, *v; 1006 1007 PetscFunctionBegin; 1008 PetscCall(MatDenseGetArrayRead(mdn->A, &av)); 1009 v = av; 1010 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 1011 if (size == 1) { 1012 PetscCall(MatNorm(mdn->A, type, nrm)); 1013 } else { 1014 if (type == NORM_FROBENIUS) { 1015 for (i = 0; i < mdn->A->cmap->n * mdn->A->rmap->n; i++) { 1016 sum += PetscRealPart(PetscConj(*v) * 
(*v)); 1017 v++; 1018 } 1019 PetscCallMPI(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A))); 1020 *nrm = PetscSqrtReal(*nrm); 1021 PetscCall(PetscLogFlops(2.0 * mdn->A->cmap->n * mdn->A->rmap->n)); 1022 } else if (type == NORM_1) { 1023 PetscReal *tmp, *tmp2; 1024 PetscCall(PetscCalloc2(A->cmap->N, &tmp, A->cmap->N, &tmp2)); 1025 *nrm = 0.0; 1026 v = av; 1027 for (j = 0; j < mdn->A->cmap->n; j++) { 1028 for (i = 0; i < mdn->A->rmap->n; i++) { 1029 tmp[j] += PetscAbsScalar(*v); 1030 v++; 1031 } 1032 } 1033 PetscCall(PetscMPIIntCast(A->cmap->N, &iN)); 1034 PetscCallMPI(MPIU_Allreduce(tmp, tmp2, iN, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A))); 1035 for (j = 0; j < A->cmap->N; j++) { 1036 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 1037 } 1038 PetscCall(PetscFree2(tmp, tmp2)); 1039 PetscCall(PetscLogFlops(A->cmap->n * A->rmap->n)); 1040 } else if (type == NORM_INFINITY) { /* max row norm */ 1041 PetscReal ntemp; 1042 PetscCall(MatNorm(mdn->A, type, &ntemp)); 1043 PetscCallMPI(MPIU_Allreduce(&ntemp, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A))); 1044 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for two norm"); 1045 } 1046 PetscCall(MatDenseRestoreArrayRead(mdn->A, &av)); 1047 PetscFunctionReturn(PETSC_SUCCESS); 1048 } 1049 1050 static PetscErrorCode MatTranspose_MPIDense(Mat A, MatReuse reuse, Mat *matout) 1051 { 1052 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1053 Mat B; 1054 PetscInt M = A->rmap->N, N = A->cmap->N, m, n, *rwork, rstart = A->rmap->rstart; 1055 PetscInt j, i, lda; 1056 PetscScalar *v; 1057 1058 PetscFunctionBegin; 1059 if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout)); 1060 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 1061 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 1062 PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M)); 1063 PetscCall(MatSetType(B, ((PetscObject)A)->type_name)); 1064 PetscCall(MatMPIDenseSetPreallocation(B, NULL)); 1065 } else B = *matout; 1066 1067 m = a->A->rmap->n; 1068 n = a->A->cmap->n; 1069 PetscCall(MatDenseGetArrayRead(a->A, (const PetscScalar **)&v)); 1070 PetscCall(MatDenseGetLDA(a->A, &lda)); 1071 PetscCall(PetscMalloc1(m, &rwork)); 1072 for (i = 0; i < m; i++) rwork[i] = rstart + i; 1073 for (j = 0; j < n; j++) { 1074 PetscCall(MatSetValues(B, 1, &j, m, rwork, v, INSERT_VALUES)); 1075 v = PetscSafePointerPlusOffset(v, lda); 1076 } 1077 PetscCall(MatDenseRestoreArrayRead(a->A, (const PetscScalar **)&v)); 1078 PetscCall(PetscFree(rwork)); 1079 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 1080 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 1081 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 1082 *matout = B; 1083 } else { 1084 PetscCall(MatHeaderMerge(A, &B)); 1085 } 1086 PetscFunctionReturn(PETSC_SUCCESS); 1087 } 1088 1089 static PetscErrorCode MatDuplicate_MPIDense(Mat, MatDuplicateOption, Mat *); 1090 PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat, PetscScalar); 1091 1092 static PetscErrorCode MatSetUp_MPIDense(Mat A) 1093 { 1094 PetscFunctionBegin; 1095 PetscCall(PetscLayoutSetUp(A->rmap)); 1096 PetscCall(PetscLayoutSetUp(A->cmap)); 1097 if (!A->preallocated) PetscCall(MatMPIDenseSetPreallocation(A, NULL)); 1098 PetscFunctionReturn(PETSC_SUCCESS); 1099 } 1100 1101 static PetscErrorCode MatAXPY_MPIDense(Mat Y, PetscScalar alpha, Mat X, MatStructure str) 1102 { 1103 Mat_MPIDense *A = (Mat_MPIDense *)Y->data, *B = (Mat_MPIDense *)X->data; 1104 1105 
PetscFunctionBegin; 1106 PetscCall(MatAXPY(A->A, alpha, B->A, str)); 1107 PetscFunctionReturn(PETSC_SUCCESS); 1108 } 1109 1110 static PetscErrorCode MatConjugate_MPIDense(Mat mat) 1111 { 1112 Mat_MPIDense *a = (Mat_MPIDense *)mat->data; 1113 1114 PetscFunctionBegin; 1115 PetscCall(MatConjugate(a->A)); 1116 PetscFunctionReturn(PETSC_SUCCESS); 1117 } 1118 1119 static PetscErrorCode MatRealPart_MPIDense(Mat A) 1120 { 1121 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1122 1123 PetscFunctionBegin; 1124 PetscCall(MatRealPart(a->A)); 1125 PetscFunctionReturn(PETSC_SUCCESS); 1126 } 1127 1128 static PetscErrorCode MatImaginaryPart_MPIDense(Mat A) 1129 { 1130 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1131 1132 PetscFunctionBegin; 1133 PetscCall(MatImaginaryPart(a->A)); 1134 PetscFunctionReturn(PETSC_SUCCESS); 1135 } 1136 1137 static PetscErrorCode MatGetColumnVector_MPIDense(Mat A, Vec v, PetscInt col) 1138 { 1139 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1140 1141 PetscFunctionBegin; 1142 PetscCheck(a->A, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing local matrix"); 1143 PetscCheck(a->A->ops->getcolumnvector, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing get column operation"); 1144 PetscCall((*a->A->ops->getcolumnvector)(a->A, v, col)); 1145 PetscFunctionReturn(PETSC_SUCCESS); 1146 } 1147 1148 PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat, PetscInt, PetscReal *); 1149 1150 static PetscErrorCode MatGetColumnReductions_MPIDense(Mat A, PetscInt type, PetscReal *reductions) 1151 { 1152 PetscInt i, m, n; 1153 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1154 PetscReal *work; 1155 PetscMPIInt in; 1156 1157 PetscFunctionBegin; 1158 PetscCall(MatGetSize(A, &m, &n)); 1159 PetscCall(PetscMalloc1(n, &work)); 1160 if (type == REDUCTION_MEAN_REALPART) { 1161 PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_REALPART, work)); 1162 } else if (type == REDUCTION_MEAN_IMAGINARYPART) { 1163 PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_IMAGINARYPART, work)); 1164 } else { 1165 PetscCall(MatGetColumnReductions_SeqDense(a->A, type, work)); 1166 } 1167 if (type == NORM_2) { 1168 for (i = 0; i < n; i++) work[i] *= work[i]; 1169 } 1170 PetscCall(PetscMPIIntCast(n, &in)); 1171 if (type == NORM_INFINITY) { 1172 PetscCallMPI(MPIU_Allreduce(work, reductions, in, MPIU_REAL, MPIU_MAX, A->hdr.comm)); 1173 } else { 1174 PetscCallMPI(MPIU_Allreduce(work, reductions, in, MPIU_REAL, MPIU_SUM, A->hdr.comm)); 1175 } 1176 PetscCall(PetscFree(work)); 1177 if (type == NORM_2) { 1178 for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]); 1179 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 1180 for (i = 0; i < n; i++) reductions[i] /= m; 1181 } 1182 PetscFunctionReturn(PETSC_SUCCESS); 1183 } 1184 1185 static PetscErrorCode MatSetRandom_MPIDense(Mat x, PetscRandom rctx) 1186 { 1187 Mat_MPIDense *d = (Mat_MPIDense *)x->data; 1188 1189 PetscFunctionBegin; 1190 PetscCall(MatSetRandom(d->A, rctx)); 1191 #if defined(PETSC_HAVE_DEVICE) 1192 x->offloadmask = d->A->offloadmask; 1193 #endif 1194 PetscFunctionReturn(PETSC_SUCCESS); 1195 } 1196 1197 static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A, PetscBool *missing, PetscInt *d) 1198 { 1199 PetscFunctionBegin; 1200 *missing = PETSC_FALSE; 1201 PetscFunctionReturn(PETSC_SUCCESS); 1202 } 1203 1204 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat); 1205 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat, 
Mat, Mat); 1206 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat); 1207 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat); 1208 static PetscErrorCode MatEqual_MPIDense(Mat, Mat, PetscBool *); 1209 static PetscErrorCode MatLoad_MPIDense(Mat, PetscViewer); 1210 static PetscErrorCode MatProductSetFromOptions_MPIDense(Mat); 1211 1212 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense, 1213 MatGetRow_MPIDense, 1214 MatRestoreRow_MPIDense, 1215 MatMult_MPIDense, 1216 /* 4*/ MatMultAdd_MPIDense, 1217 MatMultTranspose_MPIDense, 1218 MatMultTransposeAdd_MPIDense, 1219 NULL, 1220 NULL, 1221 NULL, 1222 /* 10*/ NULL, 1223 NULL, 1224 NULL, 1225 NULL, 1226 MatTranspose_MPIDense, 1227 /* 15*/ MatGetInfo_MPIDense, 1228 MatEqual_MPIDense, 1229 MatGetDiagonal_MPIDense, 1230 MatDiagonalScale_MPIDense, 1231 MatNorm_MPIDense, 1232 /* 20*/ MatAssemblyBegin_MPIDense, 1233 MatAssemblyEnd_MPIDense, 1234 MatSetOption_MPIDense, 1235 MatZeroEntries_MPIDense, 1236 /* 24*/ MatZeroRows_MPIDense, 1237 NULL, 1238 NULL, 1239 NULL, 1240 NULL, 1241 /* 29*/ MatSetUp_MPIDense, 1242 NULL, 1243 NULL, 1244 MatGetDiagonalBlock_MPIDense, 1245 NULL, 1246 /* 34*/ MatDuplicate_MPIDense, 1247 NULL, 1248 NULL, 1249 NULL, 1250 NULL, 1251 /* 39*/ MatAXPY_MPIDense, 1252 MatCreateSubMatrices_MPIDense, 1253 NULL, 1254 MatGetValues_MPIDense, 1255 MatCopy_MPIDense, 1256 /* 44*/ NULL, 1257 MatScale_MPIDense, 1258 MatShift_MPIDense, 1259 NULL, 1260 NULL, 1261 /* 49*/ MatSetRandom_MPIDense, 1262 NULL, 1263 NULL, 1264 NULL, 1265 NULL, 1266 /* 54*/ NULL, 1267 NULL, 1268 NULL, 1269 NULL, 1270 NULL, 1271 /* 59*/ MatCreateSubMatrix_MPIDense, 1272 MatDestroy_MPIDense, 1273 MatView_MPIDense, 1274 NULL, 1275 NULL, 1276 /* 64*/ NULL, 1277 NULL, 1278 NULL, 1279 NULL, 1280 NULL, 1281 /* 69*/ NULL, 1282 NULL, 1283 NULL, 1284 NULL, 1285 NULL, 1286 /* 74*/ NULL, 1287 NULL, 1288 NULL, 1289 NULL, 1290 NULL, 1291 /* 79*/ NULL, 1292 NULL, 1293 NULL, 1294 NULL, 1295 /* 83*/ MatLoad_MPIDense, 1296 NULL, 1297 NULL, 1298 NULL, 1299 NULL, 1300 NULL, 1301 /* 89*/ NULL, 1302 NULL, 1303 NULL, 1304 NULL, 1305 NULL, 1306 /* 94*/ NULL, 1307 NULL, 1308 MatMatTransposeMultSymbolic_MPIDense_MPIDense, 1309 MatMatTransposeMultNumeric_MPIDense_MPIDense, 1310 NULL, 1311 /* 99*/ MatProductSetFromOptions_MPIDense, 1312 NULL, 1313 NULL, 1314 MatConjugate_MPIDense, 1315 NULL, 1316 /*104*/ NULL, 1317 MatRealPart_MPIDense, 1318 MatImaginaryPart_MPIDense, 1319 NULL, 1320 NULL, 1321 /*109*/ NULL, 1322 NULL, 1323 NULL, 1324 MatGetColumnVector_MPIDense, 1325 MatMissingDiagonal_MPIDense, 1326 /*114*/ NULL, 1327 NULL, 1328 NULL, 1329 NULL, 1330 NULL, 1331 /*119*/ NULL, 1332 NULL, 1333 MatMultHermitianTranspose_MPIDense, 1334 MatMultHermitianTransposeAdd_MPIDense, 1335 NULL, 1336 /*124*/ NULL, 1337 MatGetColumnReductions_MPIDense, 1338 NULL, 1339 NULL, 1340 NULL, 1341 /*129*/ NULL, 1342 NULL, 1343 MatTransposeMatMultSymbolic_MPIDense_MPIDense, 1344 MatTransposeMatMultNumeric_MPIDense_MPIDense, 1345 NULL, 1346 /*134*/ NULL, 1347 NULL, 1348 NULL, 1349 NULL, 1350 NULL, 1351 /*139*/ NULL, 1352 NULL, 1353 NULL, 1354 NULL, 1355 NULL, 1356 MatCreateMPIMatConcatenateSeqMat_MPIDense, 1357 /*145*/ NULL, 1358 NULL, 1359 NULL, 1360 NULL, 1361 NULL, 1362 /*150*/ NULL, 1363 NULL, 1364 NULL, 1365 NULL, 1366 NULL, 1367 NULL}; 1368 1369 static PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat, PetscScalar *data) 1370 { 1371 Mat_MPIDense *a = (Mat_MPIDense *)mat->data; 1372 MatType mtype = MATSEQDENSE; 1373 1374 
PetscFunctionBegin; 1375 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1376 PetscCall(PetscLayoutSetUp(mat->rmap)); 1377 PetscCall(PetscLayoutSetUp(mat->cmap)); 1378 if (!a->A) { 1379 PetscCall(MatCreate(PETSC_COMM_SELF, &a->A)); 1380 PetscCall(MatSetSizes(a->A, mat->rmap->n, mat->cmap->N, mat->rmap->n, mat->cmap->N)); 1381 } 1382 #if defined(PETSC_HAVE_CUDA) 1383 PetscBool iscuda; 1384 PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSECUDA, &iscuda)); 1385 if (iscuda) mtype = MATSEQDENSECUDA; 1386 #endif 1387 #if defined(PETSC_HAVE_HIP) 1388 PetscBool iship; 1389 PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSEHIP, &iship)); 1390 if (iship) mtype = MATSEQDENSEHIP; 1391 #endif 1392 PetscCall(MatSetType(a->A, mtype)); 1393 PetscCall(MatSeqDenseSetPreallocation(a->A, data)); 1394 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 1395 mat->offloadmask = a->A->offloadmask; 1396 #endif 1397 mat->preallocated = PETSC_TRUE; 1398 mat->assembled = PETSC_TRUE; 1399 PetscFunctionReturn(PETSC_SUCCESS); 1400 } 1401 1402 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIDense(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 1403 { 1404 Mat B, C; 1405 1406 PetscFunctionBegin; 1407 PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &C)); 1408 PetscCall(MatConvert_SeqAIJ_SeqDense(C, MATSEQDENSE, MAT_INITIAL_MATRIX, &B)); 1409 PetscCall(MatDestroy(&C)); 1410 if (reuse == MAT_REUSE_MATRIX) { 1411 C = *newmat; 1412 } else C = NULL; 1413 PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C)); 1414 PetscCall(MatDestroy(&B)); 1415 if (reuse == MAT_INPLACE_MATRIX) { 1416 PetscCall(MatHeaderReplace(A, &C)); 1417 } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C; 1418 PetscFunctionReturn(PETSC_SUCCESS); 1419 } 1420 1421 static PetscErrorCode MatConvert_MPIDense_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 1422 { 1423 Mat B, C; 1424 1425 PetscFunctionBegin; 1426 PetscCall(MatDenseGetLocalMatrix(A, &C)); 1427 PetscCall(MatConvert_SeqDense_SeqAIJ(C, MATSEQAIJ, MAT_INITIAL_MATRIX, &B)); 1428 if (reuse == MAT_REUSE_MATRIX) { 1429 C = *newmat; 1430 } else C = NULL; 1431 PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? 
MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C)); 1432 PetscCall(MatDestroy(&B)); 1433 if (reuse == MAT_INPLACE_MATRIX) { 1434 PetscCall(MatHeaderReplace(A, &C)); 1435 } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C; 1436 PetscFunctionReturn(PETSC_SUCCESS); 1437 } 1438 1439 #if defined(PETSC_HAVE_ELEMENTAL) 1440 PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 1441 { 1442 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1443 Mat mat_elemental; 1444 PetscScalar *v; 1445 PetscInt m = A->rmap->n, N = A->cmap->N, rstart = A->rmap->rstart, i, *rows, *cols, lda; 1446 1447 PetscFunctionBegin; 1448 if (reuse == MAT_REUSE_MATRIX) { 1449 mat_elemental = *newmat; 1450 PetscCall(MatZeroEntries(*newmat)); 1451 } else { 1452 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental)); 1453 PetscCall(MatSetSizes(mat_elemental, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N)); 1454 PetscCall(MatSetType(mat_elemental, MATELEMENTAL)); 1455 PetscCall(MatSetUp(mat_elemental)); 1456 PetscCall(MatSetOption(mat_elemental, MAT_ROW_ORIENTED, PETSC_FALSE)); 1457 } 1458 1459 PetscCall(PetscMalloc2(m, &rows, N, &cols)); 1460 for (i = 0; i < N; i++) cols[i] = i; 1461 for (i = 0; i < m; i++) rows[i] = rstart + i; 1462 1463 /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */ 1464 PetscCall(MatDenseGetArray(A, &v)); 1465 PetscCall(MatDenseGetLDA(a->A, &lda)); 1466 if (lda == m) PetscCall(MatSetValues(mat_elemental, m, rows, N, cols, v, ADD_VALUES)); 1467 else { 1468 for (i = 0; i < N; i++) PetscCall(MatSetValues(mat_elemental, m, rows, 1, &i, v + lda * i, ADD_VALUES)); 1469 } 1470 PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY)); 1471 PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY)); 1472 PetscCall(MatDenseRestoreArray(A, &v)); 1473 PetscCall(PetscFree2(rows, cols)); 1474 1475 if (reuse == MAT_INPLACE_MATRIX) { 1476 PetscCall(MatHeaderReplace(A, &mat_elemental)); 1477 } else { 1478 *newmat = mat_elemental; 1479 } 1480 PetscFunctionReturn(PETSC_SUCCESS); 1481 } 1482 #endif 1483 1484 static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A, PetscInt col, PetscScalar **vals) 1485 { 1486 Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 1487 1488 PetscFunctionBegin; 1489 PetscCall(MatDenseGetColumn(mat->A, col, vals)); 1490 PetscFunctionReturn(PETSC_SUCCESS); 1491 } 1492 1493 static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A, PetscScalar **vals) 1494 { 1495 Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 1496 1497 PetscFunctionBegin; 1498 PetscCall(MatDenseRestoreColumn(mat->A, vals)); 1499 PetscFunctionReturn(PETSC_SUCCESS); 1500 } 1501 1502 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 1503 { 1504 Mat_MPIDense *mat; 1505 PetscInt m, nloc, N; 1506 1507 PetscFunctionBegin; 1508 PetscCall(MatGetSize(inmat, &m, &N)); 1509 PetscCall(MatGetLocalSize(inmat, NULL, &nloc)); 1510 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 1511 PetscInt sum; 1512 1513 if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N)); 1514 /* Check sum(n) = N */ 1515 PetscCallMPI(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm)); 1516 PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N); 1517 1518 PetscCall(MatCreateDense(comm, m, n, PETSC_DETERMINE, N, NULL, outmat)); 1519 PetscCall(MatSetOption(*outmat, 
MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 1520 } 1521 1522 /* numeric phase */ 1523 mat = (Mat_MPIDense *)(*outmat)->data; 1524 PetscCall(MatCopy(inmat, mat->A, SAME_NONZERO_PATTERN)); 1525 PetscFunctionReturn(PETSC_SUCCESS); 1526 } 1527 1528 PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A, PetscInt col, Vec *v) 1529 { 1530 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1531 PetscInt lda; 1532 1533 PetscFunctionBegin; 1534 PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1535 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1536 if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec)); 1537 a->vecinuse = col + 1; 1538 PetscCall(MatDenseGetLDA(a->A, &lda)); 1539 PetscCall(MatDenseGetArray(a->A, (PetscScalar **)&a->ptrinuse)); 1540 PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)lda))); 1541 *v = a->cvec; 1542 PetscFunctionReturn(PETSC_SUCCESS); 1543 } 1544 1545 PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A, PetscInt col, Vec *v) 1546 { 1547 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1548 1549 PetscFunctionBegin; 1550 PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first"); 1551 PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector"); 1552 VecCheckAssembled(a->cvec); 1553 a->vecinuse = 0; 1554 PetscCall(MatDenseRestoreArray(a->A, (PetscScalar **)&a->ptrinuse)); 1555 PetscCall(VecResetArray(a->cvec)); 1556 if (v) *v = NULL; 1557 PetscFunctionReturn(PETSC_SUCCESS); 1558 } 1559 1560 PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v) 1561 { 1562 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1563 PetscInt lda; 1564 1565 PetscFunctionBegin; 1566 PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1567 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1568 if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec)); 1569 a->vecinuse = col + 1; 1570 PetscCall(MatDenseGetLDA(a->A, &lda)); 1571 PetscCall(MatDenseGetArrayRead(a->A, &a->ptrinuse)); 1572 PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)lda))); 1573 PetscCall(VecLockReadPush(a->cvec)); 1574 *v = a->cvec; 1575 PetscFunctionReturn(PETSC_SUCCESS); 1576 } 1577 1578 PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v) 1579 { 1580 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1581 1582 PetscFunctionBegin; 1583 PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first"); 1584 PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector"); 1585 VecCheckAssembled(a->cvec); 1586 a->vecinuse = 0; 1587 PetscCall(MatDenseRestoreArrayRead(a->A, &a->ptrinuse)); 1588 PetscCall(VecLockReadPop(a->cvec)); 1589 PetscCall(VecResetArray(a->cvec)); 1590 if (v) *v = NULL; 1591 PetscFunctionReturn(PETSC_SUCCESS); 1592 } 1593 1594 PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v) 1595 { 1596 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1597 PetscInt lda; 1598 1599 PetscFunctionBegin; 1600 PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() 
first"); 1601 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1602 if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec)); 1603 a->vecinuse = col + 1; 1604 PetscCall(MatDenseGetLDA(a->A, &lda)); 1605 PetscCall(MatDenseGetArrayWrite(a->A, (PetscScalar **)&a->ptrinuse)); 1606 PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)lda))); 1607 *v = a->cvec; 1608 PetscFunctionReturn(PETSC_SUCCESS); 1609 } 1610 1611 PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v) 1612 { 1613 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1614 1615 PetscFunctionBegin; 1616 PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first"); 1617 PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector"); 1618 VecCheckAssembled(a->cvec); 1619 a->vecinuse = 0; 1620 PetscCall(MatDenseRestoreArrayWrite(a->A, (PetscScalar **)&a->ptrinuse)); 1621 PetscCall(VecResetArray(a->cvec)); 1622 if (v) *v = NULL; 1623 PetscFunctionReturn(PETSC_SUCCESS); 1624 } 1625 1626 static PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A, PetscInt rbegin, PetscInt rend, PetscInt cbegin, PetscInt cend, Mat *v) 1627 { 1628 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1629 Mat_MPIDense *c; 1630 MPI_Comm comm; 1631 PetscInt pbegin, pend; 1632 1633 PetscFunctionBegin; 1634 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 1635 PetscCheck(!a->vecinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1636 PetscCheck(!a->matinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1637 pbegin = PetscMax(0, PetscMin(A->rmap->rend, rbegin) - A->rmap->rstart); 1638 pend = PetscMin(A->rmap->n, PetscMax(0, rend - A->rmap->rstart)); 1639 if (!a->cmat) { 1640 PetscCall(MatCreate(comm, &a->cmat)); 1641 PetscCall(MatSetType(a->cmat, ((PetscObject)A)->type_name)); 1642 if (rend - rbegin == A->rmap->N) PetscCall(PetscLayoutReference(A->rmap, &a->cmat->rmap)); 1643 else { 1644 PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin)); 1645 PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin)); 1646 PetscCall(PetscLayoutSetUp(a->cmat->rmap)); 1647 } 1648 PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin)); 1649 PetscCall(PetscLayoutSetUp(a->cmat->cmap)); 1650 } else { 1651 PetscBool same = (PetscBool)(rend - rbegin == a->cmat->rmap->N); 1652 if (same && a->cmat->rmap->N != A->rmap->N) { 1653 same = (PetscBool)(pend - pbegin == a->cmat->rmap->n); 1654 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &same, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A))); 1655 } 1656 if (!same) { 1657 PetscCall(PetscLayoutDestroy(&a->cmat->rmap)); 1658 PetscCall(PetscLayoutCreate(comm, &a->cmat->rmap)); 1659 PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin)); 1660 PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin)); 1661 PetscCall(PetscLayoutSetUp(a->cmat->rmap)); 1662 } 1663 if (cend - cbegin != a->cmat->cmap->N) { 1664 PetscCall(PetscLayoutDestroy(&a->cmat->cmap)); 1665 PetscCall(PetscLayoutCreate(comm, &a->cmat->cmap)); 1666 PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin)); 1667 PetscCall(PetscLayoutSetUp(a->cmat->cmap)); 1668 } 1669 } 1670 c = (Mat_MPIDense *)a->cmat->data; 1671 PetscCheck(!c->A, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1672 PetscCall(MatDenseGetSubMatrix(a->A, pbegin, 
pend, cbegin, cend, &c->A)); 1673 1674 a->cmat->preallocated = PETSC_TRUE; 1675 a->cmat->assembled = PETSC_TRUE; 1676 #if defined(PETSC_HAVE_DEVICE) 1677 a->cmat->offloadmask = c->A->offloadmask; 1678 #endif 1679 a->matinuse = cbegin + 1; 1680 *v = a->cmat; 1681 PetscFunctionReturn(PETSC_SUCCESS); 1682 } 1683 1684 static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A, Mat *v) 1685 { 1686 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1687 Mat_MPIDense *c; 1688 1689 PetscFunctionBegin; 1690 PetscCheck(a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetSubMatrix() first"); 1691 PetscCheck(a->cmat, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal matrix"); 1692 PetscCheck(*v == a->cmat, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Not the matrix obtained from MatDenseGetSubMatrix()"); 1693 a->matinuse = 0; 1694 c = (Mat_MPIDense *)a->cmat->data; 1695 PetscCall(MatDenseRestoreSubMatrix(a->A, &c->A)); 1696 if (v) *v = NULL; 1697 #if defined(PETSC_HAVE_DEVICE) 1698 A->offloadmask = a->A->offloadmask; 1699 #endif 1700 PetscFunctionReturn(PETSC_SUCCESS); 1701 } 1702 1703 /*MC 1704 MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices. 1705 1706 Options Database Key: 1707 . -mat_type mpidense - sets the matrix type to `MATMPIDENSE` during a call to `MatSetFromOptions()` 1708 1709 Level: beginner 1710 1711 .seealso: [](ch_matrices), `Mat`, `MatCreateDense()`, `MATSEQDENSE`, `MATDENSE` 1712 M*/ 1713 PetscErrorCode MatCreate_MPIDense(Mat mat) 1714 { 1715 Mat_MPIDense *a; 1716 1717 PetscFunctionBegin; 1718 PetscCall(PetscNew(&a)); 1719 mat->data = (void *)a; 1720 mat->ops[0] = MatOps_Values; 1721 1722 mat->insertmode = NOT_SET_VALUES; 1723 1724 /* build cache for off array entries formed */ 1725 a->donotstash = PETSC_FALSE; 1726 1727 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)mat), 1, &mat->stash)); 1728 1729 /* stuff used for matrix vector multiply */ 1730 a->lvec = NULL; 1731 a->Mvctx = NULL; 1732 a->roworiented = PETSC_TRUE; 1733 1734 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", MatDenseGetLDA_MPIDense)); 1735 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", MatDenseSetLDA_MPIDense)); 1736 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", MatDenseGetArray_MPIDense)); 1737 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", MatDenseRestoreArray_MPIDense)); 1738 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", MatDenseGetArrayRead_MPIDense)); 1739 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", MatDenseRestoreArrayRead_MPIDense)); 1740 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", MatDenseGetArrayWrite_MPIDense)); 1741 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", MatDenseRestoreArrayWrite_MPIDense)); 1742 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", MatDensePlaceArray_MPIDense)); 1743 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", MatDenseResetArray_MPIDense)); 1744 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", MatDenseReplaceArray_MPIDense)); 1745 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDense)); 1746 PetscCall(PetscObjectComposeFunction((PetscObject)mat, 
"MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDense)); 1747 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDense)); 1748 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDense)); 1749 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDense)); 1750 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDense)); 1751 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", MatDenseGetSubMatrix_MPIDense)); 1752 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", MatDenseRestoreSubMatrix_MPIDense)); 1753 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense)); 1754 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", MatConvert_MPIDense_MPIAIJ)); 1755 #if defined(PETSC_HAVE_ELEMENTAL) 1756 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", MatConvert_MPIDense_Elemental)); 1757 #endif 1758 #if defined(PETSC_HAVE_SCALAPACK) 1759 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", MatConvert_Dense_ScaLAPACK)); 1760 #endif 1761 #if defined(PETSC_HAVE_CUDA) 1762 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", MatConvert_MPIDense_MPIDenseCUDA)); 1763 #endif 1764 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", MatMPIDenseSetPreallocation_MPIDense)); 1765 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense)); 1766 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ)); 1767 #if defined(PETSC_HAVE_CUDA) 1768 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense)); 1769 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ)); 1770 #endif 1771 #if defined(PETSC_HAVE_HIP) 1772 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", MatConvert_MPIDense_MPIDenseHIP)); 1773 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense)); 1774 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ)); 1775 #endif 1776 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", MatDenseGetColumn_MPIDense)); 1777 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", MatDenseRestoreColumn_MPIDense)); 1778 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultAddColumnRange_C", MatMultAddColumnRange_MPIDense)); 1779 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeColumnRange_C", MatMultHermitianTransposeColumnRange_MPIDense)); 1780 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeAddColumnRange_C", 
MatMultHermitianTransposeAddColumnRange_MPIDense)); 1781 PetscCall(PetscObjectChangeTypeName((PetscObject)mat, MATMPIDENSE)); 1782 PetscFunctionReturn(PETSC_SUCCESS); 1783 } 1784 1785 /*MC 1786 MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices. 1787 1788 This matrix type is identical to `MATSEQDENSE` when constructed with a single process communicator, 1789 and `MATMPIDENSE` otherwise. 1790 1791 Options Database Key: 1792 . -mat_type dense - sets the matrix type to `MATDENSE` during a call to `MatSetFromOptions()` 1793 1794 Level: beginner 1795 1796 .seealso: [](ch_matrices), `Mat`, `MATSEQDENSE`, `MATMPIDENSE`, `MATDENSECUDA`, `MATDENSEHIP` 1797 M*/ 1798 1799 /*@ 1800 MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries 1801 1802 Collective 1803 1804 Input Parameters: 1805 + B - the matrix 1806 - data - optional location of matrix data. Set to `NULL` for PETSc 1807 to control all matrix memory allocation. 1808 1809 Level: intermediate 1810 1811 Notes: 1812 The dense format is fully compatible with standard Fortran 1813 storage by columns. 1814 1815 The data input variable is intended primarily for Fortran programmers 1816 who wish to allocate their own matrix memory space. Most users should 1817 set `data` to `NULL`. 1818 1819 .seealso: [](ch_matrices), `Mat`, `MATMPIDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()` 1820 @*/ 1821 PetscErrorCode MatMPIDenseSetPreallocation(Mat B, PetscScalar *data) 1822 { 1823 PetscFunctionBegin; 1824 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 1825 PetscTryMethod(B, "MatMPIDenseSetPreallocation_C", (Mat, PetscScalar *), (B, data)); 1826 PetscFunctionReturn(PETSC_SUCCESS); 1827 } 1828 1829 /*@ 1830 MatDensePlaceArray - Allows one to replace the array in a `MATDENSE` matrix with an 1831 array provided by the user. This is useful to avoid copying an array 1832 into a matrix 1833 1834 Not Collective 1835 1836 Input Parameters: 1837 + mat - the matrix 1838 - array - the array in column major order 1839 1840 Level: developer 1841 1842 Note: 1843 Adding `const` to `array` was an oversight, see notes in `VecPlaceArray()`. 1844 1845 You can return to the original array with a call to `MatDenseResetArray()`. The user is responsible for freeing this array; it will not be 1846 freed when the matrix is destroyed. 1847 1848 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDenseResetArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`, 1849 `MatDenseReplaceArray()` 1850 @*/ 1851 PetscErrorCode MatDensePlaceArray(Mat mat, const PetscScalar *array) 1852 { 1853 PetscFunctionBegin; 1854 PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 1855 PetscUseMethod(mat, "MatDensePlaceArray_C", (Mat, const PetscScalar *), (mat, array)); 1856 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 1857 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 1858 mat->offloadmask = PETSC_OFFLOAD_CPU; 1859 #endif 1860 PetscFunctionReturn(PETSC_SUCCESS); 1861 } 1862 1863 /*@ 1864 MatDenseResetArray - Resets the matrix array to that it previously had before the call to `MatDensePlaceArray()` 1865 1866 Not Collective 1867 1868 Input Parameter: 1869 . 
mat - the matrix 1870 1871 Level: developer 1872 1873 Note: 1874 You can only call this after a call to `MatDensePlaceArray()` 1875 1876 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDensePlaceArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()` 1877 @*/ 1878 PetscErrorCode MatDenseResetArray(Mat mat) 1879 { 1880 PetscFunctionBegin; 1881 PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 1882 PetscUseMethod(mat, "MatDenseResetArray_C", (Mat), (mat)); 1883 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 1884 PetscFunctionReturn(PETSC_SUCCESS); 1885 } 1886 1887 /*@ 1888 MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an 1889 array provided by the user. This is useful to avoid copying an array 1890 into a matrix 1891 1892 Not Collective 1893 1894 Input Parameters: 1895 + mat - the matrix 1896 - array - the array in column major order 1897 1898 Level: developer 1899 1900 Note: 1901 Adding `const` to `array` was an oversight, see notes in `VecPlaceArray()`. 1902 1903 The memory passed in MUST be obtained with `PetscMalloc()` and CANNOT be 1904 freed by the user. It will be freed when the matrix is destroyed. 1905 1906 .seealso: [](ch_matrices), `Mat`, `MatDensePlaceArray()`, `MatDenseGetArray()`, `VecReplaceArray()` 1907 @*/ 1908 PetscErrorCode MatDenseReplaceArray(Mat mat, const PetscScalar *array) 1909 { 1910 PetscFunctionBegin; 1911 PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 1912 PetscUseMethod(mat, "MatDenseReplaceArray_C", (Mat, const PetscScalar *), (mat, array)); 1913 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 1914 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 1915 mat->offloadmask = PETSC_OFFLOAD_CPU; 1916 #endif 1917 PetscFunctionReturn(PETSC_SUCCESS); 1918 } 1919 1920 /*@ 1921 MatCreateDense - Creates a matrix in `MATDENSE` format. 1922 1923 Collective 1924 1925 Input Parameters: 1926 + comm - MPI communicator 1927 . m - number of local rows (or `PETSC_DECIDE` to have calculated if `M` is given) 1928 . n - number of local columns (or `PETSC_DECIDE` to have calculated if `N` is given) 1929 . M - number of global rows (or `PETSC_DECIDE` to have calculated if `m` is given) 1930 . N - number of global columns (or `PETSC_DECIDE` to have calculated if `n` is given) 1931 - data - optional location of matrix data. Set data to `NULL` (`PETSC_NULL_SCALAR` for Fortran users) for PETSc 1932 to control all matrix memory allocation. 1933 1934 Output Parameter: 1935 . A - the matrix 1936 1937 Level: intermediate 1938 1939 Notes: 1940 The dense format is fully compatible with standard Fortran 1941 storage by columns. 1942 1943 Although local portions of the matrix are stored in column-major 1944 order, the matrix is partitioned across MPI ranks by row. 1945 1946 The data input variable is intended primarily for Fortran programmers 1947 who wish to allocate their own matrix memory space. Most users should 1948 set `data` to `NULL` (`PETSC_NULL_SCALAR` for Fortran users). 1949 1950 The user MUST specify either the local or global matrix dimensions 1951 (possibly both). 
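 Example Usage:
 A minimal sketch of creating, filling, and assembling a parallel dense matrix; the sizes and the
 value passed to `MatSetValue()` below are illustrative only.
.vb
   Mat      A;
   PetscInt rstart, rend;

   PetscCall(MatCreateDense(PETSC_COMM_WORLD, PETSC_DECIDE, PETSC_DECIDE, 100, 50, NULL, &A));
   /* set entries in locally owned rows, then assemble */
   PetscCall(MatGetOwnershipRange(A, &rstart, &rend));
   for (PetscInt i = rstart; i < rend; i++) PetscCall(MatSetValue(A, i, i % 50, 1.0, INSERT_VALUES));
   PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
   PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
   PetscCall(MatDestroy(&A));
.ve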
1952 1953 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()` 1954 @*/ 1955 PetscErrorCode MatCreateDense(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar *data, Mat *A) 1956 { 1957 PetscFunctionBegin; 1958 PetscCall(MatCreate(comm, A)); 1959 PetscCall(MatSetSizes(*A, m, n, M, N)); 1960 PetscCall(MatSetType(*A, MATDENSE)); 1961 PetscCall(MatSeqDenseSetPreallocation(*A, data)); 1962 PetscCall(MatMPIDenseSetPreallocation(*A, data)); 1963 PetscFunctionReturn(PETSC_SUCCESS); 1964 } 1965 1966 static PetscErrorCode MatDuplicate_MPIDense(Mat A, MatDuplicateOption cpvalues, Mat *newmat) 1967 { 1968 Mat mat; 1969 Mat_MPIDense *a, *oldmat = (Mat_MPIDense *)A->data; 1970 1971 PetscFunctionBegin; 1972 *newmat = NULL; 1973 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat)); 1974 PetscCall(MatSetSizes(mat, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 1975 PetscCall(MatSetType(mat, ((PetscObject)A)->type_name)); 1976 a = (Mat_MPIDense *)mat->data; 1977 1978 mat->factortype = A->factortype; 1979 mat->assembled = PETSC_TRUE; 1980 mat->preallocated = PETSC_TRUE; 1981 1982 mat->insertmode = NOT_SET_VALUES; 1983 a->donotstash = oldmat->donotstash; 1984 1985 PetscCall(PetscLayoutReference(A->rmap, &mat->rmap)); 1986 PetscCall(PetscLayoutReference(A->cmap, &mat->cmap)); 1987 1988 PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A)); 1989 1990 *newmat = mat; 1991 PetscFunctionReturn(PETSC_SUCCESS); 1992 } 1993 1994 static PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer) 1995 { 1996 PetscBool isbinary; 1997 #if defined(PETSC_HAVE_HDF5) 1998 PetscBool ishdf5; 1999 #endif 2000 2001 PetscFunctionBegin; 2002 PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1); 2003 PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2); 2004 /* force binary viewer to load .info file if it has not yet done so */ 2005 PetscCall(PetscViewerSetUp(viewer)); 2006 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 2007 #if defined(PETSC_HAVE_HDF5) 2008 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5)); 2009 #endif 2010 if (isbinary) { 2011 PetscCall(MatLoad_Dense_Binary(newMat, viewer)); 2012 #if defined(PETSC_HAVE_HDF5) 2013 } else if (ishdf5) { 2014 PetscCall(MatLoad_Dense_HDF5(newMat, viewer)); 2015 #endif 2016 } else SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name); 2017 PetscFunctionReturn(PETSC_SUCCESS); 2018 } 2019 2020 static PetscErrorCode MatEqual_MPIDense(Mat A, Mat B, PetscBool *flag) 2021 { 2022 Mat_MPIDense *matB = (Mat_MPIDense *)B->data, *matA = (Mat_MPIDense *)A->data; 2023 Mat a, b; 2024 2025 PetscFunctionBegin; 2026 a = matA->A; 2027 b = matB->A; 2028 PetscCall(MatEqual(a, b, flag)); 2029 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A))); 2030 PetscFunctionReturn(PETSC_SUCCESS); 2031 } 2032 2033 static PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data) 2034 { 2035 Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data; 2036 2037 PetscFunctionBegin; 2038 PetscCall(PetscFree2(atb->sendbuf, atb->recvcounts)); 2039 PetscCall(MatDestroy(&atb->atb)); 2040 PetscCall(PetscFree(atb)); 2041 PetscFunctionReturn(PETSC_SUCCESS); 2042 } 2043 2044 static PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data) 2045 { 2046 
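  /* Free the send/receive buffers and the counts/displacements arrays allocated (as needed)
     by MatMatTransposeMultSymbolic_MPIDense_MPIDense() for the C = A*B^T product */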
Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data; 2047 2048 PetscFunctionBegin; 2049 PetscCall(PetscFree2(abt->buf[0], abt->buf[1])); 2050 PetscCall(PetscFree2(abt->recvcounts, abt->recvdispls)); 2051 PetscCall(PetscFree(abt)); 2052 PetscFunctionReturn(PETSC_SUCCESS); 2053 } 2054 2055 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) 2056 { 2057 Mat_MPIDense *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data; 2058 Mat_TransMatMultDense *atb; 2059 MPI_Comm comm; 2060 PetscMPIInt size, *recvcounts; 2061 PetscScalar *carray, *sendbuf; 2062 const PetscScalar *atbarray; 2063 PetscInt i, cN = C->cmap->N, proc, k, j, lda; 2064 const PetscInt *ranges; 2065 2066 PetscFunctionBegin; 2067 MatCheckProduct(C, 3); 2068 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 2069 atb = (Mat_TransMatMultDense *)C->product->data; 2070 recvcounts = atb->recvcounts; 2071 sendbuf = atb->sendbuf; 2072 2073 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 2074 PetscCallMPI(MPI_Comm_size(comm, &size)); 2075 2076 /* compute atbarray = aseq^T * bseq */ 2077 PetscCall(MatTransposeMatMult(a->A, b->A, atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX, PETSC_DETERMINE, &atb->atb)); 2078 2079 PetscCall(MatGetOwnershipRanges(C, &ranges)); 2080 2081 /* arrange atbarray into sendbuf */ 2082 PetscCall(MatDenseGetArrayRead(atb->atb, &atbarray)); 2083 PetscCall(MatDenseGetLDA(atb->atb, &lda)); 2084 for (proc = 0, k = 0; proc < size; proc++) { 2085 for (j = 0; j < cN; j++) { 2086 for (i = ranges[proc]; i < ranges[proc + 1]; i++) sendbuf[k++] = atbarray[i + j * lda]; 2087 } 2088 } 2089 PetscCall(MatDenseRestoreArrayRead(atb->atb, &atbarray)); 2090 2091 /* sum all atbarray to local values of C */ 2092 PetscCall(MatDenseGetArrayWrite(c->A, &carray)); 2093 PetscCallMPI(MPI_Reduce_scatter(sendbuf, carray, recvcounts, MPIU_SCALAR, MPIU_SUM, comm)); 2094 PetscCall(MatDenseRestoreArrayWrite(c->A, &carray)); 2095 PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 2096 PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 2097 PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 2098 PetscFunctionReturn(PETSC_SUCCESS); 2099 } 2100 2101 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C) 2102 { 2103 MPI_Comm comm; 2104 PetscMPIInt size; 2105 PetscInt cm = A->cmap->n, cM, cN = B->cmap->N; 2106 Mat_TransMatMultDense *atb; 2107 PetscBool cisdense = PETSC_FALSE; 2108 const PetscInt *ranges; 2109 2110 PetscFunctionBegin; 2111 MatCheckProduct(C, 4); 2112 PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 2113 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 2114 if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) { 2115 SETERRQ(comm, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend); 2116 } 2117 2118 /* create matrix product C */ 2119 PetscCall(MatSetSizes(C, cm, B->cmap->n, A->cmap->N, B->cmap->N)); 2120 #if defined(PETSC_HAVE_CUDA) 2121 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, "")); 2122 #endif 2123 #if defined(PETSC_HAVE_HIP) 2124 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSEHIP, "")); 2125 #endif 2126 if 
(!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 2127 PetscCall(MatSetUp(C)); 2128 2129 /* create data structure for reuse C */ 2130 PetscCallMPI(MPI_Comm_size(comm, &size)); 2131 PetscCall(PetscNew(&atb)); 2132 cM = C->rmap->N; 2133 PetscCall(PetscMalloc2(cM * cN, &atb->sendbuf, size, &atb->recvcounts)); 2134 PetscCall(MatGetOwnershipRanges(C, &ranges)); 2135 for (PetscMPIInt i = 0; i < size; i++) PetscCall(PetscMPIIntCast((ranges[i + 1] - ranges[i]) * cN, &atb->recvcounts[i])); 2136 C->product->data = atb; 2137 C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense; 2138 PetscFunctionReturn(PETSC_SUCCESS); 2139 } 2140 2141 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C) 2142 { 2143 MPI_Comm comm; 2144 PetscMPIInt i, size; 2145 PetscInt maxRows, bufsiz; 2146 PetscMPIInt tag; 2147 PetscInt alg; 2148 Mat_MatTransMultDense *abt; 2149 Mat_Product *product = C->product; 2150 PetscBool flg; 2151 2152 PetscFunctionBegin; 2153 MatCheckProduct(C, 4); 2154 PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 2155 /* check local size of A and B */ 2156 PetscCheck(A->cmap->n == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local column dimensions are incompatible, A (%" PetscInt_FMT ") != B (%" PetscInt_FMT ")", A->cmap->n, B->cmap->n); 2157 2158 PetscCall(PetscStrcmp(product->alg, "allgatherv", &flg)); 2159 alg = flg ? 0 : 1; 2160 2161 /* setup matrix product C */ 2162 PetscCall(MatSetSizes(C, A->rmap->n, B->rmap->n, A->rmap->N, B->rmap->N)); 2163 PetscCall(MatSetType(C, MATMPIDENSE)); 2164 PetscCall(MatSetUp(C)); 2165 PetscCall(PetscObjectGetNewTag((PetscObject)C, &tag)); 2166 2167 /* create data structure for reuse C */ 2168 PetscCall(PetscObjectGetComm((PetscObject)C, &comm)); 2169 PetscCallMPI(MPI_Comm_size(comm, &size)); 2170 PetscCall(PetscNew(&abt)); 2171 abt->tag = tag; 2172 abt->alg = alg; 2173 switch (alg) { 2174 case 1: /* alg: "cyclic" */ 2175 for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, B->rmap->range[i + 1] - B->rmap->range[i]); 2176 bufsiz = A->cmap->N * maxRows; 2177 PetscCall(PetscMalloc2(bufsiz, &abt->buf[0], bufsiz, &abt->buf[1])); 2178 break; 2179 default: /* alg: "allgatherv" */ 2180 PetscCall(PetscMalloc2(B->rmap->n * B->cmap->N, &abt->buf[0], B->rmap->N * B->cmap->N, &abt->buf[1])); 2181 PetscCall(PetscMalloc2(size, &abt->recvcounts, size + 1, &abt->recvdispls)); 2182 for (i = 0; i <= size; i++) PetscCall(PetscMPIIntCast(B->rmap->range[i] * A->cmap->N, &abt->recvdispls[i])); 2183 for (i = 0; i < size; i++) PetscCall(PetscMPIIntCast(abt->recvdispls[i + 1] - abt->recvdispls[i], &abt->recvcounts[i])); 2184 break; 2185 } 2186 2187 C->product->data = abt; 2188 C->product->destroy = MatDestroy_MatMatTransMult_MPIDense_MPIDense; 2189 C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense; 2190 PetscFunctionReturn(PETSC_SUCCESS); 2191 } 2192 2193 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C) 2194 { 2195 Mat_MPIDense *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data; 2196 Mat_MatTransMultDense *abt; 2197 MPI_Comm comm; 2198 PetscMPIInt rank, size, sendto, recvfrom, recvisfrom; 2199 PetscScalar *sendbuf, *recvbuf = NULL, *cv; 2200 PetscInt i, cK = A->cmap->N, sendsiz, recvsiz, k, j, bn; 2201 PetscScalar _DOne = 1.0, _DZero = 0.0; 2202 const PetscScalar *av, *bv; 2203 PetscBLASInt cm, cn, ck, alda, blda = 0, 
clda; 2204 MPI_Request reqs[2]; 2205 const PetscInt *ranges; 2206 2207 PetscFunctionBegin; 2208 MatCheckProduct(C, 3); 2209 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 2210 abt = (Mat_MatTransMultDense *)C->product->data; 2211 PetscCall(PetscObjectGetComm((PetscObject)C, &comm)); 2212 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 2213 PetscCallMPI(MPI_Comm_size(comm, &size)); 2214 PetscCall(MatDenseGetArrayRead(a->A, &av)); 2215 PetscCall(MatDenseGetArrayRead(b->A, &bv)); 2216 PetscCall(MatDenseGetArrayWrite(c->A, &cv)); 2217 PetscCall(MatDenseGetLDA(a->A, &i)); 2218 PetscCall(PetscBLASIntCast(i, &alda)); 2219 PetscCall(MatDenseGetLDA(b->A, &i)); 2220 PetscCall(PetscBLASIntCast(i, &blda)); 2221 PetscCall(MatDenseGetLDA(c->A, &i)); 2222 PetscCall(PetscBLASIntCast(i, &clda)); 2223 PetscCall(MatGetOwnershipRanges(B, &ranges)); 2224 bn = B->rmap->n; 2225 if (blda == bn) { 2226 sendbuf = (PetscScalar *)bv; 2227 } else { 2228 sendbuf = abt->buf[0]; 2229 for (k = 0, i = 0; i < cK; i++) { 2230 for (j = 0; j < bn; j++, k++) sendbuf[k] = bv[i * blda + j]; 2231 } 2232 } 2233 if (size > 1) { 2234 sendto = (rank + size - 1) % size; 2235 recvfrom = (rank + size + 1) % size; 2236 } else { 2237 sendto = recvfrom = 0; 2238 } 2239 PetscCall(PetscBLASIntCast(cK, &ck)); 2240 PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm)); 2241 recvisfrom = rank; 2242 for (i = 0; i < size; i++) { 2243 /* we have finished receiving in sending, bufs can be read/modified */ 2244 PetscMPIInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */ 2245 PetscInt nextbn = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom]; 2246 2247 if (nextrecvisfrom != rank) { 2248 /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */ 2249 sendsiz = cK * bn; 2250 recvsiz = cK * nextbn; 2251 recvbuf = (i & 1) ? 
abt->buf[0] : abt->buf[1]; 2252 PetscCallMPI(MPIU_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0])); 2253 PetscCallMPI(MPIU_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1])); 2254 } 2255 2256 /* local aseq * sendbuf^T */ 2257 PetscCall(PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn)); 2258 if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &cm, &cn, &ck, &_DOne, av, &alda, sendbuf, &cn, &_DZero, cv + clda * ranges[recvisfrom], &clda)); 2259 2260 if (nextrecvisfrom != rank) { 2261 /* wait for the sends and receives to complete, swap sendbuf and recvbuf */ 2262 PetscCallMPI(MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE)); 2263 } 2264 bn = nextbn; 2265 recvisfrom = nextrecvisfrom; 2266 sendbuf = recvbuf; 2267 } 2268 PetscCall(MatDenseRestoreArrayRead(a->A, &av)); 2269 PetscCall(MatDenseRestoreArrayRead(b->A, &bv)); 2270 PetscCall(MatDenseRestoreArrayWrite(c->A, &cv)); 2271 PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 2272 PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 2273 PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 2274 PetscFunctionReturn(PETSC_SUCCESS); 2275 } 2276 2277 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C) 2278 { 2279 Mat_MPIDense *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data; 2280 Mat_MatTransMultDense *abt; 2281 MPI_Comm comm; 2282 PetscMPIInt size, ibn; 2283 PetscScalar *cv, *sendbuf, *recvbuf; 2284 const PetscScalar *av, *bv; 2285 PetscInt blda, i, cK = A->cmap->N, k, j, bn; 2286 PetscScalar _DOne = 1.0, _DZero = 0.0; 2287 PetscBLASInt cm, cn, ck, alda, clda; 2288 2289 PetscFunctionBegin; 2290 MatCheckProduct(C, 3); 2291 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 2292 abt = (Mat_MatTransMultDense *)C->product->data; 2293 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 2294 PetscCallMPI(MPI_Comm_size(comm, &size)); 2295 PetscCall(MatDenseGetArrayRead(a->A, &av)); 2296 PetscCall(MatDenseGetArrayRead(b->A, &bv)); 2297 PetscCall(MatDenseGetArrayWrite(c->A, &cv)); 2298 PetscCall(MatDenseGetLDA(a->A, &i)); 2299 PetscCall(PetscBLASIntCast(i, &alda)); 2300 PetscCall(MatDenseGetLDA(b->A, &blda)); 2301 PetscCall(MatDenseGetLDA(c->A, &i)); 2302 PetscCall(PetscBLASIntCast(i, &clda)); 2303 /* copy transpose of B into buf[0] */ 2304 bn = B->rmap->n; 2305 sendbuf = abt->buf[0]; 2306 recvbuf = abt->buf[1]; 2307 for (k = 0, j = 0; j < bn; j++) { 2308 for (i = 0; i < cK; i++, k++) sendbuf[k] = bv[i * blda + j]; 2309 } 2310 PetscCall(MatDenseRestoreArrayRead(b->A, &bv)); 2311 PetscCall(PetscMPIIntCast(bn * cK, &ibn)); 2312 PetscCallMPI(MPI_Allgatherv(sendbuf, ibn, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm)); 2313 PetscCall(PetscBLASIntCast(cK, &ck)); 2314 PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm)); 2315 PetscCall(PetscBLASIntCast(c->A->cmap->n, &cn)); 2316 if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &cm, &cn, &ck, &_DOne, av, &alda, recvbuf, &ck, &_DZero, cv, &clda)); 2317 PetscCall(MatDenseRestoreArrayRead(a->A, &av)); 2318 PetscCall(MatDenseRestoreArrayRead(b->A, &bv)); 2319 PetscCall(MatDenseRestoreArrayWrite(c->A, &cv)); 2320 PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 2321 PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 2322 PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 2323 PetscFunctionReturn(PETSC_SUCCESS); 2324 } 2325 2326 static 
PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) 2327 { 2328 Mat_MatTransMultDense *abt; 2329 2330 PetscFunctionBegin; 2331 MatCheckProduct(C, 3); 2332 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 2333 abt = (Mat_MatTransMultDense *)C->product->data; 2334 switch (abt->alg) { 2335 case 1: 2336 PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C)); 2337 break; 2338 default: 2339 PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C)); 2340 break; 2341 } 2342 PetscFunctionReturn(PETSC_SUCCESS); 2343 } 2344 2345 static PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data) 2346 { 2347 Mat_MatMultDense *ab = (Mat_MatMultDense *)data; 2348 2349 PetscFunctionBegin; 2350 PetscCall(MatDestroy(&ab->Ce)); 2351 PetscCall(MatDestroy(&ab->Ae)); 2352 PetscCall(MatDestroy(&ab->Be)); 2353 PetscCall(PetscFree(ab)); 2354 PetscFunctionReturn(PETSC_SUCCESS); 2355 } 2356 2357 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) 2358 { 2359 Mat_MatMultDense *ab; 2360 Mat_MPIDense *mdn = (Mat_MPIDense *)A->data; 2361 2362 PetscFunctionBegin; 2363 MatCheckProduct(C, 3); 2364 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Missing product data"); 2365 ab = (Mat_MatMultDense *)C->product->data; 2366 if (ab->Ae && ab->Ce) { 2367 #if PetscDefined(HAVE_ELEMENTAL) 2368 PetscCall(MatConvert_MPIDense_Elemental(A, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Ae)); 2369 PetscCall(MatConvert_MPIDense_Elemental(B, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Be)); 2370 PetscCall(MatMatMultNumeric_Elemental(ab->Ae, ab->Be, ab->Ce)); 2371 PetscCall(MatConvert(ab->Ce, MATMPIDENSE, MAT_REUSE_MATRIX, &C)); 2372 #else 2373 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "PETSC_HAVE_ELEMENTAL not defined"); 2374 #endif 2375 } else { 2376 const PetscScalar *read; 2377 PetscScalar *write; 2378 PetscInt lda; 2379 2380 PetscCall(MatDenseGetLDA(B, &lda)); 2381 PetscCall(MatDenseGetArrayRead(B, &read)); 2382 PetscCall(MatDenseGetArrayWrite(ab->Be, &write)); 2383 if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); /* cannot be done during the symbolic phase because of possible calls to MatProductReplaceMats() */ 2384 for (PetscInt i = 0; i < C->cmap->N; ++i) { 2385 PetscCall(PetscSFBcastBegin(mdn->Mvctx, MPIU_SCALAR, read + i * lda, write + i * ab->Be->rmap->n, MPI_REPLACE)); 2386 PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, read + i * lda, write + i * ab->Be->rmap->n, MPI_REPLACE)); 2387 } 2388 PetscCall(MatDenseRestoreArrayWrite(ab->Be, &write)); 2389 PetscCall(MatDenseRestoreArrayRead(B, &read)); 2390 PetscCall(MatMatMultNumeric_SeqDense_SeqDense(((Mat_MPIDense *)A->data)->A, ab->Be, ((Mat_MPIDense *)C->data)->A)); 2391 } 2392 PetscFunctionReturn(PETSC_SUCCESS); 2393 } 2394 2395 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C) 2396 { 2397 Mat_Product *product = C->product; 2398 PetscInt alg; 2399 Mat_MatMultDense *ab; 2400 PetscBool flg; 2401 2402 PetscFunctionBegin; 2403 MatCheckProduct(C, 4); 2404 PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 2405 /* check local size of A and B */ 2406 PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ", %" 
PetscInt_FMT ")", 2407 A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend); 2408 2409 PetscCall(PetscStrcmp(product->alg, "petsc", &flg)); 2410 alg = flg ? 0 : 1; 2411 2412 /* setup C */ 2413 PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N)); 2414 PetscCall(MatSetType(C, MATMPIDENSE)); 2415 PetscCall(MatSetUp(C)); 2416 2417 /* create data structure for reuse Cdense */ 2418 PetscCall(PetscNew(&ab)); 2419 2420 switch (alg) { 2421 case 1: /* alg: "elemental" */ 2422 #if PetscDefined(HAVE_ELEMENTAL) 2423 /* create elemental matrices Ae and Be */ 2424 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &ab->Ae)); 2425 PetscCall(MatSetSizes(ab->Ae, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N)); 2426 PetscCall(MatSetType(ab->Ae, MATELEMENTAL)); 2427 PetscCall(MatSetUp(ab->Ae)); 2428 PetscCall(MatSetOption(ab->Ae, MAT_ROW_ORIENTED, PETSC_FALSE)); 2429 2430 PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &ab->Be)); 2431 PetscCall(MatSetSizes(ab->Be, PETSC_DECIDE, PETSC_DECIDE, B->rmap->N, B->cmap->N)); 2432 PetscCall(MatSetType(ab->Be, MATELEMENTAL)); 2433 PetscCall(MatSetUp(ab->Be)); 2434 PetscCall(MatSetOption(ab->Be, MAT_ROW_ORIENTED, PETSC_FALSE)); 2435 2436 /* compute symbolic Ce = Ae*Be */ 2437 PetscCall(MatCreate(PetscObjectComm((PetscObject)C), &ab->Ce)); 2438 PetscCall(MatMatMultSymbolic_Elemental(ab->Ae, ab->Be, fill, ab->Ce)); 2439 #else 2440 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "PETSC_HAVE_ELEMENTAL not defined"); 2441 #endif 2442 break; 2443 default: /* alg: "petsc" */ 2444 ab->Ae = NULL; 2445 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, A->cmap->N, B->cmap->N, NULL, &ab->Be)); 2446 ab->Ce = NULL; 2447 break; 2448 } 2449 2450 C->product->data = ab; 2451 C->product->destroy = MatDestroy_MatMatMult_MPIDense_MPIDense; 2452 C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense; 2453 PetscFunctionReturn(PETSC_SUCCESS); 2454 } 2455 2456 static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C) 2457 { 2458 Mat_Product *product = C->product; 2459 const char *algTypes[2] = {"petsc", "elemental"}; 2460 PetscInt alg, nalg = PetscDefined(HAVE_ELEMENTAL) ? 
2 : 1; 2461 PetscBool flg = PETSC_FALSE; 2462 2463 PetscFunctionBegin; 2464 /* Set default algorithm */ 2465 alg = 0; /* default is petsc */ 2466 PetscCall(PetscStrcmp(product->alg, "default", &flg)); 2467 if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg])); 2468 2469 /* Get runtime option */ 2470 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat"); 2471 PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_AB", algTypes, nalg, algTypes[alg], &alg, &flg)); 2472 PetscOptionsEnd(); 2473 if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg])); 2474 2475 C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense; 2476 C->ops->productsymbolic = MatProductSymbolic_AB; 2477 PetscFunctionReturn(PETSC_SUCCESS); 2478 } 2479 2480 static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C) 2481 { 2482 Mat_Product *product = C->product; 2483 Mat A = product->A, B = product->B; 2484 2485 PetscFunctionBegin; 2486 PetscCheck(A->rmap->rstart == B->rmap->rstart && A->rmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", 2487 A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend); 2488 C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense; 2489 C->ops->productsymbolic = MatProductSymbolic_AtB; 2490 PetscFunctionReturn(PETSC_SUCCESS); 2491 } 2492 2493 static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C) 2494 { 2495 Mat_Product *product = C->product; 2496 const char *algTypes[2] = {"allgatherv", "cyclic"}; 2497 PetscInt alg, nalg = 2; 2498 PetscBool flg = PETSC_FALSE; 2499 2500 PetscFunctionBegin; 2501 /* Set default algorithm */ 2502 alg = 0; /* default is allgatherv */ 2503 PetscCall(PetscStrcmp(product->alg, "default", &flg)); 2504 if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg])); 2505 2506 /* Get runtime option */ 2507 if (product->api_user) { 2508 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatTransposeMult", "Mat"); 2509 PetscCall(PetscOptionsEList("-matmattransmult_mpidense_mpidense_via", "Algorithmic approach", "MatMatTransposeMult", algTypes, nalg, algTypes[alg], &alg, &flg)); 2510 PetscOptionsEnd(); 2511 } else { 2512 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_ABt", "Mat"); 2513 PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_ABt", algTypes, nalg, algTypes[alg], &alg, &flg)); 2514 PetscOptionsEnd(); 2515 } 2516 if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg])); 2517 2518 C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense; 2519 C->ops->productsymbolic = MatProductSymbolic_ABt; 2520 PetscFunctionReturn(PETSC_SUCCESS); 2521 } 2522 2523 static PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C) 2524 { 2525 Mat_Product *product = C->product; 2526 2527 PetscFunctionBegin; 2528 switch (product->type) { 2529 case MATPRODUCT_AB: 2530 PetscCall(MatProductSetFromOptions_MPIDense_AB(C)); 2531 break; 2532 case MATPRODUCT_AtB: 2533 PetscCall(MatProductSetFromOptions_MPIDense_AtB(C)); 2534 break; 2535 case MATPRODUCT_ABt: 2536 PetscCall(MatProductSetFromOptions_MPIDense_ABt(C)); 2537 break; 2538 default: 2539 break; 2540 } 2541 PetscFunctionReturn(PETSC_SUCCESS); 
2542 } 2543
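/*
   Illustrative sketch (not part of this file) of how the ABt product path configured by
   MatProductSetFromOptions_MPIDense_ABt() is typically driven from user code. A and B are
   assumed to be assembled MATMPIDENSE matrices with identical local column sizes; "cyclic"
   may be replaced by "allgatherv" (the default algorithm).

     Mat C;
     PetscCall(MatProductCreate(A, B, NULL, &C));       // C = A * B^T
     PetscCall(MatProductSetType(C, MATPRODUCT_ABt));
     PetscCall(MatProductSetAlgorithm(C, "cyclic"));    // or use -mat_product_algorithm cyclic
     PetscCall(MatProductSetFromOptions(C));
     PetscCall(MatProductSymbolic(C));
     PetscCall(MatProductNumeric(C));
     PetscCall(MatDestroy(&C));
*/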