1 /* 2 Basic functions for basic parallel dense matrices. 3 Portions of this code are under: 4 Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. 5 */ 6 7 #include <../src/mat/impls/dense/mpi/mpidense.h> /*I "petscmat.h" I*/ 8 #include <../src/mat/impls/aij/mpi/mpiaij.h> 9 #include <petscblaslapack.h> 10 #include <petsc/private/vecimpl.h> 11 12 /*@ 13 MatDenseGetLocalMatrix - For a `MATMPIDENSE` or `MATSEQDENSE` matrix returns the sequential 14 matrix that represents the operator. For sequential matrices it returns itself. 15 16 Input Parameter: 17 . A - the sequential or MPI `MATDENSE` matrix 18 19 Output Parameter: 20 . B - the inner matrix 21 22 Level: intermediate 23 24 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATMPIDENSE`, `MATSEQDENSE` 25 @*/ 26 PetscErrorCode MatDenseGetLocalMatrix(Mat A, Mat *B) 27 { 28 Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 29 PetscBool flg; 30 31 PetscFunctionBegin; 32 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 33 PetscAssertPointer(B, 2); 34 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &flg)); 35 if (flg) *B = mat->A; 36 else { 37 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQDENSE, &flg)); 38 PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)A)->type_name); 39 *B = A; 40 } 41 PetscFunctionReturn(PETSC_SUCCESS); 42 } 43 44 static PetscErrorCode MatCopy_MPIDense(Mat A, Mat B, MatStructure s) 45 { 46 Mat_MPIDense *Amat = (Mat_MPIDense *)A->data; 47 Mat_MPIDense *Bmat = (Mat_MPIDense *)B->data; 48 49 PetscFunctionBegin; 50 PetscCall(MatCopy(Amat->A, Bmat->A, s)); 51 PetscFunctionReturn(PETSC_SUCCESS); 52 } 53 54 PetscErrorCode MatShift_MPIDense(Mat A, PetscScalar alpha) 55 { 56 Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 57 PetscInt j, lda, rstart = A->rmap->rstart, rend = A->rmap->rend, rend2; 58 PetscScalar *v; 59 60 PetscFunctionBegin; 61 PetscCall(MatDenseGetArray(mat->A, &v)); 62 PetscCall(MatDenseGetLDA(mat->A, 
&lda)); 63 rend2 = PetscMin(rend, A->cmap->N); 64 if (rend2 > rstart) { 65 for (j = rstart; j < rend2; j++) v[j - rstart + j * lda] += alpha; 66 PetscCall(PetscLogFlops(rend2 - rstart)); 67 } 68 PetscCall(MatDenseRestoreArray(mat->A, &v)); 69 PetscFunctionReturn(PETSC_SUCCESS); 70 } 71 72 static PetscErrorCode MatGetRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 73 { 74 Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 75 PetscInt lrow, rstart = A->rmap->rstart, rend = A->rmap->rend; 76 77 PetscFunctionBegin; 78 PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows"); 79 lrow = row - rstart; 80 PetscCall(MatGetRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v)); 81 PetscFunctionReturn(PETSC_SUCCESS); 82 } 83 84 static PetscErrorCode MatRestoreRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 85 { 86 Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 87 PetscInt lrow, rstart = A->rmap->rstart, rend = A->rmap->rend; 88 89 PetscFunctionBegin; 90 PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows"); 91 lrow = row - rstart; 92 PetscCall(MatRestoreRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v)); 93 PetscFunctionReturn(PETSC_SUCCESS); 94 } 95 96 static PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A, Mat *a) 97 { 98 Mat_MPIDense *mdn = (Mat_MPIDense *)A->data; 99 PetscInt m = A->rmap->n, rstart = A->rmap->rstart; 100 PetscScalar *array; 101 MPI_Comm comm; 102 PetscBool flg; 103 Mat B; 104 105 PetscFunctionBegin; 106 PetscCall(MatHasCongruentLayouts(A, &flg)); 107 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only square matrices supported."); 108 PetscCall(PetscObjectQuery((PetscObject)A, "DiagonalBlock", (PetscObject *)&B)); 109 if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */ 110 #if PetscDefined(HAVE_CUDA) 111 
PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSECUDA, &flg)); 112 PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSECUDA); 113 #elif PetscDefined(HAVE_HIP) 114 PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSEHIP, &flg)); 115 PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSEHIP); 116 #endif 117 PetscCall(PetscObjectGetComm((PetscObject)mdn->A, &comm)); 118 PetscCall(MatCreate(comm, &B)); 119 PetscCall(MatSetSizes(B, m, m, m, m)); 120 PetscCall(MatSetType(B, ((PetscObject)mdn->A)->type_name)); 121 PetscCall(MatDenseGetArrayRead(mdn->A, (const PetscScalar **)&array)); 122 PetscCall(MatSeqDenseSetPreallocation(B, array + m * rstart)); 123 PetscCall(MatDenseRestoreArrayRead(mdn->A, (const PetscScalar **)&array)); 124 PetscCall(PetscObjectCompose((PetscObject)A, "DiagonalBlock", (PetscObject)B)); 125 *a = B; 126 PetscCall(MatDestroy(&B)); 127 } else *a = B; 128 PetscFunctionReturn(PETSC_SUCCESS); 129 } 130 131 static PetscErrorCode MatSetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], const PetscScalar v[], InsertMode addv) 132 { 133 Mat_MPIDense *A = (Mat_MPIDense *)mat->data; 134 PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row; 135 PetscBool roworiented = A->roworiented; 136 137 PetscFunctionBegin; 138 for (i = 0; i < m; i++) { 139 if (idxm[i] < 0) continue; 140 PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large"); 141 if (idxm[i] >= rstart && idxm[i] < rend) { 142 row = idxm[i] - rstart; 143 if (roworiented) { 144 PetscCall(MatSetValues(A->A, 1, &row, n, idxn, PetscSafePointerPlusOffset(v, i * n), addv)); 145 } else { 146 for (j = 0; j < n; j++) { 147 if (idxn[j] < 0) continue; 148 PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, 
PETSC_ERR_ARG_OUTOFRANGE, "Column too large"); 149 PetscCall(MatSetValues(A->A, 1, &row, 1, &idxn[j], PetscSafePointerPlusOffset(v, i + j * m), addv)); 150 } 151 } 152 } else if (!A->donotstash) { 153 mat->assembled = PETSC_FALSE; 154 if (roworiented) { 155 PetscCall(MatStashValuesRow_Private(&mat->stash, idxm[i], n, idxn, PetscSafePointerPlusOffset(v, i * n), PETSC_FALSE)); 156 } else { 157 PetscCall(MatStashValuesCol_Private(&mat->stash, idxm[i], n, idxn, PetscSafePointerPlusOffset(v, i), m, PETSC_FALSE)); 158 } 159 } 160 } 161 PetscFunctionReturn(PETSC_SUCCESS); 162 } 163 164 static PetscErrorCode MatGetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[]) 165 { 166 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 167 PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row; 168 169 PetscFunctionBegin; 170 for (i = 0; i < m; i++) { 171 if (idxm[i] < 0) continue; /* negative row */ 172 PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large"); 173 if (idxm[i] >= rstart && idxm[i] < rend) { 174 row = idxm[i] - rstart; 175 for (j = 0; j < n; j++) { 176 if (idxn[j] < 0) continue; /* negative column */ 177 PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large"); 178 PetscCall(MatGetValues(mdn->A, 1, &row, 1, &idxn[j], v + i * n + j)); 179 } 180 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported"); 181 } 182 PetscFunctionReturn(PETSC_SUCCESS); 183 } 184 185 static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A, PetscInt *lda) 186 { 187 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 188 189 PetscFunctionBegin; 190 PetscCall(MatDenseGetLDA(a->A, lda)); 191 PetscFunctionReturn(PETSC_SUCCESS); 192 } 193 194 static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A, PetscInt lda) 195 { 196 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 197 MatType mtype = MATSEQDENSE; 198 199 PetscFunctionBegin; 200 if 
(!a->A) { 201 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 202 PetscCall(PetscLayoutSetUp(A->rmap)); 203 PetscCall(PetscLayoutSetUp(A->cmap)); 204 PetscCall(MatCreate(PETSC_COMM_SELF, &a->A)); 205 PetscCall(MatSetSizes(a->A, A->rmap->n, A->cmap->N, A->rmap->n, A->cmap->N)); 206 #if PetscDefined(HAVE_CUDA) 207 PetscBool iscuda; 208 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSECUDA, &iscuda)); 209 if (iscuda) mtype = MATSEQDENSECUDA; 210 #elif PetscDefined(HAVE_HIP) 211 PetscBool iship; 212 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSEHIP, &iship)); 213 if (iship) mtype = MATSEQDENSEHIP; 214 #endif 215 PetscCall(MatSetType(a->A, mtype)); 216 } 217 PetscCall(MatDenseSetLDA(a->A, lda)); 218 PetscFunctionReturn(PETSC_SUCCESS); 219 } 220 221 static PetscErrorCode MatDenseGetArray_MPIDense(Mat A, PetscScalar **array) 222 { 223 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 224 225 PetscFunctionBegin; 226 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 227 PetscCall(MatDenseGetArray(a->A, array)); 228 PetscFunctionReturn(PETSC_SUCCESS); 229 } 230 231 static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A, PetscScalar **array) 232 { 233 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 234 235 PetscFunctionBegin; 236 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 237 PetscCall(MatDenseGetArrayRead(a->A, (const PetscScalar **)array)); 238 PetscFunctionReturn(PETSC_SUCCESS); 239 } 240 241 static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A, PetscScalar **array) 242 { 243 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 244 245 PetscFunctionBegin; 246 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 247 PetscCall(MatDenseGetArrayWrite(a->A, array)); 
248 PetscFunctionReturn(PETSC_SUCCESS); 249 } 250 251 static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A, const PetscScalar *array) 252 { 253 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 254 255 PetscFunctionBegin; 256 PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 257 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 258 PetscCall(MatDensePlaceArray(a->A, array)); 259 PetscFunctionReturn(PETSC_SUCCESS); 260 } 261 262 static PetscErrorCode MatDenseResetArray_MPIDense(Mat A) 263 { 264 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 265 266 PetscFunctionBegin; 267 PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 268 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 269 PetscCall(MatDenseResetArray(a->A)); 270 PetscFunctionReturn(PETSC_SUCCESS); 271 } 272 273 static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A, const PetscScalar *array) 274 { 275 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 276 277 PetscFunctionBegin; 278 PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 279 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 280 PetscCall(MatDenseReplaceArray(a->A, array)); 281 PetscFunctionReturn(PETSC_SUCCESS); 282 } 283 284 static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A, IS isrow, IS iscol, MatReuse scall, Mat *B) 285 { 286 Mat_MPIDense *mat = (Mat_MPIDense *)A->data, *newmatd; 287 PetscInt lda, i, j, rstart, rend, nrows, ncols, Ncols, nlrows, nlcols; 288 const PetscInt *irow, *icol; 289 const PetscScalar *v; 290 PetscScalar *bv; 291 Mat newmat; 292 IS iscol_local; 293 MPI_Comm comm_is, comm_mat; 294 295 
PetscFunctionBegin; 296 PetscCall(PetscObjectGetComm((PetscObject)A, &comm_mat)); 297 PetscCall(PetscObjectGetComm((PetscObject)iscol, &comm_is)); 298 PetscCheck(comm_mat == comm_is, PETSC_COMM_SELF, PETSC_ERR_ARG_NOTSAMECOMM, "IS communicator must match matrix communicator"); 299 300 PetscCall(ISAllGather(iscol, &iscol_local)); 301 PetscCall(ISGetIndices(isrow, &irow)); 302 PetscCall(ISGetIndices(iscol_local, &icol)); 303 PetscCall(ISGetLocalSize(isrow, &nrows)); 304 PetscCall(ISGetLocalSize(iscol, &ncols)); 305 PetscCall(ISGetSize(iscol, &Ncols)); /* global number of columns, size of iscol_local */ 306 307 /* No parallel redistribution currently supported! Should really check each index set 308 to confirm that it is OK. ... Currently supports only submatrix same partitioning as 309 original matrix! */ 310 311 PetscCall(MatGetLocalSize(A, &nlrows, &nlcols)); 312 PetscCall(MatGetOwnershipRange(A, &rstart, &rend)); 313 314 /* Check submatrix call */ 315 if (scall == MAT_REUSE_MATRIX) { 316 /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */ 317 /* Really need to test rows and column sizes! 
*/ 318 newmat = *B; 319 } else { 320 /* Create and fill new matrix */ 321 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &newmat)); 322 PetscCall(MatSetSizes(newmat, nrows, ncols, PETSC_DECIDE, Ncols)); 323 PetscCall(MatSetType(newmat, ((PetscObject)A)->type_name)); 324 PetscCall(MatMPIDenseSetPreallocation(newmat, NULL)); 325 } 326 327 /* Now extract the data pointers and do the copy, column at a time */ 328 newmatd = (Mat_MPIDense *)newmat->data; 329 PetscCall(MatDenseGetArray(newmatd->A, &bv)); 330 PetscCall(MatDenseGetArrayRead(mat->A, &v)); 331 PetscCall(MatDenseGetLDA(mat->A, &lda)); 332 for (i = 0; i < Ncols; i++) { 333 const PetscScalar *av = v + lda * icol[i]; 334 for (j = 0; j < nrows; j++) *bv++ = av[irow[j] - rstart]; 335 } 336 PetscCall(MatDenseRestoreArrayRead(mat->A, &v)); 337 PetscCall(MatDenseRestoreArray(newmatd->A, &bv)); 338 339 /* Assemble the matrices so that the correct flags are set */ 340 PetscCall(MatAssemblyBegin(newmat, MAT_FINAL_ASSEMBLY)); 341 PetscCall(MatAssemblyEnd(newmat, MAT_FINAL_ASSEMBLY)); 342 343 /* Free work space */ 344 PetscCall(ISRestoreIndices(isrow, &irow)); 345 PetscCall(ISRestoreIndices(iscol_local, &icol)); 346 PetscCall(ISDestroy(&iscol_local)); 347 *B = newmat; 348 PetscFunctionReturn(PETSC_SUCCESS); 349 } 350 351 static PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A, PetscScalar **array) 352 { 353 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 354 355 PetscFunctionBegin; 356 PetscCall(MatDenseRestoreArray(a->A, array)); 357 PetscFunctionReturn(PETSC_SUCCESS); 358 } 359 360 static PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A, PetscScalar **array) 361 { 362 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 363 364 PetscFunctionBegin; 365 PetscCall(MatDenseRestoreArrayRead(a->A, (const PetscScalar **)array)); 366 PetscFunctionReturn(PETSC_SUCCESS); 367 } 368 369 static PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A, PetscScalar **array) 370 { 371 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 372 373 
PetscFunctionBegin; 374 PetscCall(MatDenseRestoreArrayWrite(a->A, array)); 375 PetscFunctionReturn(PETSC_SUCCESS); 376 } 377 378 static PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat, MatAssemblyType mode) 379 { 380 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 381 PetscInt nstash, reallocs; 382 383 PetscFunctionBegin; 384 if (mdn->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS); 385 386 PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range)); 387 PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs)); 388 PetscCall(PetscInfo(mdn->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs)); 389 PetscFunctionReturn(PETSC_SUCCESS); 390 } 391 392 static PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat, MatAssemblyType mode) 393 { 394 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 395 PetscInt i, *row, *col, flg, j, rstart, ncols; 396 PetscMPIInt n; 397 PetscScalar *val; 398 399 PetscFunctionBegin; 400 if (!mdn->donotstash && !mat->nooffprocentries) { 401 /* wait on receives */ 402 while (1) { 403 PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg)); 404 if (!flg) break; 405 406 for (i = 0; i < n;) { 407 /* Now identify the consecutive vals belonging to the same row */ 408 for (j = i, rstart = row[j]; j < n; j++) { 409 if (row[j] != rstart) break; 410 } 411 if (j < n) ncols = j - i; 412 else ncols = n - i; 413 /* Now assemble all these values with a single function call */ 414 PetscCall(MatSetValues_MPIDense(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode)); 415 i = j; 416 } 417 } 418 PetscCall(MatStashScatterEnd_Private(&mat->stash)); 419 } 420 421 PetscCall(MatAssemblyBegin(mdn->A, mode)); 422 PetscCall(MatAssemblyEnd(mdn->A, mode)); 423 PetscFunctionReturn(PETSC_SUCCESS); 424 } 425 426 static PetscErrorCode MatZeroEntries_MPIDense(Mat A) 427 { 428 Mat_MPIDense *l = (Mat_MPIDense *)A->data; 429 430 PetscFunctionBegin; 431 
PetscCall(MatZeroEntries(l->A)); 432 PetscFunctionReturn(PETSC_SUCCESS); 433 } 434 435 static PetscErrorCode MatZeroRows_MPIDense(Mat A, PetscInt n, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) 436 { 437 Mat_MPIDense *l = (Mat_MPIDense *)A->data; 438 PetscInt i, len, *lrows; 439 440 PetscFunctionBegin; 441 /* get locally owned rows */ 442 PetscCall(PetscLayoutMapLocal(A->rmap, n, rows, &len, &lrows, NULL)); 443 /* fix right-hand side if needed */ 444 if (x && b) { 445 const PetscScalar *xx; 446 PetscScalar *bb; 447 448 PetscCall(VecGetArrayRead(x, &xx)); 449 PetscCall(VecGetArrayWrite(b, &bb)); 450 for (i = 0; i < len; ++i) bb[lrows[i]] = diag * xx[lrows[i]]; 451 PetscCall(VecRestoreArrayRead(x, &xx)); 452 PetscCall(VecRestoreArrayWrite(b, &bb)); 453 } 454 PetscCall(MatZeroRows(l->A, len, lrows, 0.0, NULL, NULL)); 455 if (diag != 0.0) { 456 Vec d; 457 458 PetscCall(MatCreateVecs(A, NULL, &d)); 459 PetscCall(VecSet(d, diag)); 460 PetscCall(MatDiagonalSet(A, d, INSERT_VALUES)); 461 PetscCall(VecDestroy(&d)); 462 } 463 PetscCall(PetscFree(lrows)); 464 PetscFunctionReturn(PETSC_SUCCESS); 465 } 466 467 PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat, Vec, Vec); 468 PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat, Vec, Vec, Vec); 469 PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat, Vec, Vec); 470 PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat, Vec, Vec, Vec); 471 472 static PetscErrorCode MatMult_MPIDense(Mat mat, Vec xx, Vec yy) 473 { 474 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 475 const PetscScalar *ax; 476 PetscScalar *ay; 477 PetscMemType axmtype, aymtype; 478 479 PetscFunctionBegin; 480 if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat)); 481 PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype)); 482 PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype)); 483 PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE)); 484 PetscCall(PetscSFBcastEnd(mdn->Mvctx, 
MPIU_SCALAR, ax, ay, MPI_REPLACE)); 485 PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay)); 486 PetscCall(VecRestoreArrayReadAndMemType(xx, &ax)); 487 PetscCall((*mdn->A->ops->mult)(mdn->A, mdn->lvec, yy)); 488 PetscFunctionReturn(PETSC_SUCCESS); 489 } 490 491 static PetscErrorCode MatMultAddColumnRange_MPIDense(Mat mat, Vec xx, Vec yy, Vec zz, PetscInt c_start, PetscInt c_end) 492 { 493 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 494 const PetscScalar *ax; 495 PetscScalar *ay; 496 PetscMemType axmtype, aymtype; 497 498 PetscFunctionBegin; 499 if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat)); 500 PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype)); 501 PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype)); 502 PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE)); 503 PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE)); 504 PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay)); 505 PetscCall(VecRestoreArrayReadAndMemType(xx, &ax)); 506 PetscUseMethod(mdn->A, "MatMultAddColumnRange_C", (Mat, Vec, Vec, Vec, PetscInt, PetscInt), (mdn->A, mdn->lvec, yy, zz, c_start, c_end)); 507 PetscFunctionReturn(PETSC_SUCCESS); 508 } 509 510 static PetscErrorCode MatMultAdd_MPIDense(Mat mat, Vec xx, Vec yy, Vec zz) 511 { 512 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 513 const PetscScalar *ax; 514 PetscScalar *ay; 515 PetscMemType axmtype, aymtype; 516 517 PetscFunctionBegin; 518 if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat)); 519 PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype)); 520 PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype)); 521 PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE)); 522 PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE)); 523 PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay)); 524 PetscCall(VecRestoreArrayReadAndMemType(xx, &ax)); 525 
PetscCall((*mdn->A->ops->multadd)(mdn->A, mdn->lvec, yy, zz)); 526 PetscFunctionReturn(PETSC_SUCCESS); 527 } 528 529 static PetscErrorCode MatMultHermitianTransposeColumnRange_MPIDense(Mat A, Vec xx, Vec yy, PetscInt c_start, PetscInt c_end) 530 { 531 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 532 const PetscScalar *ax; 533 PetscScalar *ay; 534 PetscMemType axmtype, aymtype; 535 536 PetscFunctionBegin; 537 if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); 538 PetscCall(VecSet(yy, 0.0)); 539 PetscUseMethod(a->A, "MatMultHermitianTransposeColumnRange_C", (Mat, Vec, Vec, PetscInt, PetscInt), (a->A, xx, a->lvec, c_start, c_end)); 540 PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype)); 541 PetscCall(VecGetArrayAndMemType(yy, &ay, &aymtype)); 542 PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM)); 543 PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM)); 544 PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax)); 545 PetscCall(VecRestoreArrayAndMemType(yy, &ay)); 546 PetscFunctionReturn(PETSC_SUCCESS); 547 } 548 549 static PetscErrorCode MatMultTransposeKernel_MPIDense(Mat A, Vec xx, Vec yy, PetscBool herm) 550 { 551 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 552 const PetscScalar *ax; 553 PetscScalar *ay; 554 PetscMemType axmtype, aymtype; 555 556 PetscFunctionBegin; 557 if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); 558 PetscCall(VecSet(yy, 0.0)); 559 if (herm) PetscCall((*a->A->ops->multhermitiantranspose)(a->A, xx, a->lvec)); 560 else PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec)); 561 PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype)); 562 PetscCall(VecGetArrayAndMemType(yy, &ay, &aymtype)); 563 PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM)); 564 PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM)); 565 PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax)); 566 
PetscCall(VecRestoreArrayAndMemType(yy, &ay)); 567 PetscFunctionReturn(PETSC_SUCCESS); 568 } 569 570 static PetscErrorCode MatMultHermitianTransposeAddColumnRange_MPIDense(Mat A, Vec xx, Vec yy, Vec zz, PetscInt c_start, PetscInt c_end) 571 { 572 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 573 const PetscScalar *ax; 574 PetscScalar *ay; 575 PetscMemType axmtype, aymtype; 576 577 PetscFunctionBegin; 578 if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); 579 PetscCall(VecCopy(yy, zz)); 580 PetscMPIInt rank; 581 PetscCallMPI(MPI_Comm_rank(MPI_COMM_WORLD, &rank)); 582 PetscUseMethod(a->A, "MatMultHermitianTransposeColumnRange_C", (Mat, Vec, Vec, PetscInt, PetscInt), (a->A, xx, a->lvec, c_start, c_end)); 583 PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype)); 584 PetscCall(VecGetArrayAndMemType(zz, &ay, &aymtype)); 585 PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM)); 586 PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM)); 587 PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax)); 588 PetscCall(VecRestoreArrayAndMemType(zz, &ay)); 589 PetscFunctionReturn(PETSC_SUCCESS); 590 } 591 592 static PetscErrorCode MatMultTransposeAddKernel_MPIDense(Mat A, Vec xx, Vec yy, Vec zz, PetscBool herm) 593 { 594 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 595 const PetscScalar *ax; 596 PetscScalar *ay; 597 PetscMemType axmtype, aymtype; 598 599 PetscFunctionBegin; 600 if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); 601 PetscCall(VecCopy(yy, zz)); 602 if (herm) PetscCall((*a->A->ops->multhermitiantranspose)(a->A, xx, a->lvec)); 603 else PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec)); 604 PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype)); 605 PetscCall(VecGetArrayAndMemType(zz, &ay, &aymtype)); 606 PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM)); 607 PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM)); 608 
PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax)); 609 PetscCall(VecRestoreArrayAndMemType(zz, &ay)); 610 PetscFunctionReturn(PETSC_SUCCESS); 611 } 612 613 static PetscErrorCode MatMultTranspose_MPIDense(Mat A, Vec xx, Vec yy) 614 { 615 PetscFunctionBegin; 616 PetscCall(MatMultTransposeKernel_MPIDense(A, xx, yy, PETSC_FALSE)); 617 PetscFunctionReturn(PETSC_SUCCESS); 618 } 619 620 static PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A, Vec xx, Vec yy, Vec zz) 621 { 622 PetscFunctionBegin; 623 PetscCall(MatMultTransposeAddKernel_MPIDense(A, xx, yy, zz, PETSC_FALSE)); 624 PetscFunctionReturn(PETSC_SUCCESS); 625 } 626 627 static PetscErrorCode MatMultHermitianTranspose_MPIDense(Mat A, Vec xx, Vec yy) 628 { 629 PetscFunctionBegin; 630 PetscCall(MatMultTransposeKernel_MPIDense(A, xx, yy, PETSC_TRUE)); 631 PetscFunctionReturn(PETSC_SUCCESS); 632 } 633 634 static PetscErrorCode MatMultHermitianTransposeAdd_MPIDense(Mat A, Vec xx, Vec yy, Vec zz) 635 { 636 PetscFunctionBegin; 637 PetscCall(MatMultTransposeAddKernel_MPIDense(A, xx, yy, zz, PETSC_TRUE)); 638 PetscFunctionReturn(PETSC_SUCCESS); 639 } 640 641 PetscErrorCode MatGetDiagonal_MPIDense(Mat A, Vec v) 642 { 643 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 644 PetscInt lda, len, i, nl, ng, m = A->rmap->n, radd; 645 PetscScalar *x; 646 const PetscScalar *av; 647 648 PetscFunctionBegin; 649 PetscCall(VecGetArray(v, &x)); 650 PetscCall(VecGetSize(v, &ng)); 651 PetscCheck(ng == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming mat and vec"); 652 PetscCall(VecGetLocalSize(v, &nl)); 653 len = PetscMin(a->A->rmap->n, a->A->cmap->n); 654 radd = A->rmap->rstart * m; 655 PetscCall(MatDenseGetArrayRead(a->A, &av)); 656 PetscCall(MatDenseGetLDA(a->A, &lda)); 657 for (i = 0; i < len; i++) x[i] = av[radd + i * lda + i]; 658 PetscCall(MatDenseRestoreArrayRead(a->A, &av)); 659 if (nl - i > 0) PetscCall(PetscArrayzero(x + i, nl - i)); 660 PetscCall(VecRestoreArray(v, &x)); 661 PetscFunctionReturn(PETSC_SUCCESS); 662 } 
663 664 static PetscErrorCode MatDestroy_MPIDense(Mat mat) 665 { 666 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 667 668 PetscFunctionBegin; 669 PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N)); 670 PetscCall(MatStashDestroy_Private(&mat->stash)); 671 PetscCheck(!mdn->vecinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 672 PetscCheck(!mdn->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 673 PetscCall(MatDestroy(&mdn->A)); 674 PetscCall(VecDestroy(&mdn->lvec)); 675 PetscCall(PetscSFDestroy(&mdn->Mvctx)); 676 PetscCall(VecDestroy(&mdn->cvec)); 677 PetscCall(MatDestroy(&mdn->cmat)); 678 679 PetscCall(PetscFree(mat->data)); 680 PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL)); 681 682 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", NULL)); 683 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", NULL)); 684 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", NULL)); 685 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", NULL)); 686 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", NULL)); 687 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", NULL)); 688 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", NULL)); 689 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", NULL)); 690 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", NULL)); 691 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", NULL)); 692 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", NULL)); 693 PetscCall(PetscObjectComposeFunction((PetscObject)mat, 
"MatConvert_mpiaij_mpidense_C", NULL)); 694 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", NULL)); 695 #if defined(PETSC_HAVE_ELEMENTAL) 696 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", NULL)); 697 #endif 698 #if defined(PETSC_HAVE_SCALAPACK) 699 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", NULL)); 700 #endif 701 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", NULL)); 702 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", NULL)); 703 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", NULL)); 704 #if defined(PETSC_HAVE_CUDA) 705 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", NULL)); 706 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", NULL)); 707 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", NULL)); 708 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensecuda_mpidense_C", NULL)); 709 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", NULL)); 710 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", NULL)); 711 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", NULL)); 712 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", NULL)); 713 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArray_C", NULL)); 714 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayRead_C", NULL)); 715 PetscCall(PetscObjectComposeFunction((PetscObject)mat, 
"MatDenseCUDAGetArrayWrite_C", NULL)); 716 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArray_C", NULL)); 717 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayRead_C", NULL)); 718 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayWrite_C", NULL)); 719 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAPlaceArray_C", NULL)); 720 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAResetArray_C", NULL)); 721 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAReplaceArray_C", NULL)); 722 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDASetPreallocation_C", NULL)); 723 #endif 724 #if defined(PETSC_HAVE_HIP) 725 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", NULL)); 726 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", NULL)); 727 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", NULL)); 728 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensehip_mpidense_C", NULL)); 729 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensehip_C", NULL)); 730 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidensehip_C", NULL)); 731 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaij_C", NULL)); 732 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaijhipsparse_C", NULL)); 733 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArray_C", NULL)); 734 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayRead_C", NULL)); 735 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayWrite_C", NULL)); 736 
PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArray_C", NULL)); 737 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayRead_C", NULL)); 738 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayWrite_C", NULL)); 739 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPPlaceArray_C", NULL)); 740 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPResetArray_C", NULL)); 741 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPReplaceArray_C", NULL)); 742 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPSetPreallocation_C", NULL)); 743 #endif 744 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", NULL)); 745 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", NULL)); 746 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", NULL)); 747 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", NULL)); 748 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", NULL)); 749 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", NULL)); 750 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", NULL)); 751 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", NULL)); 752 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", NULL)); 753 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", NULL)); 754 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultAddColumnRange_C", NULL)); 755 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeColumnRange_C", NULL)); 756 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeAddColumnRange_C", NULL)); 757 758 
/* NOTE(review): the next three lines are the tail of MatDestroy_MPIDense(), whose
   beginning lies before this chunk; they drop the cached diagonal block and return. */
PetscCall(PetscObjectCompose((PetscObject)mat, "DiagonalBlock", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

#include <petscdraw.h>
/*
  MatView_MPIDense_ASCIIorDraworSocket - Viewer backend shared by the ASCII, draw and
  socket viewers. For the ASCII "info" formats only per-rank statistics are printed;
  for everything else the whole matrix is assembled onto rank 0 (via a temporary
  MATMPIDENSE with all rows on rank 0) and viewed there as a sequential matrix.
*/
static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
{
  Mat_MPIDense     *mdn = (Mat_MPIDense *)mat->data;
  PetscMPIInt       rank;
  PetscViewerType   vtype; /* NOTE(review): fetched below but otherwise unused here */
  PetscBool         iascii, isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  if (iascii) {
    PetscCall(PetscViewerGetType(viewer, &vtype));
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* per-rank local statistics only; synchronized printing keeps output ordered by rank */
      MatInfo info;
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "  [%d] local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %" PetscInt_FMT " \n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                   (PetscInt)info.memory));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      if (mdn->Mvctx) PetscCall(PetscSFView(mdn->Mvctx, viewer));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      /* nothing extra to print in plain info format */
      PetscFunctionReturn(PETSC_SUCCESS);
    }
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;

    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawIsNull(draw, &isnull));
    if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat          A;
    PetscInt     M = mat->rmap->N, N = mat->cmap->N, m, row, i, nz;
    PetscInt    *cols;
    PetscScalar *vals;

    PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
    /* all M rows live on rank 0 of the temporary matrix */
    if (rank == 0) {
      PetscCall(MatSetSizes(A, M, N, M, N));
    } else {
      PetscCall(MatSetSizes(A, 0, 0, M, N));
    }
    /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
    PetscCall(MatSetType(A, MATMPIDENSE));
    PetscCall(MatMPIDenseSetPreallocation(A, NULL));

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    A->insertmode = INSERT_VALUES;

    row = mat->rmap->rstart;
    m   = mdn->A->rmap->n;
    for (i = 0; i < m; i++) {
      /* row-by-row copy of the locally owned rows into the rank-0 matrix */
      PetscCall(MatGetRow_MPIDense(mat, row, &nz, &cols, &vals));
      PetscCall(MatSetValues_MPIDense(A, 1, &row, nz, cols, vals, INSERT_VALUES));
      PetscCall(MatRestoreRow_MPIDense(mat, row, &nz, &cols, &vals));
      row++;
    }

    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    if (rank == 0) {
      /* give the inner sequential matrix the outer matrix's name so the output is labeled */
      PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIDense *)A->data)->A, ((PetscObject)mat)->name));
      PetscCall(MatView_SeqDense(((Mat_MPIDense *)A->data)->A, sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatView_MPIDense - top-level viewer dispatch: ASCII/socket/draw share one backend,
   binary has its own; other viewer types are silently ignored. */
static PetscErrorCode MatView_MPIDense(Mat mat, PetscViewer viewer)
{
  PetscBool iascii, isbinary, isdraw, issocket;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));

  if (iascii || issocket || isdraw) {
    PetscCall(MatView_MPIDense_ASCIIorDraworSocket(mat, viewer));
  } else if (isbinary) PetscCall(MatView_Dense_Binary(mat, viewer));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatGetInfo_MPIDense - gathers storage statistics from the local block and, for the
   global flavors, reduces the five counters across the communicator (MAX or SUM). */
static PetscErrorCode MatGetInfo_MPIDense(Mat A, MatInfoType flag, MatInfo *info)
{
  Mat_MPIDense  *mat = (Mat_MPIDense *)A->data;
  Mat            mdn = mat->A;
  PetscLogDouble isend[5], irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;

  /* local statistics come from the inner sequential matrix */
  PetscCall(MatGetInfo(mdn, MAT_LOCAL, info));

  isend[0] = info->nz_used;
  isend[1] = info->nz_allocated;
  isend[2] = info->nz_unneeded;
  isend[3] = info->memory;
  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)A)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)A)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatSetOption_MPIDense - forwards options to the inner sequential matrix where they
   make sense, records stash/row-orientation flags locally, and ignores options that
   have no meaning for a dense matrix. */
static PetscErrorCode MatSetOption_MPIDense(Mat A, MatOption op, PetscBool flg)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A, 1);
    a->roworiented = flg; /* remembered locally for MatSetValues and forwarded */
    PetscCall(MatSetOption(a->A, op, flg));
    break;
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_USE_HASH_TABLE:
  case MAT_SORTED_FULL:
  case MAT_IGNORE_LOWER_TRIANGULAR:
  case MAT_IGNORE_ZERO_ENTRIES:
  case MAT_SUBMAT_SINGLEIS:
    PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg; /* skip the stash machinery entirely when set */
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
  case MAT_SPD:
  case MAT_SPD_ETERNAL:
    /* if the diagonal matrix is square it inherits some of the properties above */
    if (a->A && A->rmap->n == A->cmap->n) PetscCall(MatSetOption(a->A, op, flg));
    break;
  default:
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %s", MatOptions[op]);
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatDiagonalScale_MPIDense - computes A = diag(ll) * A * diag(rr). The left vector
   matches the row layout and scales rows directly; the right vector is first
   broadcast into the local work vector lvec via the Mvctx star forest, since every
   rank holds all N columns locally. Either vector may be NULL to skip that side. */
static PetscErrorCode MatDiagonalScale_MPIDense(Mat A, Vec ll, Vec rr)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)A->data;
  const PetscScalar *l;
  PetscScalar        x, *v, *vv, *r;
  PetscInt           i, j, s2a, s3a, s2, s3, m = mdn->A->rmap->n, n = mdn->A->cmap->n, lda;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArray(mdn->A, &vv));
  PetscCall(MatDenseGetLDA(mdn->A, &lda));
  PetscCall(MatGetLocalSize(A, &s2, &s3));
  if (ll) {
    PetscCall(VecGetLocalSize(ll, &s2a));
    PetscCheck(s2a == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left scaling vector non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT, s2a, s2);
    PetscCall(VecGetArrayRead(ll, &l));
    for (i = 0; i < m; i++) {
      x = l[i];
      v = vv + i; /* walk across row i: entries are lda apart in column-major storage */
      for (j = 0; j < n; j++) {
        (*v) *= x;
        v += lda;
      }
    }
    PetscCall(VecRestoreArrayRead(ll, &l));
    PetscCall(PetscLogFlops(1.0 * n * m));
  }
  if (rr) {
    const PetscScalar *ar;

    PetscCall(VecGetLocalSize(rr, &s3a));
    PetscCheck(s3a == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Right scaling vec non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT ".", s3a, s3);
    PetscCall(VecGetArrayRead(rr, &ar));
    /* bring the full (global-length) right-scaling vector to every rank */
    if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
    PetscCall(VecGetArray(mdn->lvec, &r));
    PetscCall(PetscSFBcastBegin(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE));
    PetscCall(VecRestoreArrayRead(rr, &ar));
    for (i = 0; i < n; i++) {
      x = r[i];
      v = vv + i * lda; /* column i is contiguous */
      for (j = 0; j < m; j++) (*v++) *= x;
    }
    PetscCall(VecRestoreArray(mdn->lvec, &r));
    PetscCall(PetscLogFlops(1.0 * n * m));
  }
  PetscCall(MatDenseRestoreArray(mdn->A, &vv));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatNorm_MPIDense - Frobenius, 1- and infinity-norms; the 2-norm is unsupported.
   Frobenius sums |a_ij|^2 locally then reduces; the 1-norm reduces per-column sums
   (each rank owns whole columns of its row block) and takes the max; the
   infinity-norm takes the max of the local row-sum norms. */
static PetscErrorCode MatNorm_MPIDense(Mat A, NormType type, PetscReal *nrm)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)A->data;
  PetscInt           i, j;
  PetscMPIInt        size;
  PetscReal          sum = 0.0;
  const PetscScalar *av, *v;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArrayRead(mdn->A, &av));
  v = av;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    /* single rank: defer entirely to the sequential implementation */
    PetscCall(MatNorm(mdn->A, type, nrm));
  } else {
    if (type == NORM_FROBENIUS) {
      /* NOTE(review): assumes the local array is contiguous (lda == local rows) — confirm */
      for (i = 0; i < mdn->A->cmap->n * mdn->A->rmap->n; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      PetscCallMPI(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
      *nrm = PetscSqrtReal(*nrm);
      PetscCall(PetscLogFlops(2.0 * mdn->A->cmap->n * mdn->A->rmap->n));
    } else if (type == NORM_1) {
      PetscReal *tmp;

      /* tmp[j] accumulates the absolute column sum of global column j */
      PetscCall(PetscCalloc1(A->cmap->N, &tmp));
      *nrm = 0.0;
      v    = av;
      for (j = 0; j < mdn->A->cmap->n; j++) {
        for (i = 0; i < mdn->A->rmap->n; i++) {
          tmp[j] += PetscAbsScalar(*v);
          v++;
        }
      }
      PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, tmp, A->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
      for (j = 0; j < A->cmap->N; j++) {
        if (tmp[j] > *nrm) *nrm = tmp[j];
      }
      PetscCall(PetscFree(tmp));
      PetscCall(PetscLogFlops(A->cmap->n * A->rmap->n));
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscCall(MatNorm(mdn->A, type, nrm));
      PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
    } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for two norm");
  }
  PetscCall(MatDenseRestoreArrayRead(mdn->A, &av));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatTranspose_MPIDense - builds the transpose by inserting each local column of A
   as a (global) row of B through MatSetValues, then assembling. For
   MAT_INPLACE_MATRIX the new matrix replaces A's header. */
static PetscErrorCode MatTranspose_MPIDense(Mat A, MatReuse reuse, Mat *matout)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat           B;
  PetscInt      M = A->rmap->N, N = A->cmap->N, m, n, *rwork, rstart = A->rmap->rstart;
  PetscInt      j, i, lda;
  PetscScalar  *v;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
    PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
    PetscCall(MatMPIDenseSetPreallocation(B, NULL));
  } else B = *matout;

  m = a->A->rmap->n;
  n = a->A->cmap->n;
  PetscCall(MatDenseGetArrayRead(a->A, (const PetscScalar **)&v));
  PetscCall(MatDenseGetLDA(a->A, &lda));
  /* rwork holds the global row indices of this rank's rows (become columns of B) */
  PetscCall(PetscMalloc1(m, &rwork));
  for (i = 0; i < m; i++) rwork[i] = rstart + i;
  for (j = 0; j < n; j++) {
    PetscCall(MatSetValues(B, 1, &j, m, rwork, v, INSERT_VALUES));
    v = PetscSafePointerPlusOffset(v, lda); /* advance to the next column */
  }
  PetscCall(MatDenseRestoreArrayRead(a->A, (const PetscScalar **)&v));
  PetscCall(PetscFree(rwork));
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    PetscCall(MatHeaderMerge(A, &B));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatDuplicate_MPIDense(Mat, MatDuplicateOption, Mat *);
PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat, PetscScalar);

/* MatSetUp_MPIDense - finalizes the layouts and triggers default preallocation. */
static PetscErrorCode MatSetUp_MPIDense(Mat A)
{
  PetscFunctionBegin;
  PetscCall(PetscLayoutSetUp(A->rmap));
  PetscCall(PetscLayoutSetUp(A->cmap));
  if (!A->preallocated) PetscCall(MatMPIDenseSetPreallocation(A, NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatAXPY_MPIDense - Y += alpha*X, delegated to the local sequential blocks. */
static PetscErrorCode MatAXPY_MPIDense(Mat Y, PetscScalar alpha, Mat X, MatStructure str)
{
  Mat_MPIDense *A = (Mat_MPIDense *)Y->data, *B = (Mat_MPIDense *)X->data;

  PetscFunctionBegin;
  PetscCall(MatAXPY(A->A, alpha, B->A, str));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatConjugate_MPIDense - complex conjugation, delegated to the local block. */
static PetscErrorCode MatConjugate_MPIDense(Mat mat)
{
  Mat_MPIDense *a = (Mat_MPIDense *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatConjugate(a->A));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatRealPart_MPIDense - keep only the real part, delegated to the local block. */
static PetscErrorCode MatRealPart_MPIDense(Mat A)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatRealPart(a->A));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatImaginaryPart_MPIDense - keep only the imaginary part, delegated to the local block. */
static PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatImaginaryPart(a->A));
  PetscFunctionReturn(PETSC_SUCCESS);
}
/* MatGetColumnVector_MPIDense - extracts global column col into v by calling the
   inner sequential matrix's getcolumnvector op (every rank owns a full column slice). */
static PetscErrorCode MatGetColumnVector_MPIDense(Mat A, Vec v, PetscInt col)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(a->A, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing local matrix");
  PetscCheck(a->A->ops->getcolumnvector, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing get column operation");
  PetscCall((*a->A->ops->getcolumnvector)(a->A, v, col));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat, PetscInt, PetscReal *);

/* MatGetColumnReductions_MPIDense - per-column reductions (norms, sums, means).
   type mixes NormType and ReductionType values. Strategy: compute a summable local
   quantity (MEAN is mapped to SUM, NORM_2 is squared), Allreduce with SUM (MAX for
   NORM_INFINITY), then post-process (sqrt for NORM_2, divide by global rows for MEAN). */
static PetscErrorCode MatGetColumnReductions_MPIDense(Mat A, PetscInt type, PetscReal *reductions)
{
  PetscInt      i, m, n;
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A, &m, &n));
  if (type == REDUCTION_MEAN_REALPART) {
    PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_REALPART, reductions));
  } else if (type == REDUCTION_MEAN_IMAGINARYPART) {
    PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_IMAGINARYPART, reductions));
  } else {
    PetscCall(MatGetColumnReductions_SeqDense(a->A, type, reductions));
  }
  if (type == NORM_2) {
    /* square so the cross-rank reduction can SUM */
    for (i = 0; i < n; i++) reductions[i] *= reductions[i];
  }
  if (type == NORM_INFINITY) {
    PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, reductions, n, MPIU_REAL, MPIU_MAX, A->hdr.comm));
  } else {
    PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, reductions, n, MPIU_REAL, MPIU_SUM, A->hdr.comm));
  }
  if (type == NORM_2) {
    for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
  } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < n; i++) reductions[i] /= m; /* mean over all global rows */
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatSetRandom_MPIDense - fills the local block with random values; mirrors the
   device offload mask from the inner matrix when device support is compiled in. */
static PetscErrorCode MatSetRandom_MPIDense(Mat x, PetscRandom rctx)
{
  Mat_MPIDense *d = (Mat_MPIDense *)x->data;

  PetscFunctionBegin;
  PetscCall(MatSetRandom(d->A, rctx));
#if defined(PETSC_HAVE_DEVICE)
  x->offloadmask = d->A->offloadmask;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatMissingDiagonal_MPIDense - a dense matrix always stores every diagonal entry,
   so nothing is ever missing; the d output is left untouched. */
static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A, PetscBool *missing, PetscInt *d)
{
  PetscFunctionBegin;
  *missing = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat);
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat);
static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat);
static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat);
static PetscErrorCode MatEqual_MPIDense(Mat, Mat, PetscBool *);
static PetscErrorCode MatLoad_MPIDense(Mat, PetscViewer);
static PetscErrorCode MatProductSetFromOptions_MPIDense(Mat);

/* Function table for MATMPIDENSE. Entries are POSITIONAL (slot numbers in the
   comments index struct _MatOps); a NULL slot means the operation is unsupported
   or inherited. Do not reorder. */
static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
                                       MatGetRow_MPIDense,
                                       MatRestoreRow_MPIDense,
                                       MatMult_MPIDense,
                                       /* 4*/ MatMultAdd_MPIDense,
                                       MatMultTranspose_MPIDense,
                                       MatMultTransposeAdd_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 10*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatTranspose_MPIDense,
                                       /* 15*/ MatGetInfo_MPIDense,
                                       MatEqual_MPIDense,
                                       MatGetDiagonal_MPIDense,
                                       MatDiagonalScale_MPIDense,
                                       MatNorm_MPIDense,
                                       /* 20*/ MatAssemblyBegin_MPIDense,
                                       MatAssemblyEnd_MPIDense,
                                       MatSetOption_MPIDense,
                                       MatZeroEntries_MPIDense,
                                       /* 24*/ MatZeroRows_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 29*/ MatSetUp_MPIDense,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPIDense,
                                       NULL,
                                       /* 34*/ MatDuplicate_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 39*/ MatAXPY_MPIDense,
                                       MatCreateSubMatrices_MPIDense,
                                       NULL,
                                       MatGetValues_MPIDense,
                                       MatCopy_MPIDense,
                                       /* 44*/ NULL,
                                       MatScale_MPIDense,
                                       MatShift_MPIDense,
                                       NULL,
                                       NULL,
                                       /* 49*/ MatSetRandom_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 54*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 59*/ MatCreateSubMatrix_MPIDense,
                                       MatDestroy_MPIDense,
                                       MatView_MPIDense,
                                       NULL,
                                       NULL,
                                       /* 64*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 69*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 74*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 79*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 83*/ MatLoad_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 89*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 94*/ NULL,
                                       NULL,
                                       MatMatTransposeMultSymbolic_MPIDense_MPIDense,
                                       MatMatTransposeMultNumeric_MPIDense_MPIDense,
                                       NULL,
                                       /* 99*/ MatProductSetFromOptions_MPIDense,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPIDense,
                                       NULL,
                                       /*104*/ NULL,
                                       MatRealPart_MPIDense,
                                       MatImaginaryPart_MPIDense,
                                       NULL,
                                       NULL,
                                       /*109*/ NULL,
                                       NULL,
                                       NULL,
                                       MatGetColumnVector_MPIDense,
                                       MatMissingDiagonal_MPIDense,
                                       /*114*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*119*/ NULL,
                                       NULL,
                                       MatMultHermitianTranspose_MPIDense,
                                       MatMultHermitianTransposeAdd_MPIDense,
                                       NULL,
                                       /*124*/ NULL,
                                       MatGetColumnReductions_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*129*/ NULL,
                                       NULL,
                                       MatTransposeMatMultSymbolic_MPIDense_MPIDense,
                                       MatTransposeMatMultNumeric_MPIDense_MPIDense,
                                       NULL,
                                       /*134*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*139*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatCreateMPIMatConcatenateSeqMat_MPIDense,
                                       /*145*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*150*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*155*/ NULL,
                                       NULL};
/* MatMPIDenseSetPreallocation_MPIDense - creates (if needed) and preallocates the
   inner sequential matrix; it is mat->rmap->n x mat->cmap->N, i.e. every rank
   stores all N columns of its row block. data, if non-NULL, is used as the
   storage (caller-owned). The inner type matches the outer (CUDA/HIP aware). */
static PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat, PetscScalar *data)
{
  Mat_MPIDense *a     = (Mat_MPIDense *)mat->data;
  MatType       mtype = MATSEQDENSE;

  PetscFunctionBegin;
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));
  if (!a->A) {
    PetscCall(MatCreate(PETSC_COMM_SELF, &a->A));
    PetscCall(MatSetSizes(a->A, mat->rmap->n, mat->cmap->N, mat->rmap->n, mat->cmap->N));
  }
#if defined(PETSC_HAVE_CUDA)
  PetscBool iscuda;
  PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSECUDA, &iscuda));
  if (iscuda) mtype = MATSEQDENSECUDA;
#endif
#if defined(PETSC_HAVE_HIP)
  PetscBool iship;
  PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSEHIP, &iship));
  if (iship) mtype = MATSEQDENSEHIP;
#endif
  PetscCall(MatSetType(a->A, mtype));
  PetscCall(MatSeqDenseSetPreallocation(a->A, data));
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
  mat->offloadmask = a->A->offloadmask;
#endif
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatConvert_MPIAIJ_MPIDense - converts a parallel AIJ matrix to MPIDENSE by
   densifying the local AIJ block and concatenating the per-rank results. */
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIDense(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat B, C;

  PetscFunctionBegin;
  PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &C));
  PetscCall(MatConvert_SeqAIJ_SeqDense(C, MATSEQDENSE, MAT_INITIAL_MATRIX, &B));
  PetscCall(MatDestroy(&C));
  if (reuse == MAT_REUSE_MATRIX) {
    C = *newmat;
  } else C = NULL;
  PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C));
  PetscCall(MatDestroy(&B));
  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A, &C));
  } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatConvert_MPIDense_MPIAIJ - converse of the above: sparsify the local dense
   block and concatenate into a parallel AIJ matrix. */
static PetscErrorCode MatConvert_MPIDense_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat B, C;

  PetscFunctionBegin;
  PetscCall(MatDenseGetLocalMatrix(A, &C));
  PetscCall(MatConvert_SeqDense_SeqAIJ(C, MATSEQAIJ, MAT_INITIAL_MATRIX, &B));
  if (reuse == MAT_REUSE_MATRIX) {
    C = *newmat;
  } else C = NULL;
  PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C));
  PetscCall(MatDestroy(&B));
  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A, &C));
  } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
  PetscFunctionReturn(PETSC_SUCCESS);
}

#if defined(PETSC_HAVE_ELEMENTAL)
/* MatConvert_MPIDense_Elemental - pushes the local rows into an Elemental matrix
   via MatSetValues. Column-oriented insertion is used; if the local leading
   dimension equals the row count the whole block goes in one call. */
PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat           mat_elemental;
  PetscScalar  *v;
  PetscInt      m = A->rmap->n, N = A->cmap->N, rstart = A->rmap->rstart, i, *rows, *cols, lda;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    mat_elemental = *newmat;
    PetscCall(MatZeroEntries(*newmat));
  } else {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental));
    PetscCall(MatSetSizes(mat_elemental, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N));
    PetscCall(MatSetType(mat_elemental, MATELEMENTAL));
    PetscCall(MatSetUp(mat_elemental));
    PetscCall(MatSetOption(mat_elemental, MAT_ROW_ORIENTED, PETSC_FALSE));
  }

  PetscCall(PetscMalloc2(m, &rows, N, &cols));
  for (i = 0; i < N; i++) cols[i] = i;
  for (i = 0; i < m; i++) rows[i] = rstart + i;

  /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
  PetscCall(MatDenseGetArray(A, &v));
  PetscCall(MatDenseGetLDA(a->A, &lda));
  if (lda == m) PetscCall(MatSetValues(mat_elemental, m, rows, N, cols, v, ADD_VALUES));
  else {
    /* non-contiguous columns: insert one column at a time honoring lda */
    for (i = 0; i < N; i++) PetscCall(MatSetValues(mat_elemental, m, rows, 1, &i, v + lda * i, ADD_VALUES));
  }
  PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY));
  PetscCall(MatDenseRestoreArray(A, &v));
  PetscCall(PetscFree2(rows, cols));

  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A, &mat_elemental));
  } else {
    *newmat = mat_elemental;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif

/* MatDenseGetColumn_MPIDense - raw pointer access to one column of the local block. */
static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A, PetscInt col, PetscScalar **vals)
{
  Mat_MPIDense *mat = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseGetColumn(mat->A, col, vals));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatDenseRestoreColumn_MPIDense - releases access obtained by MatDenseGetColumn_MPIDense. */
static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A, PetscScalar **vals)
{
  Mat_MPIDense *mat = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseRestoreColumn(mat->A, vals));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatCreateMPIMatConcatenateSeqMat_MPIDense - stacks each rank's sequential dense
   matrix into a parallel MPIDENSE matrix. In the symbolic phase (MAT_INITIAL_MATRIX)
   the column layout is decided/validated; the numeric phase just copies data. */
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
{
  Mat_MPIDense *mat;
  PetscInt      m, nloc, N;

  PetscFunctionBegin;
  PetscCall(MatGetSize(inmat, &m, &N));
  PetscCall(MatGetLocalSize(inmat, NULL, &nloc));
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    PetscInt sum;

    if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N));
    /* Check sum(n) = N */
    PetscCallMPI(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm));
    PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N);

    PetscCall(MatCreateDense(comm, m, n, PETSC_DETERMINE, N, NULL, outmat));
    PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  }

  /* numeric phase */
  mat = (Mat_MPIDense *)(*outmat)->data;
  PetscCall(MatCopy(inmat, mat->A, SAME_NONZERO_PATTERN));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatDenseGetColumnVec_MPIDense - exposes column col as the cached vector a->cvec by
   placing a pointer into the local array at offset col*lda; marks the column as in
   use until the matching Restore call. */
PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscInt      lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
  a->vecinuse = col + 1; /* 1-based so 0 means "not in use" */
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(MatDenseGetArray(a->A, (PetscScalar **)&a->ptrinuse));
  PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)lda)));
  *v = a->cvec;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatDenseRestoreColumnVec_MPIDense - releases the column vector from the Get call
   above, restoring the underlying array and resetting the placed pointer. */
PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector");
  VecCheckAssembled(a->cvec);
  a->vecinuse = 0;
  PetscCall(MatDenseRestoreArray(a->A, (PetscScalar **)&a->ptrinuse));
  PetscCall(VecResetArray(a->cvec));
  if (v) *v = NULL;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatDenseGetColumnVecRead_MPIDense - read-only variant of GetColumnVec: uses the
   read-only array accessor and read-locks the vector. */
PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscInt      lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
  a->vecinuse = col + 1;
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(MatDenseGetArrayRead(a->A, &a->ptrinuse));
  PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)lda)));
  PetscCall(VecLockReadPush(a->cvec));
  *v = a->cvec;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatDenseRestoreColumnVecRead_MPIDense - releases the read-only column vector,
   popping the read lock before resetting the placed array. */
PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
  VecCheckAssembled(a->cvec);
  a->vecinuse = 0;
  PetscCall(MatDenseRestoreArrayRead(a->A, &a->ptrinuse));
  PetscCall(VecLockReadPop(a->cvec));
  PetscCall(VecResetArray(a->cvec));
  if (v) *v = NULL;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatDenseGetColumnVecWrite_MPIDense - write-only variant of GetColumnVec: uses the
   write-only array accessor (prior contents need not be valid). */
PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscInt      lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
  a->vecinuse = col + 1;
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(MatDenseGetArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
  PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)lda)));
  *v = a->cvec;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatDenseRestoreColumnVecWrite_MPIDense - releases the write-only column vector. */
PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
  VecCheckAssembled(a->cvec);
  a->vecinuse = 0;
  PetscCall(MatDenseRestoreArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
  PetscCall(VecResetArray(a->cvec));
  if (v) *v = NULL;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatDenseGetSubMatrix_MPIDense - exposes rows [rbegin,rend) x columns [cbegin,cend)
   as the cached matrix a->cmat backed by the same storage. pbegin/pend clip the
   requested global row range to this rank's ownership range.
   (Function continues past this chunk boundary.) */
static PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A, PetscInt rbegin, PetscInt rend, PetscInt cbegin, PetscInt cend, Mat *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat_MPIDense *c;
  MPI_Comm      comm;
  PetscInt      pbegin, pend;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCheck(!a->vecinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  pbegin = PetscMax(0, PetscMin(A->rmap->rend, rbegin) - A->rmap->rstart);
  pend   = PetscMin(A->rmap->n, PetscMax(0, rend - A->rmap->rstart));
  if (!a->cmat) {
    PetscCall(MatCreate(comm, &a->cmat));
    PetscCall(MatSetType(a->cmat, ((PetscObject)A)->type_name));
    if (rend - rbegin == A->rmap->N) PetscCall(PetscLayoutReference(A->rmap, &a->cmat->rmap));
    else {
PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin)); 1640 PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin)); 1641 PetscCall(PetscLayoutSetUp(a->cmat->rmap)); 1642 } 1643 PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin)); 1644 PetscCall(PetscLayoutSetUp(a->cmat->cmap)); 1645 } else { 1646 PetscBool same = (PetscBool)(rend - rbegin == a->cmat->rmap->N); 1647 if (same && a->cmat->rmap->N != A->rmap->N) { 1648 same = (PetscBool)(pend - pbegin == a->cmat->rmap->n); 1649 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &same, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A))); 1650 } 1651 if (!same) { 1652 PetscCall(PetscLayoutDestroy(&a->cmat->rmap)); 1653 PetscCall(PetscLayoutCreate(comm, &a->cmat->rmap)); 1654 PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin)); 1655 PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin)); 1656 PetscCall(PetscLayoutSetUp(a->cmat->rmap)); 1657 } 1658 if (cend - cbegin != a->cmat->cmap->N) { 1659 PetscCall(PetscLayoutDestroy(&a->cmat->cmap)); 1660 PetscCall(PetscLayoutCreate(comm, &a->cmat->cmap)); 1661 PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin)); 1662 PetscCall(PetscLayoutSetUp(a->cmat->cmap)); 1663 } 1664 } 1665 c = (Mat_MPIDense *)a->cmat->data; 1666 PetscCheck(!c->A, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1667 PetscCall(MatDenseGetSubMatrix(a->A, pbegin, pend, cbegin, cend, &c->A)); 1668 1669 a->cmat->preallocated = PETSC_TRUE; 1670 a->cmat->assembled = PETSC_TRUE; 1671 #if defined(PETSC_HAVE_DEVICE) 1672 a->cmat->offloadmask = c->A->offloadmask; 1673 #endif 1674 a->matinuse = cbegin + 1; 1675 *v = a->cmat; 1676 PetscFunctionReturn(PETSC_SUCCESS); 1677 } 1678 1679 static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A, Mat *v) 1680 { 1681 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1682 Mat_MPIDense *c; 1683 1684 PetscFunctionBegin; 1685 PetscCheck(a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call 
MatDenseGetSubMatrix() first"); 1686 PetscCheck(a->cmat, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal matrix"); 1687 PetscCheck(*v == a->cmat, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Not the matrix obtained from MatDenseGetSubMatrix()"); 1688 a->matinuse = 0; 1689 c = (Mat_MPIDense *)a->cmat->data; 1690 PetscCall(MatDenseRestoreSubMatrix(a->A, &c->A)); 1691 if (v) *v = NULL; 1692 #if defined(PETSC_HAVE_DEVICE) 1693 A->offloadmask = a->A->offloadmask; 1694 #endif 1695 PetscFunctionReturn(PETSC_SUCCESS); 1696 } 1697 1698 /*MC 1699 MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices. 1700 1701 Options Database Key: 1702 . -mat_type mpidense - sets the matrix type to `MATMPIDENSE` during a call to `MatSetFromOptions()` 1703 1704 Level: beginner 1705 1706 .seealso: [](ch_matrices), `Mat`, `MatCreateDense()`, `MATSEQDENSE`, `MATDENSE` 1707 M*/ 1708 PetscErrorCode MatCreate_MPIDense(Mat mat) 1709 { 1710 Mat_MPIDense *a; 1711 1712 PetscFunctionBegin; 1713 PetscCall(PetscNew(&a)); 1714 mat->data = (void *)a; 1715 mat->ops[0] = MatOps_Values; 1716 1717 mat->insertmode = NOT_SET_VALUES; 1718 1719 /* build cache for off array entries formed */ 1720 a->donotstash = PETSC_FALSE; 1721 1722 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)mat), 1, &mat->stash)); 1723 1724 /* stuff used for matrix vector multiply */ 1725 a->lvec = NULL; 1726 a->Mvctx = NULL; 1727 a->roworiented = PETSC_TRUE; 1728 1729 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", MatDenseGetLDA_MPIDense)); 1730 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", MatDenseSetLDA_MPIDense)); 1731 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", MatDenseGetArray_MPIDense)); 1732 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", MatDenseRestoreArray_MPIDense)); 1733 PetscCall(PetscObjectComposeFunction((PetscObject)mat, 
"MatDenseGetArrayRead_C", MatDenseGetArrayRead_MPIDense)); 1734 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", MatDenseRestoreArrayRead_MPIDense)); 1735 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", MatDenseGetArrayWrite_MPIDense)); 1736 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", MatDenseRestoreArrayWrite_MPIDense)); 1737 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", MatDensePlaceArray_MPIDense)); 1738 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", MatDenseResetArray_MPIDense)); 1739 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", MatDenseReplaceArray_MPIDense)); 1740 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDense)); 1741 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDense)); 1742 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDense)); 1743 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDense)); 1744 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDense)); 1745 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDense)); 1746 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", MatDenseGetSubMatrix_MPIDense)); 1747 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", MatDenseRestoreSubMatrix_MPIDense)); 1748 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense)); 1749 PetscCall(PetscObjectComposeFunction((PetscObject)mat, 
"MatConvert_mpidense_mpiaij_C", MatConvert_MPIDense_MPIAIJ)); 1750 #if defined(PETSC_HAVE_ELEMENTAL) 1751 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", MatConvert_MPIDense_Elemental)); 1752 #endif 1753 #if defined(PETSC_HAVE_SCALAPACK) 1754 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", MatConvert_Dense_ScaLAPACK)); 1755 #endif 1756 #if defined(PETSC_HAVE_CUDA) 1757 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", MatConvert_MPIDense_MPIDenseCUDA)); 1758 #endif 1759 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", MatMPIDenseSetPreallocation_MPIDense)); 1760 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense)); 1761 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ)); 1762 #if defined(PETSC_HAVE_CUDA) 1763 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense)); 1764 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ)); 1765 #endif 1766 #if defined(PETSC_HAVE_HIP) 1767 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", MatConvert_MPIDense_MPIDenseHIP)); 1768 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense)); 1769 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ)); 1770 #endif 1771 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", MatDenseGetColumn_MPIDense)); 
1772 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", MatDenseRestoreColumn_MPIDense)); 1773 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultAddColumnRange_C", MatMultAddColumnRange_MPIDense)); 1774 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeColumnRange_C", MatMultHermitianTransposeColumnRange_MPIDense)); 1775 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeAddColumnRange_C", MatMultHermitianTransposeAddColumnRange_MPIDense)); 1776 PetscCall(PetscObjectChangeTypeName((PetscObject)mat, MATMPIDENSE)); 1777 PetscFunctionReturn(PETSC_SUCCESS); 1778 } 1779 1780 /*MC 1781 MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices. 1782 1783 This matrix type is identical to `MATSEQDENSE` when constructed with a single process communicator, 1784 and `MATMPIDENSE` otherwise. 1785 1786 Options Database Key: 1787 . -mat_type dense - sets the matrix type to `MATDENSE` during a call to `MatSetFromOptions()` 1788 1789 Level: beginner 1790 1791 .seealso: [](ch_matrices), `Mat`, `MATSEQDENSE`, `MATMPIDENSE`, `MATDENSECUDA`, `MATDENSEHIP` 1792 M*/ 1793 1794 /*@ 1795 MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries 1796 1797 Collective 1798 1799 Input Parameters: 1800 + B - the matrix 1801 - data - optional location of matrix data. Set to `NULL` for PETSc 1802 to control all matrix memory allocation. 1803 1804 Level: intermediate 1805 1806 Notes: 1807 The dense format is fully compatible with standard Fortran 1808 storage by columns. 1809 1810 The data input variable is intended primarily for Fortran programmers 1811 who wish to allocate their own matrix memory space. Most users should 1812 set `data` to `NULL`. 
1813 1814 .seealso: [](ch_matrices), `Mat`, `MATMPIDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()` 1815 @*/ 1816 PetscErrorCode MatMPIDenseSetPreallocation(Mat B, PetscScalar *data) 1817 { 1818 PetscFunctionBegin; 1819 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 1820 PetscTryMethod(B, "MatMPIDenseSetPreallocation_C", (Mat, PetscScalar *), (B, data)); 1821 PetscFunctionReturn(PETSC_SUCCESS); 1822 } 1823 1824 /*@ 1825 MatDensePlaceArray - Allows one to replace the array in a `MATDENSE` matrix with an 1826 array provided by the user. This is useful to avoid copying an array 1827 into a matrix 1828 1829 Not Collective 1830 1831 Input Parameters: 1832 + mat - the matrix 1833 - array - the array in column major order 1834 1835 Level: developer 1836 1837 Note: 1838 You can return to the original array with a call to `MatDenseResetArray()`. The user is responsible for freeing this array; it will not be 1839 freed when the matrix is destroyed. 1840 1841 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDenseResetArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`, 1842 `MatDenseReplaceArray()` 1843 @*/ 1844 PetscErrorCode MatDensePlaceArray(Mat mat, const PetscScalar *array) 1845 { 1846 PetscFunctionBegin; 1847 PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 1848 PetscUseMethod(mat, "MatDensePlaceArray_C", (Mat, const PetscScalar *), (mat, array)); 1849 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 1850 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 1851 mat->offloadmask = PETSC_OFFLOAD_CPU; 1852 #endif 1853 PetscFunctionReturn(PETSC_SUCCESS); 1854 } 1855 1856 /*@ 1857 MatDenseResetArray - Resets the matrix array to that it previously had before the call to `MatDensePlaceArray()` 1858 1859 Not Collective 1860 1861 Input Parameter: 1862 . 
mat - the matrix 1863 1864 Level: developer 1865 1866 Note: 1867 You can only call this after a call to `MatDensePlaceArray()` 1868 1869 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDensePlaceArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()` 1870 @*/ 1871 PetscErrorCode MatDenseResetArray(Mat mat) 1872 { 1873 PetscFunctionBegin; 1874 PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 1875 PetscUseMethod(mat, "MatDenseResetArray_C", (Mat), (mat)); 1876 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 1877 PetscFunctionReturn(PETSC_SUCCESS); 1878 } 1879 1880 /*@ 1881 MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an 1882 array provided by the user. This is useful to avoid copying an array 1883 into a matrix 1884 1885 Not Collective 1886 1887 Input Parameters: 1888 + mat - the matrix 1889 - array - the array in column major order 1890 1891 Level: developer 1892 1893 Note: 1894 The memory passed in MUST be obtained with `PetscMalloc()` and CANNOT be 1895 freed by the user. It will be freed when the matrix is destroyed. 1896 1897 .seealso: [](ch_matrices), `Mat`, `MatDensePlaceArray()`, `MatDenseGetArray()`, `VecReplaceArray()` 1898 @*/ 1899 PetscErrorCode MatDenseReplaceArray(Mat mat, const PetscScalar *array) 1900 { 1901 PetscFunctionBegin; 1902 PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 1903 PetscUseMethod(mat, "MatDenseReplaceArray_C", (Mat, const PetscScalar *), (mat, array)); 1904 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 1905 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 1906 mat->offloadmask = PETSC_OFFLOAD_CPU; 1907 #endif 1908 PetscFunctionReturn(PETSC_SUCCESS); 1909 } 1910 1911 /*@ 1912 MatCreateDense - Creates a matrix in `MATDENSE` format. 1913 1914 Collective 1915 1916 Input Parameters: 1917 + comm - MPI communicator 1918 . m - number of local rows (or `PETSC_DECIDE` to have calculated if `M` is given) 1919 . 
n - number of local columns (or `PETSC_DECIDE` to have calculated if `N` is given) 1920 . M - number of global rows (or `PETSC_DECIDE` to have calculated if `m` is given) 1921 . N - number of global columns (or `PETSC_DECIDE` to have calculated if `n` is given) 1922 - data - optional location of matrix data. Set data to `NULL` (`PETSC_NULL_SCALAR` for Fortran users) for PETSc 1923 to control all matrix memory allocation. 1924 1925 Output Parameter: 1926 . A - the matrix 1927 1928 Level: intermediate 1929 1930 Notes: 1931 The dense format is fully compatible with standard Fortran 1932 storage by columns. 1933 1934 Although local portions of the matrix are stored in column-major 1935 order, the matrix is partitioned across MPI ranks by row. 1936 1937 The data input variable is intended primarily for Fortran programmers 1938 who wish to allocate their own matrix memory space. Most users should 1939 set `data` to `NULL` (`PETSC_NULL_SCALAR` for Fortran users). 1940 1941 The user MUST specify either the local or global matrix dimensions 1942 (possibly both). 
1943 1944 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()` 1945 @*/ 1946 PetscErrorCode MatCreateDense(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar *data, Mat *A) 1947 { 1948 PetscFunctionBegin; 1949 PetscCall(MatCreate(comm, A)); 1950 PetscCall(MatSetSizes(*A, m, n, M, N)); 1951 PetscCall(MatSetType(*A, MATDENSE)); 1952 PetscCall(MatSeqDenseSetPreallocation(*A, data)); 1953 PetscCall(MatMPIDenseSetPreallocation(*A, data)); 1954 PetscFunctionReturn(PETSC_SUCCESS); 1955 } 1956 1957 static PetscErrorCode MatDuplicate_MPIDense(Mat A, MatDuplicateOption cpvalues, Mat *newmat) 1958 { 1959 Mat mat; 1960 Mat_MPIDense *a, *oldmat = (Mat_MPIDense *)A->data; 1961 1962 PetscFunctionBegin; 1963 *newmat = NULL; 1964 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat)); 1965 PetscCall(MatSetSizes(mat, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 1966 PetscCall(MatSetType(mat, ((PetscObject)A)->type_name)); 1967 a = (Mat_MPIDense *)mat->data; 1968 1969 mat->factortype = A->factortype; 1970 mat->assembled = PETSC_TRUE; 1971 mat->preallocated = PETSC_TRUE; 1972 1973 mat->insertmode = NOT_SET_VALUES; 1974 a->donotstash = oldmat->donotstash; 1975 1976 PetscCall(PetscLayoutReference(A->rmap, &mat->rmap)); 1977 PetscCall(PetscLayoutReference(A->cmap, &mat->cmap)); 1978 1979 PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A)); 1980 1981 *newmat = mat; 1982 PetscFunctionReturn(PETSC_SUCCESS); 1983 } 1984 1985 static PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer) 1986 { 1987 PetscBool isbinary; 1988 #if defined(PETSC_HAVE_HDF5) 1989 PetscBool ishdf5; 1990 #endif 1991 1992 PetscFunctionBegin; 1993 PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1); 1994 PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2); 1995 /* force binary viewer to load .info file if it has not yet done so */ 1996 PetscCall(PetscViewerSetUp(viewer)); 1997 
PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1998 #if defined(PETSC_HAVE_HDF5) 1999 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5)); 2000 #endif 2001 if (isbinary) { 2002 PetscCall(MatLoad_Dense_Binary(newMat, viewer)); 2003 #if defined(PETSC_HAVE_HDF5) 2004 } else if (ishdf5) { 2005 PetscCall(MatLoad_Dense_HDF5(newMat, viewer)); 2006 #endif 2007 } else SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name); 2008 PetscFunctionReturn(PETSC_SUCCESS); 2009 } 2010 2011 static PetscErrorCode MatEqual_MPIDense(Mat A, Mat B, PetscBool *flag) 2012 { 2013 Mat_MPIDense *matB = (Mat_MPIDense *)B->data, *matA = (Mat_MPIDense *)A->data; 2014 Mat a, b; 2015 2016 PetscFunctionBegin; 2017 a = matA->A; 2018 b = matB->A; 2019 PetscCall(MatEqual(a, b, flag)); 2020 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A))); 2021 PetscFunctionReturn(PETSC_SUCCESS); 2022 } 2023 2024 static PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data) 2025 { 2026 Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data; 2027 2028 PetscFunctionBegin; 2029 PetscCall(PetscFree2(atb->sendbuf, atb->recvcounts)); 2030 PetscCall(MatDestroy(&atb->atb)); 2031 PetscCall(PetscFree(atb)); 2032 PetscFunctionReturn(PETSC_SUCCESS); 2033 } 2034 2035 static PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data) 2036 { 2037 Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data; 2038 2039 PetscFunctionBegin; 2040 PetscCall(PetscFree2(abt->buf[0], abt->buf[1])); 2041 PetscCall(PetscFree2(abt->recvcounts, abt->recvdispls)); 2042 PetscCall(PetscFree(abt)); 2043 PetscFunctionReturn(PETSC_SUCCESS); 2044 } 2045 2046 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) 2047 { 2048 Mat_MPIDense 
*a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data; 2049 Mat_TransMatMultDense *atb; 2050 MPI_Comm comm; 2051 PetscMPIInt size, *recvcounts; 2052 PetscScalar *carray, *sendbuf; 2053 const PetscScalar *atbarray; 2054 PetscInt i, cN = C->cmap->N, proc, k, j, lda; 2055 const PetscInt *ranges; 2056 2057 PetscFunctionBegin; 2058 MatCheckProduct(C, 3); 2059 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 2060 atb = (Mat_TransMatMultDense *)C->product->data; 2061 recvcounts = atb->recvcounts; 2062 sendbuf = atb->sendbuf; 2063 2064 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 2065 PetscCallMPI(MPI_Comm_size(comm, &size)); 2066 2067 /* compute atbarray = aseq^T * bseq */ 2068 PetscCall(MatTransposeMatMult(a->A, b->A, atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX, PETSC_DETERMINE, &atb->atb)); 2069 2070 PetscCall(MatGetOwnershipRanges(C, &ranges)); 2071 2072 /* arrange atbarray into sendbuf */ 2073 PetscCall(MatDenseGetArrayRead(atb->atb, &atbarray)); 2074 PetscCall(MatDenseGetLDA(atb->atb, &lda)); 2075 for (proc = 0, k = 0; proc < size; proc++) { 2076 for (j = 0; j < cN; j++) { 2077 for (i = ranges[proc]; i < ranges[proc + 1]; i++) sendbuf[k++] = atbarray[i + j * lda]; 2078 } 2079 } 2080 PetscCall(MatDenseRestoreArrayRead(atb->atb, &atbarray)); 2081 2082 /* sum all atbarray to local values of C */ 2083 PetscCall(MatDenseGetArrayWrite(c->A, &carray)); 2084 PetscCallMPI(MPI_Reduce_scatter(sendbuf, carray, recvcounts, MPIU_SCALAR, MPIU_SUM, comm)); 2085 PetscCall(MatDenseRestoreArrayWrite(c->A, &carray)); 2086 PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 2087 PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 2088 PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 2089 PetscFunctionReturn(PETSC_SUCCESS); 2090 } 2091 2092 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C) 2093 { 2094 MPI_Comm comm; 2095 
PetscMPIInt size; 2096 PetscInt cm = A->cmap->n, cM, cN = B->cmap->N; 2097 Mat_TransMatMultDense *atb; 2098 PetscBool cisdense = PETSC_FALSE; 2099 const PetscInt *ranges; 2100 2101 PetscFunctionBegin; 2102 MatCheckProduct(C, 4); 2103 PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 2104 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 2105 if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) { 2106 SETERRQ(comm, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend); 2107 } 2108 2109 /* create matrix product C */ 2110 PetscCall(MatSetSizes(C, cm, B->cmap->n, A->cmap->N, B->cmap->N)); 2111 #if defined(PETSC_HAVE_CUDA) 2112 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, "")); 2113 #endif 2114 #if defined(PETSC_HAVE_HIP) 2115 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSEHIP, "")); 2116 #endif 2117 if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 2118 PetscCall(MatSetUp(C)); 2119 2120 /* create data structure for reuse C */ 2121 PetscCallMPI(MPI_Comm_size(comm, &size)); 2122 PetscCall(PetscNew(&atb)); 2123 cM = C->rmap->N; 2124 PetscCall(PetscMalloc2(cM * cN, &atb->sendbuf, size, &atb->recvcounts)); 2125 PetscCall(MatGetOwnershipRanges(C, &ranges)); 2126 for (PetscMPIInt i = 0; i < size; i++) PetscCall(PetscMPIIntCast((ranges[i + 1] - ranges[i]) * cN, &atb->recvcounts[i])); 2127 C->product->data = atb; 2128 C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense; 2129 PetscFunctionReturn(PETSC_SUCCESS); 2130 } 2131 2132 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C) 2133 { 2134 MPI_Comm comm; 2135 PetscMPIInt i, size; 2136 PetscInt maxRows, bufsiz; 2137 
PetscMPIInt tag; 2138 PetscInt alg; 2139 Mat_MatTransMultDense *abt; 2140 Mat_Product *product = C->product; 2141 PetscBool flg; 2142 2143 PetscFunctionBegin; 2144 MatCheckProduct(C, 4); 2145 PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 2146 /* check local size of A and B */ 2147 PetscCheck(A->cmap->n == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local column dimensions are incompatible, A (%" PetscInt_FMT ") != B (%" PetscInt_FMT ")", A->cmap->n, B->cmap->n); 2148 2149 PetscCall(PetscStrcmp(product->alg, "allgatherv", &flg)); 2150 alg = flg ? 0 : 1; 2151 2152 /* setup matrix product C */ 2153 PetscCall(MatSetSizes(C, A->rmap->n, B->rmap->n, A->rmap->N, B->rmap->N)); 2154 PetscCall(MatSetType(C, MATMPIDENSE)); 2155 PetscCall(MatSetUp(C)); 2156 PetscCall(PetscObjectGetNewTag((PetscObject)C, &tag)); 2157 2158 /* create data structure for reuse C */ 2159 PetscCall(PetscObjectGetComm((PetscObject)C, &comm)); 2160 PetscCallMPI(MPI_Comm_size(comm, &size)); 2161 PetscCall(PetscNew(&abt)); 2162 abt->tag = tag; 2163 abt->alg = alg; 2164 switch (alg) { 2165 case 1: /* alg: "cyclic" */ 2166 for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, B->rmap->range[i + 1] - B->rmap->range[i]); 2167 bufsiz = A->cmap->N * maxRows; 2168 PetscCall(PetscMalloc2(bufsiz, &abt->buf[0], bufsiz, &abt->buf[1])); 2169 break; 2170 default: /* alg: "allgatherv" */ 2171 PetscCall(PetscMalloc2(B->rmap->n * B->cmap->N, &abt->buf[0], B->rmap->N * B->cmap->N, &abt->buf[1])); 2172 PetscCall(PetscMalloc2(size, &abt->recvcounts, size + 1, &abt->recvdispls)); 2173 for (i = 0; i <= size; i++) PetscCall(PetscMPIIntCast(B->rmap->range[i] * A->cmap->N, &abt->recvdispls[i])); 2174 for (i = 0; i < size; i++) PetscCall(PetscMPIIntCast(abt->recvdispls[i + 1] - abt->recvdispls[i], &abt->recvcounts[i])); 2175 break; 2176 } 2177 2178 C->product->data = abt; 2179 C->product->destroy = MatDestroy_MatMatTransMult_MPIDense_MPIDense; 
2180 C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense; 2181 PetscFunctionReturn(PETSC_SUCCESS); 2182 } 2183 2184 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C) 2185 { 2186 Mat_MPIDense *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data; 2187 Mat_MatTransMultDense *abt; 2188 MPI_Comm comm; 2189 PetscMPIInt rank, size, sendto, recvfrom, recvisfrom; 2190 PetscScalar *sendbuf, *recvbuf = NULL, *cv; 2191 PetscInt i, cK = A->cmap->N, sendsiz, recvsiz, k, j, bn; 2192 PetscScalar _DOne = 1.0, _DZero = 0.0; 2193 const PetscScalar *av, *bv; 2194 PetscBLASInt cm, cn, ck, alda, blda = 0, clda; 2195 MPI_Request reqs[2]; 2196 const PetscInt *ranges; 2197 2198 PetscFunctionBegin; 2199 MatCheckProduct(C, 3); 2200 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 2201 abt = (Mat_MatTransMultDense *)C->product->data; 2202 PetscCall(PetscObjectGetComm((PetscObject)C, &comm)); 2203 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 2204 PetscCallMPI(MPI_Comm_size(comm, &size)); 2205 PetscCall(MatDenseGetArrayRead(a->A, &av)); 2206 PetscCall(MatDenseGetArrayRead(b->A, &bv)); 2207 PetscCall(MatDenseGetArrayWrite(c->A, &cv)); 2208 PetscCall(MatDenseGetLDA(a->A, &i)); 2209 PetscCall(PetscBLASIntCast(i, &alda)); 2210 PetscCall(MatDenseGetLDA(b->A, &i)); 2211 PetscCall(PetscBLASIntCast(i, &blda)); 2212 PetscCall(MatDenseGetLDA(c->A, &i)); 2213 PetscCall(PetscBLASIntCast(i, &clda)); 2214 PetscCall(MatGetOwnershipRanges(B, &ranges)); 2215 bn = B->rmap->n; 2216 if (blda == bn) { 2217 sendbuf = (PetscScalar *)bv; 2218 } else { 2219 sendbuf = abt->buf[0]; 2220 for (k = 0, i = 0; i < cK; i++) { 2221 for (j = 0; j < bn; j++, k++) sendbuf[k] = bv[i * blda + j]; 2222 } 2223 } 2224 if (size > 1) { 2225 sendto = (rank + size - 1) % size; 2226 recvfrom = (rank + size + 1) % size; 2227 } else { 2228 sendto = recvfrom = 0; 2229 } 2230 
PetscCall(PetscBLASIntCast(cK, &ck)); 2231 PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm)); 2232 recvisfrom = rank; 2233 for (i = 0; i < size; i++) { 2234 /* we have finished receiving in sending, bufs can be read/modified */ 2235 PetscMPIInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */ 2236 PetscInt nextbn = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom]; 2237 2238 if (nextrecvisfrom != rank) { 2239 /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */ 2240 sendsiz = cK * bn; 2241 recvsiz = cK * nextbn; 2242 recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1]; 2243 PetscCallMPI(MPIU_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0])); 2244 PetscCallMPI(MPIU_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1])); 2245 } 2246 2247 /* local aseq * sendbuf^T */ 2248 PetscCall(PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn)); 2249 if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &cm, &cn, &ck, &_DOne, av, &alda, sendbuf, &cn, &_DZero, cv + clda * ranges[recvisfrom], &clda)); 2250 2251 if (nextrecvisfrom != rank) { 2252 /* wait for the sends and receives to complete, swap sendbuf and recvbuf */ 2253 PetscCallMPI(MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE)); 2254 } 2255 bn = nextbn; 2256 recvisfrom = nextrecvisfrom; 2257 sendbuf = recvbuf; 2258 } 2259 PetscCall(MatDenseRestoreArrayRead(a->A, &av)); 2260 PetscCall(MatDenseRestoreArrayRead(b->A, &bv)); 2261 PetscCall(MatDenseRestoreArrayWrite(c->A, &cv)); 2262 PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 2263 PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 2264 PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 2265 PetscFunctionReturn(PETSC_SUCCESS); 2266 } 2267 2268 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C) 2269 { 2270 Mat_MPIDense *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, 
*c = (Mat_MPIDense *)C->data; 2271 Mat_MatTransMultDense *abt; 2272 MPI_Comm comm; 2273 PetscMPIInt size, ibn; 2274 PetscScalar *cv, *sendbuf, *recvbuf; 2275 const PetscScalar *av, *bv; 2276 PetscInt blda, i, cK = A->cmap->N, k, j, bn; 2277 PetscScalar _DOne = 1.0, _DZero = 0.0; 2278 PetscBLASInt cm, cn, ck, alda, clda; 2279 2280 PetscFunctionBegin; 2281 MatCheckProduct(C, 3); 2282 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 2283 abt = (Mat_MatTransMultDense *)C->product->data; 2284 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 2285 PetscCallMPI(MPI_Comm_size(comm, &size)); 2286 PetscCall(MatDenseGetArrayRead(a->A, &av)); 2287 PetscCall(MatDenseGetArrayRead(b->A, &bv)); 2288 PetscCall(MatDenseGetArrayWrite(c->A, &cv)); 2289 PetscCall(MatDenseGetLDA(a->A, &i)); 2290 PetscCall(PetscBLASIntCast(i, &alda)); 2291 PetscCall(MatDenseGetLDA(b->A, &blda)); 2292 PetscCall(MatDenseGetLDA(c->A, &i)); 2293 PetscCall(PetscBLASIntCast(i, &clda)); 2294 /* copy transpose of B into buf[0] */ 2295 bn = B->rmap->n; 2296 sendbuf = abt->buf[0]; 2297 recvbuf = abt->buf[1]; 2298 for (k = 0, j = 0; j < bn; j++) { 2299 for (i = 0; i < cK; i++, k++) sendbuf[k] = bv[i * blda + j]; 2300 } 2301 PetscCall(MatDenseRestoreArrayRead(b->A, &bv)); 2302 PetscCall(PetscMPIIntCast(bn * cK, &ibn)); 2303 PetscCallMPI(MPI_Allgatherv(sendbuf, ibn, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm)); 2304 PetscCall(PetscBLASIntCast(cK, &ck)); 2305 PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm)); 2306 PetscCall(PetscBLASIntCast(c->A->cmap->n, &cn)); 2307 if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &cm, &cn, &ck, &_DOne, av, &alda, recvbuf, &ck, &_DZero, cv, &clda)); 2308 PetscCall(MatDenseRestoreArrayRead(a->A, &av)); 2309 PetscCall(MatDenseRestoreArrayRead(b->A, &bv)); 2310 PetscCall(MatDenseRestoreArrayWrite(c->A, &cv)); 2311 PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 2312 
PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 2313 PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 2314 PetscFunctionReturn(PETSC_SUCCESS); 2315 } 2316 2317 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) 2318 { 2319 Mat_MatTransMultDense *abt; 2320 2321 PetscFunctionBegin; 2322 MatCheckProduct(C, 3); 2323 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 2324 abt = (Mat_MatTransMultDense *)C->product->data; 2325 switch (abt->alg) { 2326 case 1: 2327 PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C)); 2328 break; 2329 default: 2330 PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C)); 2331 break; 2332 } 2333 PetscFunctionReturn(PETSC_SUCCESS); 2334 } 2335 2336 static PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data) 2337 { 2338 Mat_MatMultDense *ab = (Mat_MatMultDense *)data; 2339 2340 PetscFunctionBegin; 2341 PetscCall(MatDestroy(&ab->Ce)); 2342 PetscCall(MatDestroy(&ab->Ae)); 2343 PetscCall(MatDestroy(&ab->Be)); 2344 PetscCall(PetscFree(ab)); 2345 PetscFunctionReturn(PETSC_SUCCESS); 2346 } 2347 2348 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) 2349 { 2350 Mat_MatMultDense *ab; 2351 Mat_MPIDense *mdn = (Mat_MPIDense *)A->data; 2352 2353 PetscFunctionBegin; 2354 MatCheckProduct(C, 3); 2355 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Missing product data"); 2356 ab = (Mat_MatMultDense *)C->product->data; 2357 if (ab->Ae && ab->Ce) { 2358 #if PetscDefined(HAVE_ELEMENTAL) 2359 PetscCall(MatConvert_MPIDense_Elemental(A, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Ae)); 2360 PetscCall(MatConvert_MPIDense_Elemental(B, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Be)); 2361 PetscCall(MatMatMultNumeric_Elemental(ab->Ae, ab->Be, ab->Ce)); 2362 PetscCall(MatConvert(ab->Ce, MATMPIDENSE, MAT_REUSE_MATRIX, &C)); 2363 #else 2364 SETERRQ(PetscObjectComm((PetscObject)C), 
PETSC_ERR_PLIB, "PETSC_HAVE_ELEMENTAL not defined"); 2365 #endif 2366 } else { 2367 const PetscScalar *read; 2368 PetscScalar *write; 2369 PetscInt lda; 2370 2371 PetscCall(MatDenseGetLDA(B, &lda)); 2372 PetscCall(MatDenseGetArrayRead(B, &read)); 2373 PetscCall(MatDenseGetArrayWrite(ab->Be, &write)); 2374 if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); /* cannot be done during the symbolic phase because of possible calls to MatProductReplaceMats() */ 2375 for (PetscInt i = 0; i < C->cmap->N; ++i) { 2376 PetscCall(PetscSFBcastBegin(mdn->Mvctx, MPIU_SCALAR, read + i * lda, write + i * ab->Be->rmap->n, MPI_REPLACE)); 2377 PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, read + i * lda, write + i * ab->Be->rmap->n, MPI_REPLACE)); 2378 } 2379 PetscCall(MatDenseRestoreArrayWrite(ab->Be, &write)); 2380 PetscCall(MatDenseRestoreArrayRead(B, &read)); 2381 PetscCall(MatMatMultNumeric_SeqDense_SeqDense(((Mat_MPIDense *)A->data)->A, ab->Be, ((Mat_MPIDense *)C->data)->A)); 2382 } 2383 PetscFunctionReturn(PETSC_SUCCESS); 2384 } 2385 2386 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C) 2387 { 2388 Mat_Product *product = C->product; 2389 PetscInt alg; 2390 Mat_MatMultDense *ab; 2391 PetscBool flg; 2392 2393 PetscFunctionBegin; 2394 MatCheckProduct(C, 4); 2395 PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 2396 /* check local size of A and B */ 2397 PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ", %" PetscInt_FMT ")", 2398 A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend); 2399 2400 PetscCall(PetscStrcmp(product->alg, "petsc", &flg)); 2401 alg = flg ? 
0 : 1; 2402 2403 /* setup C */ 2404 PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N)); 2405 PetscCall(MatSetType(C, MATMPIDENSE)); 2406 PetscCall(MatSetUp(C)); 2407 2408 /* create data structure for reuse Cdense */ 2409 PetscCall(PetscNew(&ab)); 2410 2411 switch (alg) { 2412 case 1: /* alg: "elemental" */ 2413 #if PetscDefined(HAVE_ELEMENTAL) 2414 /* create elemental matrices Ae and Be */ 2415 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &ab->Ae)); 2416 PetscCall(MatSetSizes(ab->Ae, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N)); 2417 PetscCall(MatSetType(ab->Ae, MATELEMENTAL)); 2418 PetscCall(MatSetUp(ab->Ae)); 2419 PetscCall(MatSetOption(ab->Ae, MAT_ROW_ORIENTED, PETSC_FALSE)); 2420 2421 PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &ab->Be)); 2422 PetscCall(MatSetSizes(ab->Be, PETSC_DECIDE, PETSC_DECIDE, B->rmap->N, B->cmap->N)); 2423 PetscCall(MatSetType(ab->Be, MATELEMENTAL)); 2424 PetscCall(MatSetUp(ab->Be)); 2425 PetscCall(MatSetOption(ab->Be, MAT_ROW_ORIENTED, PETSC_FALSE)); 2426 2427 /* compute symbolic Ce = Ae*Be */ 2428 PetscCall(MatCreate(PetscObjectComm((PetscObject)C), &ab->Ce)); 2429 PetscCall(MatMatMultSymbolic_Elemental(ab->Ae, ab->Be, fill, ab->Ce)); 2430 #else 2431 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "PETSC_HAVE_ELEMENTAL not defined"); 2432 #endif 2433 break; 2434 default: /* alg: "petsc" */ 2435 ab->Ae = NULL; 2436 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, A->cmap->N, B->cmap->N, NULL, &ab->Be)); 2437 ab->Ce = NULL; 2438 break; 2439 } 2440 2441 C->product->data = ab; 2442 C->product->destroy = MatDestroy_MatMatMult_MPIDense_MPIDense; 2443 C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense; 2444 PetscFunctionReturn(PETSC_SUCCESS); 2445 } 2446 2447 static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C) 2448 { 2449 Mat_Product *product = C->product; 2450 const char *algTypes[2] = {"petsc", "elemental"}; 2451 PetscInt alg, nalg = PetscDefined(HAVE_ELEMENTAL) ? 
2 : 1; 2452 PetscBool flg = PETSC_FALSE; 2453 2454 PetscFunctionBegin; 2455 /* Set default algorithm */ 2456 alg = 0; /* default is petsc */ 2457 PetscCall(PetscStrcmp(product->alg, "default", &flg)); 2458 if (flg) PetscCall(MatProductSetAlgorithm(C, algTypes[alg])); 2459 2460 /* Get runtime option */ 2461 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat"); 2462 PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_AB", algTypes, nalg, algTypes[alg], &alg, &flg)); 2463 PetscOptionsEnd(); 2464 if (flg) PetscCall(MatProductSetAlgorithm(C, algTypes[alg])); 2465 2466 C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense; 2467 C->ops->productsymbolic = MatProductSymbolic_AB; 2468 PetscFunctionReturn(PETSC_SUCCESS); 2469 } 2470 2471 static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C) 2472 { 2473 Mat_Product *product = C->product; 2474 Mat A = product->A, B = product->B; 2475 2476 PetscFunctionBegin; 2477 PetscCheck(A->rmap->rstart == B->rmap->rstart && A->rmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", 2478 A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend); 2479 C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense; 2480 C->ops->productsymbolic = MatProductSymbolic_AtB; 2481 PetscFunctionReturn(PETSC_SUCCESS); 2482 } 2483 2484 static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C) 2485 { 2486 Mat_Product *product = C->product; 2487 const char *algTypes[2] = {"allgatherv", "cyclic"}; 2488 PetscInt alg, nalg = 2; 2489 PetscBool flg = PETSC_FALSE; 2490 2491 PetscFunctionBegin; 2492 /* Set default algorithm */ 2493 alg = 0; /* default is allgatherv */ 2494 PetscCall(PetscStrcmp(product->alg, "default", &flg)); 2495 if (flg) PetscCall(MatProductSetAlgorithm(C, algTypes[alg])); 
2496 2497 /* Get runtime option */ 2498 if (product->api_user) { 2499 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatTransposeMult", "Mat"); 2500 PetscCall(PetscOptionsEList("-matmattransmult_mpidense_mpidense_via", "Algorithmic approach", "MatMatTransposeMult", algTypes, nalg, algTypes[alg], &alg, &flg)); 2501 PetscOptionsEnd(); 2502 } else { 2503 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_ABt", "Mat"); 2504 PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_ABt", algTypes, nalg, algTypes[alg], &alg, &flg)); 2505 PetscOptionsEnd(); 2506 } 2507 if (flg) PetscCall(MatProductSetAlgorithm(C, algTypes[alg])); 2508 2509 C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense; 2510 C->ops->productsymbolic = MatProductSymbolic_ABt; 2511 PetscFunctionReturn(PETSC_SUCCESS); 2512 } 2513 2514 static PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C) 2515 { 2516 Mat_Product *product = C->product; 2517 2518 PetscFunctionBegin; 2519 switch (product->type) { 2520 case MATPRODUCT_AB: 2521 PetscCall(MatProductSetFromOptions_MPIDense_AB(C)); 2522 break; 2523 case MATPRODUCT_AtB: 2524 PetscCall(MatProductSetFromOptions_MPIDense_AtB(C)); 2525 break; 2526 case MATPRODUCT_ABt: 2527 PetscCall(MatProductSetFromOptions_MPIDense_ABt(C)); 2528 break; 2529 default: 2530 break; 2531 } 2532 PetscFunctionReturn(PETSC_SUCCESS); 2533 } 2534