1 /* 2 Basic functions for basic parallel dense matrices. 3 Portions of this code are under: 4 Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. 5 */ 6 7 #include <../src/mat/impls/dense/mpi/mpidense.h> /*I "petscmat.h" I*/ 8 #include <../src/mat/impls/aij/mpi/mpiaij.h> 9 #include <petscblaslapack.h> 10 #include <petsc/private/vecimpl.h> 11 12 /*@ 13 MatDenseGetLocalMatrix - For a `MATMPIDENSE` or `MATSEQDENSE` matrix returns the sequential 14 matrix that represents the operator. For sequential matrices it returns itself. 15 16 Input Parameter: 17 . A - the sequential or MPI `MATDENSE` matrix 18 19 Output Parameter: 20 . B - the inner matrix 21 22 Level: intermediate 23 24 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATMPIDENSE`, `MATSEQDENSE` 25 @*/ 26 PetscErrorCode MatDenseGetLocalMatrix(Mat A, Mat *B) 27 { 28 Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 29 PetscBool flg; 30 31 PetscFunctionBegin; 32 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 33 PetscAssertPointer(B, 2); 34 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &flg)); 35 if (flg) *B = mat->A; 36 else { 37 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQDENSE, &flg)); 38 PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)A)->type_name); 39 *B = A; 40 } 41 PetscFunctionReturn(PETSC_SUCCESS); 42 } 43 44 static PetscErrorCode MatCopy_MPIDense(Mat A, Mat B, MatStructure s) 45 { 46 Mat_MPIDense *Amat = (Mat_MPIDense *)A->data; 47 Mat_MPIDense *Bmat = (Mat_MPIDense *)B->data; 48 49 PetscFunctionBegin; 50 PetscCall(MatCopy(Amat->A, Bmat->A, s)); 51 PetscFunctionReturn(PETSC_SUCCESS); 52 } 53 54 PetscErrorCode MatShift_MPIDense(Mat A, PetscScalar alpha) 55 { 56 Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 57 PetscInt j, lda, rstart = A->rmap->rstart, rend = A->rmap->rend, rend2; 58 PetscScalar *v; 59 60 PetscFunctionBegin; 61 PetscCall(MatDenseGetArray(mat->A, &v)); 62 PetscCall(MatDenseGetLDA(mat->A, &lda)); 63 rend2 = PetscMin(rend, A->cmap->N); 64 if (rend2 > rstart) { 65 for (j = rstart; j < rend2; j++) v[j - rstart + j * lda] += alpha; 66 PetscCall(PetscLogFlops(rend2 - rstart)); 67 } 68 PetscCall(MatDenseRestoreArray(mat->A, &v)); 69 PetscFunctionReturn(PETSC_SUCCESS); 70 } 71 72 static PetscErrorCode MatGetRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 73 { 74 Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 75 PetscInt lrow, rstart = A->rmap->rstart, rend = A->rmap->rend; 76 77 PetscFunctionBegin; 78 PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows"); 79 lrow = row - rstart; 80 PetscCall(MatGetRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v)); 81 PetscFunctionReturn(PETSC_SUCCESS); 82 } 83 84 static PetscErrorCode MatRestoreRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 85 { 86 Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 87 PetscInt lrow, rstart = A->rmap->rstart, rend = A->rmap->rend; 88 89 PetscFunctionBegin; 90 PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows"); 91 lrow = row - rstart; 92 PetscCall(MatRestoreRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v)); 93 PetscFunctionReturn(PETSC_SUCCESS); 94 } 95 96 static PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A, Mat *a) 97 { 98 Mat_MPIDense *mdn = (Mat_MPIDense *)A->data; 99 PetscInt m = A->rmap->n, rstart = A->rmap->rstart; 100 PetscScalar *array; 101 MPI_Comm comm; 102 PetscBool flg; 103 Mat B; 104 105 PetscFunctionBegin; 106 PetscCall(MatHasCongruentLayouts(A, &flg)); 107 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only square matrices supported."); 108 PetscCall(PetscObjectQuery((PetscObject)A, "DiagonalBlock", (PetscObject *)&B)); 109 if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */ 110 #if PetscDefined(HAVE_CUDA) 111 PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSECUDA, &flg)); 112 PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSECUDA); 113 #elif PetscDefined(HAVE_HIP) 114 PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSEHIP, &flg)); 115 PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSEHIP); 116 #endif 117 PetscCall(PetscObjectGetComm((PetscObject)mdn->A, &comm)); 118 PetscCall(MatCreate(comm, &B)); 119 PetscCall(MatSetSizes(B, m, m, m, m)); 120 PetscCall(MatSetType(B, ((PetscObject)mdn->A)->type_name)); 121 PetscCall(MatDenseGetArrayRead(mdn->A, (const PetscScalar **)&array)); 122 PetscCall(MatSeqDenseSetPreallocation(B, array + m * rstart)); 123 PetscCall(MatDenseRestoreArrayRead(mdn->A, (const PetscScalar **)&array)); 124 PetscCall(PetscObjectCompose((PetscObject)A, "DiagonalBlock", (PetscObject)B)); 125 *a = B; 126 PetscCall(MatDestroy(&B)); 127 } else *a = B; 128 PetscFunctionReturn(PETSC_SUCCESS); 129 } 130 131 static PetscErrorCode MatSetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], const PetscScalar v[], InsertMode addv) 132 { 133 Mat_MPIDense *A = (Mat_MPIDense *)mat->data; 134 PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row; 135 PetscBool roworiented = A->roworiented; 136 137 PetscFunctionBegin; 138 for (i = 0; i < m; i++) { 139 if (idxm[i] < 0) continue; 140 PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large"); 141 if (idxm[i] >= rstart && idxm[i] < rend) { 142 row = idxm[i] - rstart; 143 if (roworiented) { 144 PetscCall(MatSetValues(A->A, 1, &row, n, idxn, PetscSafePointerPlusOffset(v, i * n), addv)); 145 } else { 146 for (j = 0; j < n; j++) { 147 if (idxn[j] < 0) continue; 148 PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large"); 149 PetscCall(MatSetValues(A->A, 1, &row, 1, &idxn[j], PetscSafePointerPlusOffset(v, i + j * m), addv)); 150 } 151 } 152 } else if (!A->donotstash) { 153 mat->assembled = PETSC_FALSE; 154 if (roworiented) { 155 PetscCall(MatStashValuesRow_Private(&mat->stash, idxm[i], n, idxn, PetscSafePointerPlusOffset(v, i * n), PETSC_FALSE)); 156 } else { 157 PetscCall(MatStashValuesCol_Private(&mat->stash, idxm[i], n, idxn, PetscSafePointerPlusOffset(v, i), m, PETSC_FALSE)); 158 } 159 } 160 } 161 PetscFunctionReturn(PETSC_SUCCESS); 162 } 163 164 static PetscErrorCode MatGetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[]) 165 { 166 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 167 PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row; 168 169 PetscFunctionBegin; 170 for (i = 0; i < m; i++) { 171 if (idxm[i] < 0) continue; /* negative row */ 172 PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large"); 173 if (idxm[i] >= rstart && idxm[i] < rend) { 174 row = idxm[i] - rstart; 175 for (j = 0; j < n; j++) { 176 if (idxn[j] < 0) continue; /* negative column */ 177 PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large"); 178 PetscCall(MatGetValues(mdn->A, 1, &row, 1, &idxn[j], v + i * n + j)); 179 } 180 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported"); 181 } 182 PetscFunctionReturn(PETSC_SUCCESS); 183 } 184 185 static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A, PetscInt *lda) 186 { 187 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 188 189 PetscFunctionBegin; 190 PetscCall(MatDenseGetLDA(a->A, lda)); 191 PetscFunctionReturn(PETSC_SUCCESS); 192 } 193 194 static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A, PetscInt lda) 195 { 196 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 197 MatType mtype = MATSEQDENSE; 198 199 PetscFunctionBegin; 200 if (!a->A) { 201 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 202 PetscCall(PetscLayoutSetUp(A->rmap)); 203 PetscCall(PetscLayoutSetUp(A->cmap)); 204 PetscCall(MatCreate(PETSC_COMM_SELF, &a->A)); 205 PetscCall(MatSetSizes(a->A, A->rmap->n, A->cmap->N, A->rmap->n, A->cmap->N)); 206 #if PetscDefined(HAVE_CUDA) 207 PetscBool iscuda; 208 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSECUDA, &iscuda)); 209 if (iscuda) mtype = MATSEQDENSECUDA; 210 #elif PetscDefined(HAVE_HIP) 211 PetscBool iship; 212 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSEHIP, &iship)); 213 if (iship) mtype = MATSEQDENSEHIP; 214 #endif 215 PetscCall(MatSetType(a->A, mtype)); 216 } 217 PetscCall(MatDenseSetLDA(a->A, lda)); 218 PetscFunctionReturn(PETSC_SUCCESS); 219 } 220 221 static PetscErrorCode MatDenseGetArray_MPIDense(Mat A, PetscScalar **array) 222 { 223 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 224 225 PetscFunctionBegin; 226 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 227 PetscCall(MatDenseGetArray(a->A, array)); 228 PetscFunctionReturn(PETSC_SUCCESS); 229 } 230 231 static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A, PetscScalar **array) 232 { 233 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 234 235 PetscFunctionBegin; 236 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 237 PetscCall(MatDenseGetArrayRead(a->A, (const PetscScalar **)array)); 238 PetscFunctionReturn(PETSC_SUCCESS); 239 } 240 241 static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A, PetscScalar **array) 242 { 243 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 244 245 PetscFunctionBegin; 246 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 247 PetscCall(MatDenseGetArrayWrite(a->A, array)); 248 PetscFunctionReturn(PETSC_SUCCESS); 249 } 250 251 static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A, const PetscScalar *array) 252 { 253 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 254 255 PetscFunctionBegin; 256 PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 257 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 258 PetscCall(MatDensePlaceArray(a->A, array)); 259 PetscFunctionReturn(PETSC_SUCCESS); 260 } 261 262 static PetscErrorCode MatDenseResetArray_MPIDense(Mat A) 263 { 264 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 265 266 PetscFunctionBegin; 267 PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 268 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 269 PetscCall(MatDenseResetArray(a->A)); 270 PetscFunctionReturn(PETSC_SUCCESS); 271 } 272 273 static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A, const PetscScalar *array) 274 { 275 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 276 277 PetscFunctionBegin; 278 PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 279 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 280 PetscCall(MatDenseReplaceArray(a->A, array)); 281 PetscFunctionReturn(PETSC_SUCCESS); 282 } 283 284 static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A, IS isrow, IS iscol, MatReuse scall, Mat *B) 285 { 286 Mat_MPIDense *mat = (Mat_MPIDense *)A->data, *newmatd; 287 PetscInt lda, i, j, rstart, rend, nrows, ncols, Ncols, nlrows, nlcols; 288 const PetscInt *irow, *icol; 289 const PetscScalar *v; 290 PetscScalar *bv; 291 Mat newmat; 292 IS iscol_local; 293 MPI_Comm comm_is, comm_mat; 294 295 PetscFunctionBegin; 296 PetscCall(PetscObjectGetComm((PetscObject)A, &comm_mat)); 297 PetscCall(PetscObjectGetComm((PetscObject)iscol, &comm_is)); 298 PetscCheck(comm_mat == comm_is, PETSC_COMM_SELF, PETSC_ERR_ARG_NOTSAMECOMM, "IS communicator must match matrix communicator"); 299 300 PetscCall(ISAllGather(iscol, &iscol_local)); 301 PetscCall(ISGetIndices(isrow, &irow)); 302 PetscCall(ISGetIndices(iscol_local, &icol)); 303 PetscCall(ISGetLocalSize(isrow, &nrows)); 304 PetscCall(ISGetLocalSize(iscol, &ncols)); 305 PetscCall(ISGetSize(iscol, &Ncols)); /* global number of columns, size of iscol_local */ 306 307 /* No parallel redistribution currently supported! Should really check each index set 308 to confirm that it is OK. ... Currently supports only submatrix same partitioning as 309 original matrix! */ 310 311 PetscCall(MatGetLocalSize(A, &nlrows, &nlcols)); 312 PetscCall(MatGetOwnershipRange(A, &rstart, &rend)); 313 314 /* Check submatrix call */ 315 if (scall == MAT_REUSE_MATRIX) { 316 /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */ 317 /* Really need to test rows and column sizes! */ 318 newmat = *B; 319 } else { 320 /* Create and fill new matrix */ 321 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &newmat)); 322 PetscCall(MatSetSizes(newmat, nrows, ncols, PETSC_DECIDE, Ncols)); 323 PetscCall(MatSetType(newmat, ((PetscObject)A)->type_name)); 324 PetscCall(MatMPIDenseSetPreallocation(newmat, NULL)); 325 } 326 327 /* Now extract the data pointers and do the copy, column at a time */ 328 newmatd = (Mat_MPIDense *)newmat->data; 329 PetscCall(MatDenseGetArray(newmatd->A, &bv)); 330 PetscCall(MatDenseGetArrayRead(mat->A, &v)); 331 PetscCall(MatDenseGetLDA(mat->A, &lda)); 332 for (i = 0; i < Ncols; i++) { 333 const PetscScalar *av = v + lda * icol[i]; 334 for (j = 0; j < nrows; j++) *bv++ = av[irow[j] - rstart]; 335 } 336 PetscCall(MatDenseRestoreArrayRead(mat->A, &v)); 337 PetscCall(MatDenseRestoreArray(newmatd->A, &bv)); 338 339 /* Assemble the matrices so that the correct flags are set */ 340 PetscCall(MatAssemblyBegin(newmat, MAT_FINAL_ASSEMBLY)); 341 PetscCall(MatAssemblyEnd(newmat, MAT_FINAL_ASSEMBLY)); 342 343 /* Free work space */ 344 PetscCall(ISRestoreIndices(isrow, &irow)); 345 PetscCall(ISRestoreIndices(iscol_local, &icol)); 346 PetscCall(ISDestroy(&iscol_local)); 347 *B = newmat; 348 PetscFunctionReturn(PETSC_SUCCESS); 349 } 350 351 static PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A, PetscScalar **array) 352 { 353 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 354 355 PetscFunctionBegin; 356 PetscCall(MatDenseRestoreArray(a->A, array)); 357 PetscFunctionReturn(PETSC_SUCCESS); 358 } 359 360 static PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A, PetscScalar **array) 361 { 362 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 363 364 PetscFunctionBegin; 365 PetscCall(MatDenseRestoreArrayRead(a->A, (const PetscScalar **)array)); 366 PetscFunctionReturn(PETSC_SUCCESS); 367 } 368 369 static PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A, PetscScalar **array) 370 { 371 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 372 373 PetscFunctionBegin; 374 PetscCall(MatDenseRestoreArrayWrite(a->A, array)); 375 PetscFunctionReturn(PETSC_SUCCESS); 376 } 377 378 static PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat, MatAssemblyType mode) 379 { 380 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 381 PetscInt nstash, reallocs; 382 383 PetscFunctionBegin; 384 if (mdn->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS); 385 386 PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range)); 387 PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs)); 388 PetscCall(PetscInfo(mdn->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs)); 389 PetscFunctionReturn(PETSC_SUCCESS); 390 } 391 392 static PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat, MatAssemblyType mode) 393 { 394 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 395 PetscInt i, *row, *col, flg, j, rstart, ncols; 396 PetscMPIInt n; 397 PetscScalar *val; 398 399 PetscFunctionBegin; 400 if (!mdn->donotstash && !mat->nooffprocentries) { 401 /* wait on receives */ 402 while (1) { 403 PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg)); 404 if (!flg) break; 405 406 for (i = 0; i < n;) { 407 /* Now identify the consecutive vals belonging to the same row */ 408 for (j = i, rstart = row[j]; j < n; j++) { 409 if (row[j] != rstart) break; 410 } 411 if (j < n) ncols = j - i; 412 else ncols = n - i; 413 /* Now assemble all these values with a single function call */ 414 PetscCall(MatSetValues_MPIDense(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode)); 415 i = j; 416 } 417 } 418 PetscCall(MatStashScatterEnd_Private(&mat->stash)); 419 } 420 421 PetscCall(MatAssemblyBegin(mdn->A, mode)); 422 PetscCall(MatAssemblyEnd(mdn->A, mode)); 423 PetscFunctionReturn(PETSC_SUCCESS); 424 } 425 426 static PetscErrorCode MatZeroEntries_MPIDense(Mat A) 427 { 428 Mat_MPIDense *l = (Mat_MPIDense *)A->data; 429 430 PetscFunctionBegin; 431 PetscCall(MatZeroEntries(l->A)); 432 PetscFunctionReturn(PETSC_SUCCESS); 433 } 434 435 static PetscErrorCode MatZeroRows_MPIDense(Mat A, PetscInt n, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) 436 { 437 Mat_MPIDense *l = (Mat_MPIDense *)A->data; 438 PetscInt i, len, *lrows; 439 440 PetscFunctionBegin; 441 /* get locally owned rows */ 442 PetscCall(PetscLayoutMapLocal(A->rmap, n, rows, &len, &lrows, NULL)); 443 /* fix right-hand side if needed */ 444 if (x && b) { 445 const PetscScalar *xx; 446 PetscScalar *bb; 447 448 PetscCall(VecGetArrayRead(x, &xx)); 449 PetscCall(VecGetArrayWrite(b, &bb)); 450 for (i = 0; i < len; ++i) bb[lrows[i]] = diag * xx[lrows[i]]; 451 PetscCall(VecRestoreArrayRead(x, &xx)); 452 PetscCall(VecRestoreArrayWrite(b, &bb)); 453 } 454 PetscCall(MatZeroRows(l->A, len, lrows, 0.0, NULL, NULL)); 455 if (diag != 0.0) { 456 Vec d; 457 458 PetscCall(MatCreateVecs(A, NULL, &d)); 459 PetscCall(VecSet(d, diag)); 460 PetscCall(MatDiagonalSet(A, d, INSERT_VALUES)); 461 PetscCall(VecDestroy(&d)); 462 } 463 PetscCall(PetscFree(lrows)); 464 PetscFunctionReturn(PETSC_SUCCESS); 465 } 466 467 PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat, Vec, Vec); 468 PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat, Vec, Vec, Vec); 469 PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat, Vec, Vec); 470 PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat, Vec, Vec, Vec); 471 472 static PetscErrorCode MatMult_MPIDense(Mat mat, Vec xx, Vec yy) 473 { 474 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 475 const PetscScalar *ax; 476 PetscScalar *ay; 477 PetscMemType axmtype, aymtype; 478 479 PetscFunctionBegin; 480 if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat)); 481 PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype)); 482 PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype)); 483 PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE)); 484 PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE)); 485 PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay)); 486 PetscCall(VecRestoreArrayReadAndMemType(xx, &ax)); 487 PetscCall((*mdn->A->ops->mult)(mdn->A, mdn->lvec, yy)); 488 PetscFunctionReturn(PETSC_SUCCESS); 489 } 490 491 static PetscErrorCode MatMultAddColumnRange_MPIDense(Mat mat, Vec xx, Vec yy, Vec zz, PetscInt c_start, PetscInt c_end) 492 { 493 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 494 const PetscScalar *ax; 495 PetscScalar *ay; 496 PetscMemType axmtype, aymtype; 497 498 PetscFunctionBegin; 499 if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat)); 500 PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype)); 501 PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype)); 502 PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE)); 503 PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE)); 504 PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay)); 505 PetscCall(VecRestoreArrayReadAndMemType(xx, &ax)); 506 PetscUseMethod(mdn->A, "MatMultAddColumnRange_C", (Mat, Vec, Vec, Vec, PetscInt, PetscInt), (mdn->A, mdn->lvec, yy, zz, c_start, c_end)); 507 PetscFunctionReturn(PETSC_SUCCESS); 508 } 509 510 static PetscErrorCode MatMultAdd_MPIDense(Mat mat, Vec xx, Vec yy, Vec zz) 511 { 512 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 513 const PetscScalar *ax; 514 PetscScalar *ay; 515 PetscMemType axmtype, aymtype; 516 517 PetscFunctionBegin; 518 if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat)); 519 PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype)); 520 PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype)); 521 PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE)); 522 PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE)); 523 PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay)); 524 PetscCall(VecRestoreArrayReadAndMemType(xx, &ax)); 525 PetscCall((*mdn->A->ops->multadd)(mdn->A, mdn->lvec, yy, zz)); 526 PetscFunctionReturn(PETSC_SUCCESS); 527 } 528 529 static PetscErrorCode MatMultHermitianTransposeColumnRange_MPIDense(Mat A, Vec xx, Vec yy, PetscInt c_start, PetscInt c_end) 530 { 531 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 532 const PetscScalar *ax; 533 PetscScalar *ay; 534 PetscMemType axmtype, aymtype; 535 536 PetscFunctionBegin; 537 if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); 538 PetscCall(VecSet(yy, 0.0)); 539 PetscUseMethod(a->A, "MatMultHermitianTransposeColumnRange_C", (Mat, Vec, Vec, PetscInt, PetscInt), (a->A, xx, a->lvec, c_start, c_end)); 540 PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype)); 541 PetscCall(VecGetArrayAndMemType(yy, &ay, &aymtype)); 542 PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM)); 543 PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM)); 544 PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax)); 545 PetscCall(VecRestoreArrayAndMemType(yy, &ay)); 546 PetscFunctionReturn(PETSC_SUCCESS); 547 } 548 549 static PetscErrorCode MatMultTransposeKernel_MPIDense(Mat A, Vec xx, Vec yy, PetscBool herm) 550 { 551 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 552 const PetscScalar *ax; 553 PetscScalar *ay; 554 PetscMemType axmtype, aymtype; 555 556 PetscFunctionBegin; 557 if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); 558 PetscCall(VecSet(yy, 0.0)); 559 if (herm) PetscCall((*a->A->ops->multhermitiantranspose)(a->A, xx, a->lvec)); 560 else PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec)); 561 PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype)); 562 PetscCall(VecGetArrayAndMemType(yy, &ay, &aymtype)); 563 PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM)); 564 PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM)); 565 PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax)); 566 PetscCall(VecRestoreArrayAndMemType(yy, &ay)); 567 PetscFunctionReturn(PETSC_SUCCESS); 568 } 569 570 static PetscErrorCode MatMultHermitianTransposeAddColumnRange_MPIDense(Mat A, Vec xx, Vec yy, Vec zz, PetscInt c_start, PetscInt c_end) 571 { 572 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 573 const PetscScalar *ax; 574 PetscScalar *ay; 575 PetscMemType axmtype, aymtype; 576 577 PetscFunctionBegin; 578 if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); 579 PetscCall(VecCopy(yy, zz)); 580 PetscMPIInt rank; 581 PetscCallMPI(MPI_Comm_rank(MPI_COMM_WORLD, &rank)); 582 PetscUseMethod(a->A, "MatMultHermitianTransposeColumnRange_C", (Mat, Vec, Vec, PetscInt, PetscInt), (a->A, xx, a->lvec, c_start, c_end)); 583 PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype)); 584 PetscCall(VecGetArrayAndMemType(zz, &ay, &aymtype)); 585 PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM)); 586 PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM)); 587 PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax)); 588 PetscCall(VecRestoreArrayAndMemType(zz, &ay)); 589 PetscFunctionReturn(PETSC_SUCCESS); 590 } 591 592 static PetscErrorCode MatMultTransposeAddKernel_MPIDense(Mat A, Vec xx, Vec yy, Vec zz, PetscBool herm) 593 { 594 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 595 const PetscScalar *ax; 596 PetscScalar *ay; 597 PetscMemType axmtype, aymtype; 598 599 PetscFunctionBegin; 600 if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); 601 PetscCall(VecCopy(yy, zz)); 602 if (herm) PetscCall((*a->A->ops->multhermitiantranspose)(a->A, xx, a->lvec)); 603 else PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec)); 604 PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype)); 605 PetscCall(VecGetArrayAndMemType(zz, &ay, &aymtype)); 606 PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM)); 607 PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM)); 608 PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax)); 609 PetscCall(VecRestoreArrayAndMemType(zz, &ay)); 610 PetscFunctionReturn(PETSC_SUCCESS); 611 } 612 613 static PetscErrorCode MatMultTranspose_MPIDense(Mat A, Vec xx, Vec yy) 614 { 615 PetscFunctionBegin; 616 PetscCall(MatMultTransposeKernel_MPIDense(A, xx, yy, PETSC_FALSE)); 617 PetscFunctionReturn(PETSC_SUCCESS); 618 } 619 620 static PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A, Vec xx, Vec yy, Vec zz) 621 { 622 PetscFunctionBegin; 623 PetscCall(MatMultTransposeAddKernel_MPIDense(A, xx, yy, zz, PETSC_FALSE)); 624 PetscFunctionReturn(PETSC_SUCCESS); 625 } 626 627 static PetscErrorCode MatMultHermitianTranspose_MPIDense(Mat A, Vec xx, Vec yy) 628 { 629 PetscFunctionBegin; 630 PetscCall(MatMultTransposeKernel_MPIDense(A, xx, yy, PETSC_TRUE)); 631 PetscFunctionReturn(PETSC_SUCCESS); 632 } 633 634 static PetscErrorCode MatMultHermitianTransposeAdd_MPIDense(Mat A, Vec xx, Vec yy, Vec zz) 635 { 636 PetscFunctionBegin; 637 PetscCall(MatMultTransposeAddKernel_MPIDense(A, xx, yy, zz, PETSC_TRUE)); 638 PetscFunctionReturn(PETSC_SUCCESS); 639 } 640 641 PetscErrorCode MatGetDiagonal_MPIDense(Mat A, Vec v) 642 { 643 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 644 PetscInt lda, len, i, nl, ng, m = A->rmap->n, radd; 645 PetscScalar *x; 646 const PetscScalar *av; 647 648 PetscFunctionBegin; 649 PetscCall(VecGetArray(v, &x)); 650 PetscCall(VecGetSize(v, &ng)); 651 PetscCheck(ng == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming mat and vec"); 652 PetscCall(VecGetLocalSize(v, &nl)); 653 len = PetscMin(a->A->rmap->n, a->A->cmap->n); 654 radd = A->rmap->rstart * m; 655 PetscCall(MatDenseGetArrayRead(a->A, &av)); 656 PetscCall(MatDenseGetLDA(a->A, &lda)); 657 for (i = 0; i < len; i++) x[i] = av[radd + i * lda + i]; 658 PetscCall(MatDenseRestoreArrayRead(a->A, &av)); 659 if (nl - i > 0) PetscCall(PetscArrayzero(x + i, nl - i)); 660 PetscCall(VecRestoreArray(v, &x)); 661 PetscFunctionReturn(PETSC_SUCCESS); 662 } 663 664 static PetscErrorCode MatDestroy_MPIDense(Mat mat) 665 { 666 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 667 668 PetscFunctionBegin; 669 PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N)); 670 PetscCall(MatStashDestroy_Private(&mat->stash)); 671 PetscCheck(!mdn->vecinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 672 PetscCheck(!mdn->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 673 PetscCall(MatDestroy(&mdn->A)); 674 PetscCall(VecDestroy(&mdn->lvec)); 675 PetscCall(PetscSFDestroy(&mdn->Mvctx)); 676 PetscCall(VecDestroy(&mdn->cvec)); 677 PetscCall(MatDestroy(&mdn->cmat)); 678 679 PetscCall(PetscFree(mat->data)); 680 PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL)); 681 682 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", NULL)); 683 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", NULL)); 684 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", NULL)); 685 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", NULL)); 686 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", NULL)); 687 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", NULL)); 688 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", NULL)); 689 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", NULL)); 690 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", NULL)); 691 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", NULL)); 692 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", NULL)); 693 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL)); 694 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", NULL)); 695 #if defined(PETSC_HAVE_ELEMENTAL) 696 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", NULL)); 697 #endif 698 #if defined(PETSC_HAVE_SCALAPACK) 699 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", NULL)); 700 #endif 701 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", NULL)); 702 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", NULL)); 703 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", NULL)); 704 #if defined(PETSC_HAVE_CUDA) 705 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", NULL)); 706 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", NULL)); 707 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", NULL)); 708 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensecuda_mpidense_C", NULL)); 709 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", NULL)); 710 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", NULL)); 711 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", NULL)); 712 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", NULL)); 713 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArray_C", NULL)); 714 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayRead_C", NULL)); 715 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayWrite_C", NULL)); 716 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArray_C", NULL)); 717 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayRead_C", NULL)); 718 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayWrite_C", NULL)); 719 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAPlaceArray_C", NULL)); 720 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAResetArray_C", NULL)); 721 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAReplaceArray_C", NULL)); 722 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDASetPreallocation_C", NULL)); 723 #endif 724 #if defined(PETSC_HAVE_HIP) 725 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", NULL)); 726 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", NULL)); 727 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", NULL)); 728 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensehip_mpidense_C", NULL)); 729 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensehip_C", NULL)); 730 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidensehip_C", NULL)); 731 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaij_C", NULL)); 732 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaijhipsparse_C", NULL)); 733 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArray_C", NULL)); 734 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayRead_C", NULL)); 735 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayWrite_C", NULL)); 736 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArray_C", NULL)); 737 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayRead_C", NULL)); 738 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayWrite_C", NULL)); 739 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPPlaceArray_C", NULL)); 740 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPResetArray_C", NULL)); 741 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPReplaceArray_C", NULL)); 742 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPSetPreallocation_C", NULL)); 743 #endif 744 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", NULL)); 745 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", NULL)); 746 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", NULL)); 747 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", NULL)); 748 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", NULL)); 749 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", NULL)); 750 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", NULL)); 751 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", NULL)); 752 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", NULL)); 753 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", NULL)); 754 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultAddColumnRange_C", NULL)); 755 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeColumnRange_C", NULL)); 756 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeAddColumnRange_C", NULL)); 757 758 PetscCall(PetscObjectCompose((PetscObject)mat, "DiagonalBlock", NULL)); 759 PetscFunctionReturn(PETSC_SUCCESS); 760 } 761 762 #include <petscdraw.h> 763 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat, PetscViewer viewer) 764 { 765 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 766 PetscMPIInt rank; 767 PetscViewerType vtype; 768 PetscBool iascii, isdraw; 769 PetscViewer sviewer; 770 PetscViewerFormat format; 771 772 PetscFunctionBegin; 773 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 774 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 775 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 776 if (iascii) { 777 PetscCall(PetscViewerGetType(viewer, &vtype)); 778 PetscCall(PetscViewerGetFormat(viewer, &format)); 779 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 780 MatInfo info; 781 PetscCall(MatGetInfo(mat, MAT_LOCAL, &info)); 782 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 783 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %" PetscInt_FMT " \n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, 784 (PetscInt)info.memory)); 785 PetscCall(PetscViewerFlush(viewer)); 786 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 787 if (mdn->Mvctx) PetscCall(PetscSFView(mdn->Mvctx, viewer)); 788 PetscFunctionReturn(PETSC_SUCCESS); 789 } else if (format == PETSC_VIEWER_ASCII_INFO) { 790 PetscFunctionReturn(PETSC_SUCCESS); 791 } 792 } else if (isdraw) { 793 PetscDraw draw; 794 PetscBool isnull; 795 796 PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw)); 797 PetscCall(PetscDrawIsNull(draw, &isnull)); 798 if (isnull) PetscFunctionReturn(PETSC_SUCCESS); 799 } 800 801 { 802 /* assemble the entire matrix onto first processor. */ 803 Mat A; 804 PetscInt M = mat->rmap->N, N = mat->cmap->N, m, row, i, nz; 805 PetscInt *cols; 806 PetscScalar *vals; 807 808 PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A)); 809 if (rank == 0) { 810 PetscCall(MatSetSizes(A, M, N, M, N)); 811 } else { 812 PetscCall(MatSetSizes(A, 0, 0, M, N)); 813 } 814 /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */ 815 PetscCall(MatSetType(A, MATMPIDENSE)); 816 PetscCall(MatMPIDenseSetPreallocation(A, NULL)); 817 818 /* Copy the matrix ... This isn't the most efficient means, 819 but it's quick for now */ 820 A->insertmode = INSERT_VALUES; 821 822 row = mat->rmap->rstart; 823 m = mdn->A->rmap->n; 824 for (i = 0; i < m; i++) { 825 PetscCall(MatGetRow_MPIDense(mat, row, &nz, &cols, &vals)); 826 PetscCall(MatSetValues_MPIDense(A, 1, &row, nz, cols, vals, INSERT_VALUES)); 827 PetscCall(MatRestoreRow_MPIDense(mat, row, &nz, &cols, &vals)); 828 row++; 829 } 830 831 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 832 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 833 PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 834 if (rank == 0) { 835 PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIDense *)A->data)->A, ((PetscObject)mat)->name)); 836 PetscCall(MatView_SeqDense(((Mat_MPIDense *)A->data)->A, sviewer)); 837 } 838 PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 839 PetscCall(MatDestroy(&A)); 840 } 841 PetscFunctionReturn(PETSC_SUCCESS); 842 } 843 844 static PetscErrorCode MatView_MPIDense(Mat mat, PetscViewer viewer) 845 { 846 PetscBool iascii, isbinary, isdraw, issocket; 847 848 PetscFunctionBegin; 849 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 850 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 851 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket)); 852 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 853 854 if (iascii || issocket || isdraw) { 855 PetscCall(MatView_MPIDense_ASCIIorDraworSocket(mat, viewer)); 856 } else if (isbinary) PetscCall(MatView_Dense_Binary(mat, viewer)); 857 PetscFunctionReturn(PETSC_SUCCESS); 858 } 859 860 static PetscErrorCode MatGetInfo_MPIDense(Mat A, MatInfoType flag, MatInfo *info) 861 { 862 Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 863 Mat mdn = mat->A; 864 PetscLogDouble isend[5], irecv[5]; 865 866 PetscFunctionBegin; 867 info->block_size = 1.0; 868 869 PetscCall(MatGetInfo(mdn, MAT_LOCAL, info)); 870 871 isend[0] = info->nz_used; 872 isend[1] = info->nz_allocated; 873 isend[2] = info->nz_unneeded; 874 isend[3] = info->memory; 875 isend[4] = info->mallocs; 876 if (flag == MAT_LOCAL) { 877 info->nz_used = isend[0]; 878 info->nz_allocated = isend[1]; 879 info->nz_unneeded = isend[2]; 880 info->memory = isend[3]; 881 info->mallocs = isend[4]; 882 } else if (flag == MAT_GLOBAL_MAX) { 883 PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)A))); 884 885 info->nz_used = irecv[0]; 886 info->nz_allocated = irecv[1]; 887 info->nz_unneeded = irecv[2]; 888 info->memory = irecv[3]; 889 info->mallocs = irecv[4]; 890 } else if (flag == MAT_GLOBAL_SUM) { 891 PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)A))); 892 893 info->nz_used = irecv[0]; 894 info->nz_allocated = irecv[1]; 895 info->nz_unneeded = irecv[2]; 896 info->memory = irecv[3]; 897 info->mallocs = irecv[4]; 898 } 899 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 900 info->fill_ratio_needed = 0; 901 info->factor_mallocs = 0; 902 PetscFunctionReturn(PETSC_SUCCESS); 903 } 904 905 static PetscErrorCode MatSetOption_MPIDense(Mat A, MatOption op, PetscBool flg) 906 { 907 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 908 909 PetscFunctionBegin; 910 switch (op) { 911 case MAT_NEW_NONZERO_LOCATIONS: 912 case MAT_NEW_NONZERO_LOCATION_ERR: 913 case MAT_NEW_NONZERO_ALLOCATION_ERR: 914 MatCheckPreallocated(A, 1); 915 PetscCall(MatSetOption(a->A, op, flg)); 916 break; 917 case MAT_ROW_ORIENTED: 918 MatCheckPreallocated(A, 1); 919 a->roworiented = flg; 920 PetscCall(MatSetOption(a->A, op, flg)); 921 break; 922 case MAT_FORCE_DIAGONAL_ENTRIES: 923 case MAT_KEEP_NONZERO_PATTERN: 924 case MAT_USE_HASH_TABLE: 925 case MAT_SORTED_FULL: 926 PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); 927 break; 928 case MAT_IGNORE_OFF_PROC_ENTRIES: 929 a->donotstash = flg; 930 break; 931 case MAT_SYMMETRIC: 932 case MAT_STRUCTURALLY_SYMMETRIC: 933 case MAT_HERMITIAN: 934 case MAT_SYMMETRY_ETERNAL: 935 case MAT_STRUCTURAL_SYMMETRY_ETERNAL: 936 case MAT_SPD: 937 case MAT_IGNORE_LOWER_TRIANGULAR: 938 case MAT_IGNORE_ZERO_ENTRIES: 939 case MAT_SPD_ETERNAL: 940 /* if the diagonal matrix is square it inherits some of the properties above */ 941 PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); 942 break; 943 default: 944 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %s", MatOptions[op]); 945 } 946 PetscFunctionReturn(PETSC_SUCCESS); 947 } 948 949 static PetscErrorCode MatDiagonalScale_MPIDense(Mat A, Vec ll, Vec rr) 950 { 951 Mat_MPIDense *mdn = (Mat_MPIDense *)A->data; 952 const PetscScalar *l; 953 PetscScalar x, *v, *vv, *r; 954 PetscInt i, j, s2a, s3a, s2, s3, m = mdn->A->rmap->n, n = mdn->A->cmap->n, lda; 955 956 PetscFunctionBegin; 957 PetscCall(MatDenseGetArray(mdn->A, &vv)); 958 PetscCall(MatDenseGetLDA(mdn->A, &lda)); 959 PetscCall(MatGetLocalSize(A, &s2, &s3)); 960 if (ll) { 961 PetscCall(VecGetLocalSize(ll, &s2a)); 962 PetscCheck(s2a == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left scaling vector non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT, s2a, s2); 963 PetscCall(VecGetArrayRead(ll, &l)); 964 for (i = 0; i < m; i++) { 965 x = l[i]; 966 v = vv + i; 967 for (j = 0; j < n; j++) { 968 (*v) *= x; 969 v += lda; 970 } 971 } 972 PetscCall(VecRestoreArrayRead(ll, &l)); 973 PetscCall(PetscLogFlops(1.0 * n * m)); 974 } 975 if (rr) { 976 const PetscScalar *ar; 977 978 PetscCall(VecGetLocalSize(rr, &s3a)); 979 PetscCheck(s3a == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Right scaling vec non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT ".", s3a, s3); 980 PetscCall(VecGetArrayRead(rr, &ar)); 981 if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); 982 PetscCall(VecGetArray(mdn->lvec, &r)); 983 PetscCall(PetscSFBcastBegin(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE)); 984 PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE)); 985 PetscCall(VecRestoreArrayRead(rr, &ar)); 986 for (i = 0; i < n; i++) { 987 x = r[i]; 988 v = vv + i * lda; 989 for (j = 0; j < m; j++) (*v++) *= x; 990 } 991 PetscCall(VecRestoreArray(mdn->lvec, &r)); 992 PetscCall(PetscLogFlops(1.0 * n * m)); 993 } 994 PetscCall(MatDenseRestoreArray(mdn->A, &vv)); 995 PetscFunctionReturn(PETSC_SUCCESS); 996 } 997 998 static PetscErrorCode MatNorm_MPIDense(Mat A, NormType type, PetscReal *nrm) 999 { 1000 Mat_MPIDense *mdn = (Mat_MPIDense *)A->data; 1001 PetscInt i, j; 1002 PetscMPIInt size; 1003 PetscReal sum = 0.0; 1004 const PetscScalar *av, *v; 1005 1006 PetscFunctionBegin; 1007 PetscCall(MatDenseGetArrayRead(mdn->A, &av)); 1008 v = av; 1009 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 1010 if (size == 1) { 1011 PetscCall(MatNorm(mdn->A, type, nrm)); 1012 } else { 1013 if (type == NORM_FROBENIUS) { 1014 for (i = 0; i < mdn->A->cmap->n * mdn->A->rmap->n; i++) { 1015 sum += PetscRealPart(PetscConj(*v) * (*v)); 1016 v++; 1017 } 1018 PetscCallMPI(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A))); 1019 *nrm = PetscSqrtReal(*nrm); 1020 PetscCall(PetscLogFlops(2.0 * mdn->A->cmap->n * mdn->A->rmap->n)); 1021 } else if (type == NORM_1) { 1022 PetscReal *tmp; 1023 1024 PetscCall(PetscCalloc1(A->cmap->N, &tmp)); 1025 *nrm = 0.0; 1026 v = av; 1027 for (j = 0; j < mdn->A->cmap->n; j++) { 1028 for (i = 0; i < mdn->A->rmap->n; i++) { 1029 tmp[j] += PetscAbsScalar(*v); 1030 v++; 1031 } 1032 } 1033 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, tmp, A->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A))); 1034 for (j = 0; j < A->cmap->N; j++) { 1035 if (tmp[j] > *nrm) *nrm = tmp[j]; 1036 } 1037 PetscCall(PetscFree(tmp)); 1038 PetscCall(PetscLogFlops(A->cmap->n * A->rmap->n)); 1039 } else if (type == NORM_INFINITY) { /* max row norm */ 1040 PetscCall(MatNorm(mdn->A, type, nrm)); 1041 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A))); 1042 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for two norm"); 1043 } 1044 PetscCall(MatDenseRestoreArrayRead(mdn->A, &av)); 1045 PetscFunctionReturn(PETSC_SUCCESS); 1046 } 1047 1048 static PetscErrorCode MatTranspose_MPIDense(Mat A, MatReuse reuse, Mat *matout) 1049 { 1050 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1051 Mat B; 1052 PetscInt M = A->rmap->N, N = A->cmap->N, m, n, *rwork, rstart = A->rmap->rstart; 1053 PetscInt j, i, lda; 1054 PetscScalar *v; 1055 1056 PetscFunctionBegin; 1057 if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout)); 1058 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 1059 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 1060 PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M)); 1061 PetscCall(MatSetType(B, ((PetscObject)A)->type_name)); 1062 PetscCall(MatMPIDenseSetPreallocation(B, NULL)); 1063 } else B = *matout; 1064 1065 m = a->A->rmap->n; 1066 n = a->A->cmap->n; 1067 PetscCall(MatDenseGetArrayRead(a->A, (const PetscScalar **)&v)); 1068 PetscCall(MatDenseGetLDA(a->A, &lda)); 1069 PetscCall(PetscMalloc1(m, &rwork)); 1070 for (i = 0; i < m; i++) rwork[i] = rstart + i; 1071 for (j = 0; j < n; j++) { 1072 PetscCall(MatSetValues(B, 1, &j, m, rwork, v, INSERT_VALUES)); 1073 v = PetscSafePointerPlusOffset(v, lda); 1074 } 1075 PetscCall(MatDenseRestoreArrayRead(a->A, (const PetscScalar **)&v)); 1076 PetscCall(PetscFree(rwork)); 1077 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 1078 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 1079 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 1080 *matout = B; 1081 } else { 1082 PetscCall(MatHeaderMerge(A, &B)); 1083 } 1084 PetscFunctionReturn(PETSC_SUCCESS); 1085 } 1086 1087 static PetscErrorCode MatDuplicate_MPIDense(Mat, MatDuplicateOption, Mat *); 1088 PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat, PetscScalar); 1089 1090 static PetscErrorCode MatSetUp_MPIDense(Mat A) 1091 { 1092 PetscFunctionBegin; 1093 PetscCall(PetscLayoutSetUp(A->rmap)); 1094 PetscCall(PetscLayoutSetUp(A->cmap)); 1095 if (!A->preallocated) PetscCall(MatMPIDenseSetPreallocation(A, NULL)); 1096 PetscFunctionReturn(PETSC_SUCCESS); 1097 } 1098 1099 static PetscErrorCode MatAXPY_MPIDense(Mat Y, PetscScalar alpha, Mat X, MatStructure str) 1100 { 1101 Mat_MPIDense *A = (Mat_MPIDense *)Y->data, *B = (Mat_MPIDense *)X->data; 1102 1103 PetscFunctionBegin; 1104 PetscCall(MatAXPY(A->A, alpha, B->A, str)); 1105 PetscFunctionReturn(PETSC_SUCCESS); 1106 } 1107 1108 static PetscErrorCode MatConjugate_MPIDense(Mat mat) 1109 { 1110 Mat_MPIDense *a = (Mat_MPIDense *)mat->data; 1111 1112 PetscFunctionBegin; 1113 PetscCall(MatConjugate(a->A)); 1114 PetscFunctionReturn(PETSC_SUCCESS); 1115 } 1116 1117 static PetscErrorCode MatRealPart_MPIDense(Mat A) 1118 { 1119 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1120 1121 PetscFunctionBegin; 1122 PetscCall(MatRealPart(a->A)); 1123 PetscFunctionReturn(PETSC_SUCCESS); 1124 } 1125 1126 static PetscErrorCode MatImaginaryPart_MPIDense(Mat A) 1127 { 1128 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1129 1130 PetscFunctionBegin; 1131 PetscCall(MatImaginaryPart(a->A)); 1132 PetscFunctionReturn(PETSC_SUCCESS); 1133 } 1134 1135 static PetscErrorCode MatGetColumnVector_MPIDense(Mat A, Vec v, PetscInt col) 1136 { 1137 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1138 1139 PetscFunctionBegin; 1140 PetscCheck(a->A, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing local matrix"); 1141 PetscCheck(a->A->ops->getcolumnvector, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing get column operation"); 1142 PetscCall((*a->A->ops->getcolumnvector)(a->A, v, col)); 1143 PetscFunctionReturn(PETSC_SUCCESS); 1144 } 1145 1146 PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat, PetscInt, PetscReal *); 1147 1148 static PetscErrorCode MatGetColumnReductions_MPIDense(Mat A, PetscInt type, PetscReal *reductions) 1149 { 1150 PetscInt i, m, n; 1151 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1152 1153 PetscFunctionBegin; 1154 PetscCall(MatGetSize(A, &m, &n)); 1155 if (type == REDUCTION_MEAN_REALPART) { 1156 PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_REALPART, reductions)); 1157 } else if (type == REDUCTION_MEAN_IMAGINARYPART) { 1158 PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_IMAGINARYPART, reductions)); 1159 } else { 1160 PetscCall(MatGetColumnReductions_SeqDense(a->A, type, reductions)); 1161 } 1162 if (type == NORM_2) { 1163 for (i = 0; i < n; i++) reductions[i] *= reductions[i]; 1164 } 1165 if (type == NORM_INFINITY) { 1166 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, reductions, n, MPIU_REAL, MPIU_MAX, A->hdr.comm)); 1167 } else { 1168 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, reductions, n, MPIU_REAL, MPIU_SUM, A->hdr.comm)); 1169 } 1170 if (type == NORM_2) { 1171 for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]); 1172 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 1173 for (i = 0; i < n; i++) reductions[i] /= m; 1174 } 1175 PetscFunctionReturn(PETSC_SUCCESS); 1176 } 1177 1178 static PetscErrorCode MatSetRandom_MPIDense(Mat x, PetscRandom rctx) 1179 { 1180 Mat_MPIDense *d = (Mat_MPIDense *)x->data; 1181 1182 PetscFunctionBegin; 1183 PetscCall(MatSetRandom(d->A, rctx)); 1184 #if defined(PETSC_HAVE_DEVICE) 1185 x->offloadmask = d->A->offloadmask; 1186 #endif 1187 PetscFunctionReturn(PETSC_SUCCESS); 1188 } 1189 1190 static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A, PetscBool *missing, PetscInt *d) 1191 { 1192 PetscFunctionBegin; 1193 *missing = PETSC_FALSE; 1194 PetscFunctionReturn(PETSC_SUCCESS); 1195 } 1196 1197 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat); 1198 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat); 1199 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat); 1200 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat); 1201 static PetscErrorCode MatEqual_MPIDense(Mat, Mat, PetscBool *); 1202 static PetscErrorCode MatLoad_MPIDense(Mat, PetscViewer); 1203 static PetscErrorCode MatProductSetFromOptions_MPIDense(Mat); 1204 1205 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense, 1206 MatGetRow_MPIDense, 1207 MatRestoreRow_MPIDense, 1208 MatMult_MPIDense, 1209 /* 4*/ MatMultAdd_MPIDense, 1210 MatMultTranspose_MPIDense, 1211 MatMultTransposeAdd_MPIDense, 1212 NULL, 1213 NULL, 1214 NULL, 1215 /* 10*/ NULL, 1216 NULL, 1217 NULL, 1218 NULL, 1219 MatTranspose_MPIDense, 1220 /* 15*/ MatGetInfo_MPIDense, 1221 MatEqual_MPIDense, 1222 MatGetDiagonal_MPIDense, 1223 MatDiagonalScale_MPIDense, 1224 MatNorm_MPIDense, 1225 /* 20*/ MatAssemblyBegin_MPIDense, 1226 MatAssemblyEnd_MPIDense, 1227 MatSetOption_MPIDense, 1228 MatZeroEntries_MPIDense, 1229 /* 24*/ MatZeroRows_MPIDense, 1230 NULL, 1231 NULL, 1232 NULL, 1233 NULL, 1234 /* 29*/ MatSetUp_MPIDense, 1235 NULL, 1236 NULL, 1237 MatGetDiagonalBlock_MPIDense, 1238 NULL, 1239 /* 34*/ MatDuplicate_MPIDense, 1240 NULL, 1241 NULL, 1242 NULL, 1243 NULL, 1244 /* 39*/ MatAXPY_MPIDense, 1245 MatCreateSubMatrices_MPIDense, 1246 NULL, 1247 MatGetValues_MPIDense, 1248 MatCopy_MPIDense, 1249 /* 44*/ NULL, 1250 MatScale_MPIDense, 1251 MatShift_MPIDense, 1252 NULL, 1253 NULL, 1254 /* 49*/ MatSetRandom_MPIDense, 1255 NULL, 1256 NULL, 1257 NULL, 1258 NULL, 1259 /* 54*/ NULL, 1260 NULL, 1261 NULL, 1262 NULL, 1263 NULL, 1264 /* 59*/ MatCreateSubMatrix_MPIDense, 1265 MatDestroy_MPIDense, 1266 MatView_MPIDense, 1267 NULL, 1268 NULL, 1269 /* 64*/ NULL, 1270 NULL, 1271 NULL, 1272 NULL, 1273 NULL, 1274 /* 69*/ NULL, 1275 NULL, 1276 NULL, 1277 NULL, 1278 NULL, 1279 /* 74*/ NULL, 1280 NULL, 1281 NULL, 1282 NULL, 1283 NULL, 1284 /* 79*/ NULL, 1285 NULL, 1286 NULL, 1287 NULL, 1288 /* 83*/ MatLoad_MPIDense, 1289 NULL, 1290 NULL, 1291 NULL, 1292 NULL, 1293 NULL, 1294 /* 89*/ NULL, 1295 NULL, 1296 NULL, 1297 NULL, 1298 NULL, 1299 /* 94*/ NULL, 1300 NULL, 1301 MatMatTransposeMultSymbolic_MPIDense_MPIDense, 1302 MatMatTransposeMultNumeric_MPIDense_MPIDense, 1303 NULL, 1304 /* 99*/ MatProductSetFromOptions_MPIDense, 1305 NULL, 1306 NULL, 1307 MatConjugate_MPIDense, 1308 NULL, 1309 /*104*/ NULL, 1310 MatRealPart_MPIDense, 1311 MatImaginaryPart_MPIDense, 1312 NULL, 1313 NULL, 1314 /*109*/ NULL, 1315 NULL, 1316 NULL, 1317 MatGetColumnVector_MPIDense, 1318 MatMissingDiagonal_MPIDense, 1319 /*114*/ NULL, 1320 NULL, 1321 NULL, 1322 NULL, 1323 NULL, 1324 /*119*/ NULL, 1325 NULL, 1326 MatMultHermitianTranspose_MPIDense, 1327 MatMultHermitianTransposeAdd_MPIDense, 1328 NULL, 1329 /*124*/ NULL, 1330 MatGetColumnReductions_MPIDense, 1331 NULL, 1332 NULL, 1333 NULL, 1334 /*129*/ NULL, 1335 NULL, 1336 MatTransposeMatMultSymbolic_MPIDense_MPIDense, 1337 MatTransposeMatMultNumeric_MPIDense_MPIDense, 1338 NULL, 1339 /*134*/ NULL, 1340 NULL, 1341 NULL, 1342 NULL, 1343 NULL, 1344 /*139*/ NULL, 1345 NULL, 1346 NULL, 1347 NULL, 1348 NULL, 1349 MatCreateMPIMatConcatenateSeqMat_MPIDense, 1350 /*145*/ NULL, 1351 NULL, 1352 NULL, 1353 NULL, 1354 NULL, 1355 /*150*/ NULL, 1356 NULL, 1357 NULL, 1358 NULL, 1359 NULL, 1360 /*155*/ NULL, 1361 NULL}; 1362 1363 static PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat, PetscScalar *data) 1364 { 1365 Mat_MPIDense *a = (Mat_MPIDense *)mat->data; 1366 MatType mtype = MATSEQDENSE; 1367 1368 PetscFunctionBegin; 1369 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1370 PetscCall(PetscLayoutSetUp(mat->rmap)); 1371 PetscCall(PetscLayoutSetUp(mat->cmap)); 1372 if (!a->A) { 1373 PetscCall(MatCreate(PETSC_COMM_SELF, &a->A)); 1374 PetscCall(MatSetSizes(a->A, mat->rmap->n, mat->cmap->N, mat->rmap->n, mat->cmap->N)); 1375 } 1376 #if defined(PETSC_HAVE_CUDA) 1377 PetscBool iscuda; 1378 PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSECUDA, &iscuda)); 1379 if (iscuda) mtype = MATSEQDENSECUDA; 1380 #endif 1381 #if defined(PETSC_HAVE_HIP) 1382 PetscBool iship; 1383 PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSEHIP, &iship)); 1384 if (iship) mtype = MATSEQDENSEHIP; 1385 #endif 1386 PetscCall(MatSetType(a->A, mtype)); 1387 PetscCall(MatSeqDenseSetPreallocation(a->A, data)); 1388 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 1389 mat->offloadmask = a->A->offloadmask; 1390 #endif 1391 mat->preallocated = PETSC_TRUE; 1392 mat->assembled = PETSC_TRUE; 1393 PetscFunctionReturn(PETSC_SUCCESS); 1394 } 1395 1396 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIDense(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 1397 { 1398 Mat B, C; 1399 1400 PetscFunctionBegin; 1401 PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &C)); 1402 PetscCall(MatConvert_SeqAIJ_SeqDense(C, MATSEQDENSE, MAT_INITIAL_MATRIX, &B)); 1403 PetscCall(MatDestroy(&C)); 1404 if (reuse == MAT_REUSE_MATRIX) { 1405 C = *newmat; 1406 } else C = NULL; 1407 PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C)); 1408 PetscCall(MatDestroy(&B)); 1409 if (reuse == MAT_INPLACE_MATRIX) { 1410 PetscCall(MatHeaderReplace(A, &C)); 1411 } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C; 1412 PetscFunctionReturn(PETSC_SUCCESS); 1413 } 1414 1415 static PetscErrorCode MatConvert_MPIDense_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 1416 { 1417 Mat B, C; 1418 1419 PetscFunctionBegin; 1420 PetscCall(MatDenseGetLocalMatrix(A, &C)); 1421 PetscCall(MatConvert_SeqDense_SeqAIJ(C, MATSEQAIJ, MAT_INITIAL_MATRIX, &B)); 1422 if (reuse == MAT_REUSE_MATRIX) { 1423 C = *newmat; 1424 } else C = NULL; 1425 PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C)); 1426 PetscCall(MatDestroy(&B)); 1427 if (reuse == MAT_INPLACE_MATRIX) { 1428 PetscCall(MatHeaderReplace(A, &C)); 1429 } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C; 1430 PetscFunctionReturn(PETSC_SUCCESS); 1431 } 1432 1433 #if defined(PETSC_HAVE_ELEMENTAL) 1434 PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 1435 { 1436 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1437 Mat mat_elemental; 1438 PetscScalar *v; 1439 PetscInt m = A->rmap->n, N = A->cmap->N, rstart = A->rmap->rstart, i, *rows, *cols, lda; 1440 1441 PetscFunctionBegin; 1442 if (reuse == MAT_REUSE_MATRIX) { 1443 mat_elemental = *newmat; 1444 PetscCall(MatZeroEntries(*newmat)); 1445 } else { 1446 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental)); 1447 PetscCall(MatSetSizes(mat_elemental, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N)); 1448 PetscCall(MatSetType(mat_elemental, MATELEMENTAL)); 1449 PetscCall(MatSetUp(mat_elemental)); 1450 PetscCall(MatSetOption(mat_elemental, MAT_ROW_ORIENTED, PETSC_FALSE)); 1451 } 1452 1453 PetscCall(PetscMalloc2(m, &rows, N, &cols)); 1454 for (i = 0; i < N; i++) cols[i] = i; 1455 for (i = 0; i < m; i++) rows[i] = rstart + i; 1456 1457 /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */ 1458 PetscCall(MatDenseGetArray(A, &v)); 1459 PetscCall(MatDenseGetLDA(a->A, &lda)); 1460 if (lda == m) PetscCall(MatSetValues(mat_elemental, m, rows, N, cols, v, ADD_VALUES)); 1461 else { 1462 for (i = 0; i < N; i++) PetscCall(MatSetValues(mat_elemental, m, rows, 1, &i, v + lda * i, ADD_VALUES)); 1463 } 1464 PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY)); 1465 PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY)); 1466 PetscCall(MatDenseRestoreArray(A, &v)); 1467 PetscCall(PetscFree2(rows, cols)); 1468 1469 if (reuse == MAT_INPLACE_MATRIX) { 1470 PetscCall(MatHeaderReplace(A, &mat_elemental)); 1471 } else { 1472 *newmat = mat_elemental; 1473 } 1474 PetscFunctionReturn(PETSC_SUCCESS); 1475 } 1476 #endif 1477 1478 static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A, PetscInt col, PetscScalar **vals) 1479 { 1480 Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 1481 1482 PetscFunctionBegin; 1483 PetscCall(MatDenseGetColumn(mat->A, col, vals)); 1484 PetscFunctionReturn(PETSC_SUCCESS); 1485 } 1486 1487 static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A, PetscScalar **vals) 1488 { 1489 Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 1490 1491 PetscFunctionBegin; 1492 PetscCall(MatDenseRestoreColumn(mat->A, vals)); 1493 PetscFunctionReturn(PETSC_SUCCESS); 1494 } 1495 1496 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 1497 { 1498 Mat_MPIDense *mat; 1499 PetscInt m, nloc, N; 1500 1501 PetscFunctionBegin; 1502 PetscCall(MatGetSize(inmat, &m, &N)); 1503 PetscCall(MatGetLocalSize(inmat, NULL, &nloc)); 1504 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 1505 PetscInt sum; 1506 1507 if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N)); 1508 /* Check sum(n) = N */ 1509 PetscCallMPI(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm)); 1510 PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N); 1511 1512 PetscCall(MatCreateDense(comm, m, n, PETSC_DETERMINE, N, NULL, outmat)); 1513 PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 1514 } 1515 1516 /* numeric phase */ 1517 mat = (Mat_MPIDense *)(*outmat)->data; 1518 PetscCall(MatCopy(inmat, mat->A, SAME_NONZERO_PATTERN)); 1519 PetscFunctionReturn(PETSC_SUCCESS); 1520 } 1521 1522 PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A, PetscInt col, Vec *v) 1523 { 1524 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1525 PetscInt lda; 1526 1527 PetscFunctionBegin; 1528 PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1529 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1530 if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec)); 1531 a->vecinuse = col + 1; 1532 PetscCall(MatDenseGetLDA(a->A, &lda)); 1533 PetscCall(MatDenseGetArray(a->A, (PetscScalar **)&a->ptrinuse)); 1534 PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)lda))); 1535 *v = a->cvec; 1536 PetscFunctionReturn(PETSC_SUCCESS); 1537 } 1538 1539 PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A, PetscInt col, Vec *v) 1540 { 1541 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1542 1543 PetscFunctionBegin; 1544 PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first"); 1545 PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector"); 1546 VecCheckAssembled(a->cvec); 1547 a->vecinuse = 0; 1548 PetscCall(MatDenseRestoreArray(a->A, (PetscScalar **)&a->ptrinuse)); 1549 PetscCall(VecResetArray(a->cvec)); 1550 if (v) *v = NULL; 1551 PetscFunctionReturn(PETSC_SUCCESS); 1552 } 1553 1554 PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v) 1555 { 1556 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1557 PetscInt lda; 1558 1559 PetscFunctionBegin; 1560 PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1561 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1562 if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec)); 1563 a->vecinuse = col + 1; 1564 PetscCall(MatDenseGetLDA(a->A, &lda)); 1565 PetscCall(MatDenseGetArrayRead(a->A, &a->ptrinuse)); 1566 PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)lda))); 1567 PetscCall(VecLockReadPush(a->cvec)); 1568 *v = a->cvec; 1569 PetscFunctionReturn(PETSC_SUCCESS); 1570 } 1571 1572 PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v) 1573 { 1574 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1575 1576 PetscFunctionBegin; 1577 PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first"); 1578 PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector"); 1579 VecCheckAssembled(a->cvec); 1580 a->vecinuse = 0; 1581 PetscCall(MatDenseRestoreArrayRead(a->A, &a->ptrinuse)); 1582 PetscCall(VecLockReadPop(a->cvec)); 1583 PetscCall(VecResetArray(a->cvec)); 1584 if (v) *v = NULL; 1585 PetscFunctionReturn(PETSC_SUCCESS); 1586 } 1587 1588 PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v) 1589 { 1590 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1591 PetscInt lda; 1592 1593 PetscFunctionBegin; 1594 PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1595 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1596 if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec)); 1597 a->vecinuse = col + 1; 1598 PetscCall(MatDenseGetLDA(a->A, &lda)); 1599 PetscCall(MatDenseGetArrayWrite(a->A, (PetscScalar **)&a->ptrinuse)); 1600 PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)lda))); 1601 *v = a->cvec; 1602 PetscFunctionReturn(PETSC_SUCCESS); 1603 } 1604 1605 PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v) 1606 { 1607 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1608 1609 PetscFunctionBegin; 1610 PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first"); 1611 PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector"); 1612 VecCheckAssembled(a->cvec); 1613 a->vecinuse = 0; 1614 PetscCall(MatDenseRestoreArrayWrite(a->A, (PetscScalar **)&a->ptrinuse)); 1615 PetscCall(VecResetArray(a->cvec)); 1616 if (v) *v = NULL; 1617 PetscFunctionReturn(PETSC_SUCCESS); 1618 } 1619 1620 static PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A, PetscInt rbegin, PetscInt rend, PetscInt cbegin, PetscInt cend, Mat *v) 1621 { 1622 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1623 Mat_MPIDense *c; 1624 MPI_Comm comm; 1625 PetscInt pbegin, pend; 1626 1627 PetscFunctionBegin; 1628 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 1629 PetscCheck(!a->vecinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1630 PetscCheck(!a->matinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1631 pbegin = PetscMax(0, PetscMin(A->rmap->rend, rbegin) - A->rmap->rstart); 1632 pend = PetscMin(A->rmap->n, PetscMax(0, rend - A->rmap->rstart)); 1633 if (!a->cmat) { 1634 PetscCall(MatCreate(comm, &a->cmat)); 1635 PetscCall(MatSetType(a->cmat, ((PetscObject)A)->type_name)); 1636 if (rend - rbegin == A->rmap->N) PetscCall(PetscLayoutReference(A->rmap, &a->cmat->rmap)); 1637 else { 1638 PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin)); 1639 PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin)); 1640 PetscCall(PetscLayoutSetUp(a->cmat->rmap)); 1641 } 1642 PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin)); 1643 PetscCall(PetscLayoutSetUp(a->cmat->cmap)); 1644 } else { 1645 PetscBool same = (PetscBool)(rend - rbegin == a->cmat->rmap->N); 1646 if (same && a->cmat->rmap->N != A->rmap->N) { 1647 same = (PetscBool)(pend - pbegin == a->cmat->rmap->n); 1648 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &same, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A))); 1649 } 1650 if (!same) { 1651 PetscCall(PetscLayoutDestroy(&a->cmat->rmap)); 1652 PetscCall(PetscLayoutCreate(comm, &a->cmat->rmap)); 1653 PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin)); 1654 PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin)); 1655 PetscCall(PetscLayoutSetUp(a->cmat->rmap)); 1656 } 1657 if (cend - cbegin != a->cmat->cmap->N) { 1658 PetscCall(PetscLayoutDestroy(&a->cmat->cmap)); 1659 PetscCall(PetscLayoutCreate(comm, &a->cmat->cmap)); 1660 PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin)); 1661 PetscCall(PetscLayoutSetUp(a->cmat->cmap)); 1662 } 1663 } 1664 c = (Mat_MPIDense *)a->cmat->data; 1665 PetscCheck(!c->A, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1666 PetscCall(MatDenseGetSubMatrix(a->A, pbegin, pend, cbegin, cend, &c->A)); 1667 1668 a->cmat->preallocated = PETSC_TRUE; 1669 a->cmat->assembled = PETSC_TRUE; 1670 #if defined(PETSC_HAVE_DEVICE) 1671 a->cmat->offloadmask = c->A->offloadmask; 1672 #endif 1673 a->matinuse = cbegin + 1; 1674 *v = a->cmat; 1675 PetscFunctionReturn(PETSC_SUCCESS); 1676 } 1677 1678 static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A, Mat *v) 1679 { 1680 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1681 Mat_MPIDense *c; 1682 1683 PetscFunctionBegin; 1684 PetscCheck(a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetSubMatrix() first"); 1685 PetscCheck(a->cmat, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal matrix"); 1686 PetscCheck(*v == a->cmat, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Not the matrix obtained from MatDenseGetSubMatrix()"); 1687 a->matinuse = 0; 1688 c = (Mat_MPIDense *)a->cmat->data; 1689 PetscCall(MatDenseRestoreSubMatrix(a->A, &c->A)); 1690 if (v) *v = NULL; 1691 #if defined(PETSC_HAVE_DEVICE) 1692 A->offloadmask = a->A->offloadmask; 1693 #endif 1694 PetscFunctionReturn(PETSC_SUCCESS); 1695 } 1696 1697 /*MC 1698 MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices. 1699 1700 Options Database Key: 1701 . -mat_type mpidense - sets the matrix type to `MATMPIDENSE` during a call to `MatSetFromOptions()` 1702 1703 Level: beginner 1704 1705 .seealso: [](ch_matrices), `Mat`, `MatCreateDense()`, `MATSEQDENSE`, `MATDENSE` 1706 M*/ 1707 PetscErrorCode MatCreate_MPIDense(Mat mat) 1708 { 1709 Mat_MPIDense *a; 1710 1711 PetscFunctionBegin; 1712 PetscCall(PetscNew(&a)); 1713 mat->data = (void *)a; 1714 mat->ops[0] = MatOps_Values; 1715 1716 mat->insertmode = NOT_SET_VALUES; 1717 1718 /* build cache for off array entries formed */ 1719 a->donotstash = PETSC_FALSE; 1720 1721 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)mat), 1, &mat->stash)); 1722 1723 /* stuff used for matrix vector multiply */ 1724 a->lvec = NULL; 1725 a->Mvctx = NULL; 1726 a->roworiented = PETSC_TRUE; 1727 1728 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", MatDenseGetLDA_MPIDense)); 1729 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", MatDenseSetLDA_MPIDense)); 1730 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", MatDenseGetArray_MPIDense)); 1731 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", MatDenseRestoreArray_MPIDense)); 1732 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", MatDenseGetArrayRead_MPIDense)); 1733 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", MatDenseRestoreArrayRead_MPIDense)); 1734 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", MatDenseGetArrayWrite_MPIDense)); 1735 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", MatDenseRestoreArrayWrite_MPIDense)); 1736 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", MatDensePlaceArray_MPIDense)); 1737 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", MatDenseResetArray_MPIDense)); 1738 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", MatDenseReplaceArray_MPIDense)); 1739 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDense)); 1740 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDense)); 1741 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDense)); 1742 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDense)); 1743 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDense)); 1744 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDense)); 1745 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", MatDenseGetSubMatrix_MPIDense)); 1746 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", MatDenseRestoreSubMatrix_MPIDense)); 1747 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense)); 1748 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", MatConvert_MPIDense_MPIAIJ)); 1749 #if defined(PETSC_HAVE_ELEMENTAL) 1750 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", MatConvert_MPIDense_Elemental)); 1751 #endif 1752 #if defined(PETSC_HAVE_SCALAPACK) 1753 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", MatConvert_Dense_ScaLAPACK)); 1754 #endif 1755 #if defined(PETSC_HAVE_CUDA) 1756 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", MatConvert_MPIDense_MPIDenseCUDA)); 1757 #endif 1758 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", MatMPIDenseSetPreallocation_MPIDense)); 1759 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense)); 1760 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ)); 1761 #if defined(PETSC_HAVE_CUDA) 1762 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense)); 1763 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ)); 1764 #endif 1765 #if defined(PETSC_HAVE_HIP) 1766 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", MatConvert_MPIDense_MPIDenseHIP)); 1767 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense)); 1768 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ)); 1769 #endif 1770 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", MatDenseGetColumn_MPIDense)); 1771 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", MatDenseRestoreColumn_MPIDense)); 1772 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultAddColumnRange_C", MatMultAddColumnRange_MPIDense)); 1773 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeColumnRange_C", MatMultHermitianTransposeColumnRange_MPIDense)); 1774 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeAddColumnRange_C", MatMultHermitianTransposeAddColumnRange_MPIDense)); 1775 PetscCall(PetscObjectChangeTypeName((PetscObject)mat, MATMPIDENSE)); 1776 PetscFunctionReturn(PETSC_SUCCESS); 1777 } 1778 1779 /*MC 1780 MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices. 1781 1782 This matrix type is identical to `MATSEQDENSE` when constructed with a single process communicator, 1783 and `MATMPIDENSE` otherwise. 1784 1785 Options Database Key: 1786 . -mat_type dense - sets the matrix type to `MATDENSE` during a call to `MatSetFromOptions()` 1787 1788 Level: beginner 1789 1790 .seealso: [](ch_matrices), `Mat`, `MATSEQDENSE`, `MATMPIDENSE`, `MATDENSECUDA`, `MATDENSEHIP` 1791 M*/ 1792 1793 /*@ 1794 MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries 1795 1796 Collective 1797 1798 Input Parameters: 1799 + B - the matrix 1800 - data - optional location of matrix data. Set to `NULL` for PETSc 1801 to control all matrix memory allocation. 1802 1803 Level: intermediate 1804 1805 Notes: 1806 The dense format is fully compatible with standard Fortran 1807 storage by columns. 1808 1809 The data input variable is intended primarily for Fortran programmers 1810 who wish to allocate their own matrix memory space. Most users should 1811 set `data` to `NULL`. 1812 1813 .seealso: [](ch_matrices), `Mat`, `MATMPIDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()` 1814 @*/ 1815 PetscErrorCode MatMPIDenseSetPreallocation(Mat B, PetscScalar *data) 1816 { 1817 PetscFunctionBegin; 1818 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 1819 PetscTryMethod(B, "MatMPIDenseSetPreallocation_C", (Mat, PetscScalar *), (B, data)); 1820 PetscFunctionReturn(PETSC_SUCCESS); 1821 } 1822 1823 /*@ 1824 MatDensePlaceArray - Allows one to replace the array in a `MATDENSE` matrix with an 1825 array provided by the user. This is useful to avoid copying an array 1826 into a matrix 1827 1828 Not Collective 1829 1830 Input Parameters: 1831 + mat - the matrix 1832 - array - the array in column major order 1833 1834 Level: developer 1835 1836 Note: 1837 You can return to the original array with a call to `MatDenseResetArray()`. The user is responsible for freeing this array; it will not be 1838 freed when the matrix is destroyed. 1839 1840 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDenseResetArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`, 1841 `MatDenseReplaceArray()` 1842 @*/ 1843 PetscErrorCode MatDensePlaceArray(Mat mat, const PetscScalar *array) 1844 { 1845 PetscFunctionBegin; 1846 PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 1847 PetscUseMethod(mat, "MatDensePlaceArray_C", (Mat, const PetscScalar *), (mat, array)); 1848 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 1849 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 1850 mat->offloadmask = PETSC_OFFLOAD_CPU; 1851 #endif 1852 PetscFunctionReturn(PETSC_SUCCESS); 1853 } 1854 1855 /*@ 1856 MatDenseResetArray - Resets the matrix array to that it previously had before the call to `MatDensePlaceArray()` 1857 1858 Not Collective 1859 1860 Input Parameter: 1861 . mat - the matrix 1862 1863 Level: developer 1864 1865 Note: 1866 You can only call this after a call to `MatDensePlaceArray()` 1867 1868 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDensePlaceArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()` 1869 @*/ 1870 PetscErrorCode MatDenseResetArray(Mat mat) 1871 { 1872 PetscFunctionBegin; 1873 PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 1874 PetscUseMethod(mat, "MatDenseResetArray_C", (Mat), (mat)); 1875 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 1876 PetscFunctionReturn(PETSC_SUCCESS); 1877 } 1878 1879 /*@ 1880 MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an 1881 array provided by the user. This is useful to avoid copying an array 1882 into a matrix 1883 1884 Not Collective 1885 1886 Input Parameters: 1887 + mat - the matrix 1888 - array - the array in column major order 1889 1890 Level: developer 1891 1892 Note: 1893 The memory passed in MUST be obtained with `PetscMalloc()` and CANNOT be 1894 freed by the user. It will be freed when the matrix is destroyed. 1895 1896 .seealso: [](ch_matrices), `Mat`, `MatDensePlaceArray()`, `MatDenseGetArray()`, `VecReplaceArray()` 1897 @*/ 1898 PetscErrorCode MatDenseReplaceArray(Mat mat, const PetscScalar *array) 1899 { 1900 PetscFunctionBegin; 1901 PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 1902 PetscUseMethod(mat, "MatDenseReplaceArray_C", (Mat, const PetscScalar *), (mat, array)); 1903 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 1904 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 1905 mat->offloadmask = PETSC_OFFLOAD_CPU; 1906 #endif 1907 PetscFunctionReturn(PETSC_SUCCESS); 1908 } 1909 1910 /*@ 1911 MatCreateDense - Creates a matrix in `MATDENSE` format. 1912 1913 Collective 1914 1915 Input Parameters: 1916 + comm - MPI communicator 1917 . m - number of local rows (or `PETSC_DECIDE` to have calculated if `M` is given) 1918 . n - number of local columns (or `PETSC_DECIDE` to have calculated if `N` is given) 1919 . M - number of global rows (or `PETSC_DECIDE` to have calculated if `m` is given) 1920 . N - number of global columns (or `PETSC_DECIDE` to have calculated if `n` is given) 1921 - data - optional location of matrix data. Set data to `NULL` (`PETSC_NULL_SCALAR` for Fortran users) for PETSc 1922 to control all matrix memory allocation. 1923 1924 Output Parameter: 1925 . A - the matrix 1926 1927 Level: intermediate 1928 1929 Notes: 1930 The dense format is fully compatible with standard Fortran 1931 storage by columns. 1932 1933 Although local portions of the matrix are stored in column-major 1934 order, the matrix is partitioned across MPI ranks by row. 1935 1936 The data input variable is intended primarily for Fortran programmers 1937 who wish to allocate their own matrix memory space. Most users should 1938 set `data` to `NULL` (`PETSC_NULL_SCALAR` for Fortran users). 1939 1940 The user MUST specify either the local or global matrix dimensions 1941 (possibly both). 1942 1943 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()` 1944 @*/ 1945 PetscErrorCode MatCreateDense(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar *data, Mat *A) 1946 { 1947 PetscFunctionBegin; 1948 PetscCall(MatCreate(comm, A)); 1949 PetscCall(MatSetSizes(*A, m, n, M, N)); 1950 PetscCall(MatSetType(*A, MATDENSE)); 1951 PetscCall(MatSeqDenseSetPreallocation(*A, data)); 1952 PetscCall(MatMPIDenseSetPreallocation(*A, data)); 1953 PetscFunctionReturn(PETSC_SUCCESS); 1954 } 1955 1956 static PetscErrorCode MatDuplicate_MPIDense(Mat A, MatDuplicateOption cpvalues, Mat *newmat) 1957 { 1958 Mat mat; 1959 Mat_MPIDense *a, *oldmat = (Mat_MPIDense *)A->data; 1960 1961 PetscFunctionBegin; 1962 *newmat = NULL; 1963 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat)); 1964 PetscCall(MatSetSizes(mat, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 1965 PetscCall(MatSetType(mat, ((PetscObject)A)->type_name)); 1966 a = (Mat_MPIDense *)mat->data; 1967 1968 mat->factortype = A->factortype; 1969 mat->assembled = PETSC_TRUE; 1970 mat->preallocated = PETSC_TRUE; 1971 1972 mat->insertmode = NOT_SET_VALUES; 1973 a->donotstash = oldmat->donotstash; 1974 1975 PetscCall(PetscLayoutReference(A->rmap, &mat->rmap)); 1976 PetscCall(PetscLayoutReference(A->cmap, &mat->cmap)); 1977 1978 PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A)); 1979 1980 *newmat = mat; 1981 PetscFunctionReturn(PETSC_SUCCESS); 1982 } 1983 1984 static PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer) 1985 { 1986 PetscBool isbinary; 1987 #if defined(PETSC_HAVE_HDF5) 1988 PetscBool ishdf5; 1989 #endif 1990 1991 PetscFunctionBegin; 1992 PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1); 1993 PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2); 1994 /* force binary viewer to load .info file if it has not yet done so */ 1995 PetscCall(PetscViewerSetUp(viewer)); 1996 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1997 #if defined(PETSC_HAVE_HDF5) 1998 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5)); 1999 #endif 2000 if (isbinary) { 2001 PetscCall(MatLoad_Dense_Binary(newMat, viewer)); 2002 #if defined(PETSC_HAVE_HDF5) 2003 } else if (ishdf5) { 2004 PetscCall(MatLoad_Dense_HDF5(newMat, viewer)); 2005 #endif 2006 } else SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name); 2007 PetscFunctionReturn(PETSC_SUCCESS); 2008 } 2009 2010 static PetscErrorCode MatEqual_MPIDense(Mat A, Mat B, PetscBool *flag) 2011 { 2012 Mat_MPIDense *matB = (Mat_MPIDense *)B->data, *matA = (Mat_MPIDense *)A->data; 2013 Mat a, b; 2014 2015 PetscFunctionBegin; 2016 a = matA->A; 2017 b = matB->A; 2018 PetscCall(MatEqual(a, b, flag)); 2019 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A))); 2020 PetscFunctionReturn(PETSC_SUCCESS); 2021 } 2022 2023 static PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data) 2024 { 2025 Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data; 2026 2027 PetscFunctionBegin; 2028 PetscCall(PetscFree2(atb->sendbuf, atb->recvcounts)); 2029 PetscCall(MatDestroy(&atb->atb)); 2030 PetscCall(PetscFree(atb)); 2031 PetscFunctionReturn(PETSC_SUCCESS); 2032 } 2033 2034 static PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data) 2035 { 2036 Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data; 2037 2038 PetscFunctionBegin; 2039 PetscCall(PetscFree2(abt->buf[0], abt->buf[1])); 2040 PetscCall(PetscFree2(abt->recvcounts, abt->recvdispls)); 2041 PetscCall(PetscFree(abt)); 2042 PetscFunctionReturn(PETSC_SUCCESS); 2043 } 2044 2045 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) 2046 { 2047 Mat_MPIDense *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data; 2048 Mat_TransMatMultDense *atb; 2049 MPI_Comm comm; 2050 PetscMPIInt size, *recvcounts; 2051 PetscScalar *carray, *sendbuf; 2052 const PetscScalar *atbarray; 2053 PetscInt i, cN = C->cmap->N, proc, k, j, lda; 2054 const PetscInt *ranges; 2055 2056 PetscFunctionBegin; 2057 MatCheckProduct(C, 3); 2058 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 2059 atb = (Mat_TransMatMultDense *)C->product->data; 2060 recvcounts = atb->recvcounts; 2061 sendbuf = atb->sendbuf; 2062 2063 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 2064 PetscCallMPI(MPI_Comm_size(comm, &size)); 2065 2066 /* compute atbarray = aseq^T * bseq */ 2067 PetscCall(MatTransposeMatMult(a->A, b->A, atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX, PETSC_DETERMINE, &atb->atb)); 2068 2069 PetscCall(MatGetOwnershipRanges(C, &ranges)); 2070 2071 /* arrange atbarray into sendbuf */ 2072 PetscCall(MatDenseGetArrayRead(atb->atb, &atbarray)); 2073 PetscCall(MatDenseGetLDA(atb->atb, &lda)); 2074 for (proc = 0, k = 0; proc < size; proc++) { 2075 for (j = 0; j < cN; j++) { 2076 for (i = ranges[proc]; i < ranges[proc + 1]; i++) sendbuf[k++] = atbarray[i + j * lda]; 2077 } 2078 } 2079 PetscCall(MatDenseRestoreArrayRead(atb->atb, &atbarray)); 2080 2081 /* sum all atbarray to local values of C */ 2082 PetscCall(MatDenseGetArrayWrite(c->A, &carray)); 2083 PetscCallMPI(MPI_Reduce_scatter(sendbuf, carray, recvcounts, MPIU_SCALAR, MPIU_SUM, comm)); 2084 PetscCall(MatDenseRestoreArrayWrite(c->A, &carray)); 2085 PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 2086 PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 2087 PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 2088 PetscFunctionReturn(PETSC_SUCCESS); 2089 } 2090 2091 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C) 2092 { 2093 MPI_Comm comm; 2094 PetscMPIInt size; 2095 PetscInt cm = A->cmap->n, cM, cN = B->cmap->N; 2096 Mat_TransMatMultDense *atb; 2097 PetscBool cisdense = PETSC_FALSE; 2098 const PetscInt *ranges; 2099 2100 PetscFunctionBegin; 2101 MatCheckProduct(C, 4); 2102 PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 2103 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 2104 if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) { 2105 SETERRQ(comm, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend); 2106 } 2107 2108 /* create matrix product C */ 2109 PetscCall(MatSetSizes(C, cm, B->cmap->n, A->cmap->N, B->cmap->N)); 2110 #if defined(PETSC_HAVE_CUDA) 2111 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, "")); 2112 #endif 2113 #if defined(PETSC_HAVE_HIP) 2114 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSEHIP, "")); 2115 #endif 2116 if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 2117 PetscCall(MatSetUp(C)); 2118 2119 /* create data structure for reuse C */ 2120 PetscCallMPI(MPI_Comm_size(comm, &size)); 2121 PetscCall(PetscNew(&atb)); 2122 cM = C->rmap->N; 2123 PetscCall(PetscMalloc2(cM * cN, &atb->sendbuf, size, &atb->recvcounts)); 2124 PetscCall(MatGetOwnershipRanges(C, &ranges)); 2125 for (PetscMPIInt i = 0; i < size; i++) PetscCall(PetscMPIIntCast((ranges[i + 1] - ranges[i]) * cN, &atb->recvcounts[i])); 2126 C->product->data = atb; 2127 C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense; 2128 PetscFunctionReturn(PETSC_SUCCESS); 2129 } 2130 2131 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C) 2132 { 2133 MPI_Comm comm; 2134 PetscMPIInt i, size; 2135 PetscInt maxRows, bufsiz; 2136 PetscMPIInt tag; 2137 PetscInt alg; 2138 Mat_MatTransMultDense *abt; 2139 Mat_Product *product = C->product; 2140 PetscBool flg; 2141 2142 PetscFunctionBegin; 2143 MatCheckProduct(C, 4); 2144 PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 2145 /* check local size of A and B */ 2146 PetscCheck(A->cmap->n == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local column dimensions are incompatible, A (%" PetscInt_FMT ") != B (%" PetscInt_FMT ")", A->cmap->n, B->cmap->n); 2147 2148 PetscCall(PetscStrcmp(product->alg, "allgatherv", &flg)); 2149 alg = flg ? 0 : 1; 2150 2151 /* setup matrix product C */ 2152 PetscCall(MatSetSizes(C, A->rmap->n, B->rmap->n, A->rmap->N, B->rmap->N)); 2153 PetscCall(MatSetType(C, MATMPIDENSE)); 2154 PetscCall(MatSetUp(C)); 2155 PetscCall(PetscObjectGetNewTag((PetscObject)C, &tag)); 2156 2157 /* create data structure for reuse C */ 2158 PetscCall(PetscObjectGetComm((PetscObject)C, &comm)); 2159 PetscCallMPI(MPI_Comm_size(comm, &size)); 2160 PetscCall(PetscNew(&abt)); 2161 abt->tag = tag; 2162 abt->alg = alg; 2163 switch (alg) { 2164 case 1: /* alg: "cyclic" */ 2165 for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, B->rmap->range[i + 1] - B->rmap->range[i]); 2166 bufsiz = A->cmap->N * maxRows; 2167 PetscCall(PetscMalloc2(bufsiz, &abt->buf[0], bufsiz, &abt->buf[1])); 2168 break; 2169 default: /* alg: "allgatherv" */ 2170 PetscCall(PetscMalloc2(B->rmap->n * B->cmap->N, &abt->buf[0], B->rmap->N * B->cmap->N, &abt->buf[1])); 2171 PetscCall(PetscMalloc2(size, &abt->recvcounts, size + 1, &abt->recvdispls)); 2172 for (i = 0; i <= size; i++) PetscCall(PetscMPIIntCast(B->rmap->range[i] * A->cmap->N, &abt->recvdispls[i])); 2173 for (i = 0; i < size; i++) PetscCall(PetscMPIIntCast(abt->recvdispls[i + 1] - abt->recvdispls[i], &abt->recvcounts[i])); 2174 break; 2175 } 2176 2177 C->product->data = abt; 2178 C->product->destroy = MatDestroy_MatMatTransMult_MPIDense_MPIDense; 2179 C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense; 2180 PetscFunctionReturn(PETSC_SUCCESS); 2181 } 2182 2183 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C) 2184 { 2185 Mat_MPIDense *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data; 2186 Mat_MatTransMultDense *abt; 2187 MPI_Comm comm; 2188 PetscMPIInt rank, size, sendto, recvfrom, recvisfrom; 2189 PetscScalar *sendbuf, *recvbuf = NULL, *cv; 2190 PetscInt i, cK = A->cmap->N, sendsiz, recvsiz, k, j, bn; 2191 PetscScalar _DOne = 1.0, _DZero = 0.0; 2192 const PetscScalar *av, *bv; 2193 PetscBLASInt cm, cn, ck, alda, blda = 0, clda; 2194 MPI_Request reqs[2]; 2195 const PetscInt *ranges; 2196 2197 PetscFunctionBegin; 2198 MatCheckProduct(C, 3); 2199 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 2200 abt = (Mat_MatTransMultDense *)C->product->data; 2201 PetscCall(PetscObjectGetComm((PetscObject)C, &comm)); 2202 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 2203 PetscCallMPI(MPI_Comm_size(comm, &size)); 2204 PetscCall(MatDenseGetArrayRead(a->A, &av)); 2205 PetscCall(MatDenseGetArrayRead(b->A, &bv)); 2206 PetscCall(MatDenseGetArrayWrite(c->A, &cv)); 2207 PetscCall(MatDenseGetLDA(a->A, &i)); 2208 PetscCall(PetscBLASIntCast(i, &alda)); 2209 PetscCall(MatDenseGetLDA(b->A, &i)); 2210 PetscCall(PetscBLASIntCast(i, &blda)); 2211 PetscCall(MatDenseGetLDA(c->A, &i)); 2212 PetscCall(PetscBLASIntCast(i, &clda)); 2213 PetscCall(MatGetOwnershipRanges(B, &ranges)); 2214 bn = B->rmap->n; 2215 if (blda == bn) { 2216 sendbuf = (PetscScalar *)bv; 2217 } else { 2218 sendbuf = abt->buf[0]; 2219 for (k = 0, i = 0; i < cK; i++) { 2220 for (j = 0; j < bn; j++, k++) sendbuf[k] = bv[i * blda + j]; 2221 } 2222 } 2223 if (size > 1) { 2224 sendto = (rank + size - 1) % size; 2225 recvfrom = (rank + size + 1) % size; 2226 } else { 2227 sendto = recvfrom = 0; 2228 } 2229 PetscCall(PetscBLASIntCast(cK, &ck)); 2230 PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm)); 2231 recvisfrom = rank; 2232 for (i = 0; i < size; i++) { 2233 /* we have finished receiving in sending, bufs can be read/modified */ 2234 PetscMPIInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */ 2235 PetscInt nextbn = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom]; 2236 2237 if (nextrecvisfrom != rank) { 2238 /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */ 2239 sendsiz = cK * bn; 2240 recvsiz = cK * nextbn; 2241 recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1]; 2242 PetscCallMPI(MPIU_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0])); 2243 PetscCallMPI(MPIU_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1])); 2244 } 2245 2246 /* local aseq * sendbuf^T */ 2247 PetscCall(PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn)); 2248 if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &cm, &cn, &ck, &_DOne, av, &alda, sendbuf, &cn, &_DZero, cv + clda * ranges[recvisfrom], &clda)); 2249 2250 if (nextrecvisfrom != rank) { 2251 /* wait for the sends and receives to complete, swap sendbuf and recvbuf */ 2252 PetscCallMPI(MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE)); 2253 } 2254 bn = nextbn; 2255 recvisfrom = nextrecvisfrom; 2256 sendbuf = recvbuf; 2257 } 2258 PetscCall(MatDenseRestoreArrayRead(a->A, &av)); 2259 PetscCall(MatDenseRestoreArrayRead(b->A, &bv)); 2260 PetscCall(MatDenseRestoreArrayWrite(c->A, &cv)); 2261 PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 2262 PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 2263 PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 2264 PetscFunctionReturn(PETSC_SUCCESS); 2265 } 2266 2267 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C) 2268 { 2269 Mat_MPIDense *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data; 2270 Mat_MatTransMultDense *abt; 2271 MPI_Comm comm; 2272 PetscMPIInt size, ibn; 2273 PetscScalar *cv, *sendbuf, *recvbuf; 2274 const PetscScalar *av, *bv; 2275 PetscInt blda, i, cK = A->cmap->N, k, j, bn; 2276 PetscScalar _DOne = 1.0, _DZero = 0.0; 2277 PetscBLASInt cm, cn, ck, alda, clda; 2278 2279 PetscFunctionBegin; 2280 MatCheckProduct(C, 3); 2281 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 2282 abt = (Mat_MatTransMultDense *)C->product->data; 2283 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 2284 PetscCallMPI(MPI_Comm_size(comm, &size)); 2285 PetscCall(MatDenseGetArrayRead(a->A, &av)); 2286 PetscCall(MatDenseGetArrayRead(b->A, &bv)); 2287 PetscCall(MatDenseGetArrayWrite(c->A, &cv)); 2288 PetscCall(MatDenseGetLDA(a->A, &i)); 2289 PetscCall(PetscBLASIntCast(i, &alda)); 2290 PetscCall(MatDenseGetLDA(b->A, &blda)); 2291 PetscCall(MatDenseGetLDA(c->A, &i)); 2292 PetscCall(PetscBLASIntCast(i, &clda)); 2293 /* copy transpose of B into buf[0] */ 2294 bn = B->rmap->n; 2295 sendbuf = abt->buf[0]; 2296 recvbuf = abt->buf[1]; 2297 for (k = 0, j = 0; j < bn; j++) { 2298 for (i = 0; i < cK; i++, k++) sendbuf[k] = bv[i * blda + j]; 2299 } 2300 PetscCall(MatDenseRestoreArrayRead(b->A, &bv)); 2301 PetscCall(PetscMPIIntCast(bn * cK, &ibn)); 2302 PetscCallMPI(MPI_Allgatherv(sendbuf, ibn, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm)); 2303 PetscCall(PetscBLASIntCast(cK, &ck)); 2304 PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm)); 2305 PetscCall(PetscBLASIntCast(c->A->cmap->n, &cn)); 2306 if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &cm, &cn, &ck, &_DOne, av, &alda, recvbuf, &ck, &_DZero, cv, &clda)); 2307 PetscCall(MatDenseRestoreArrayRead(a->A, &av)); 2308 PetscCall(MatDenseRestoreArrayRead(b->A, &bv)); 2309 PetscCall(MatDenseRestoreArrayWrite(c->A, &cv)); 2310 PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 2311 PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 2312 PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 2313 PetscFunctionReturn(PETSC_SUCCESS); 2314 } 2315 2316 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) 2317 { 2318 Mat_MatTransMultDense *abt; 2319 2320 PetscFunctionBegin; 2321 MatCheckProduct(C, 3); 2322 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 2323 abt = (Mat_MatTransMultDense *)C->product->data; 2324 switch (abt->alg) { 2325 case 1: 2326 PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C)); 2327 break; 2328 default: 2329 PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C)); 2330 break; 2331 } 2332 PetscFunctionReturn(PETSC_SUCCESS); 2333 } 2334 2335 static PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data) 2336 { 2337 Mat_MatMultDense *ab = (Mat_MatMultDense *)data; 2338 2339 PetscFunctionBegin; 2340 PetscCall(MatDestroy(&ab->Ce)); 2341 PetscCall(MatDestroy(&ab->Ae)); 2342 PetscCall(MatDestroy(&ab->Be)); 2343 PetscCall(PetscFree(ab)); 2344 PetscFunctionReturn(PETSC_SUCCESS); 2345 } 2346 2347 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) 2348 { 2349 Mat_MatMultDense *ab; 2350 Mat_MPIDense *mdn = (Mat_MPIDense *)A->data; 2351 2352 PetscFunctionBegin; 2353 MatCheckProduct(C, 3); 2354 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Missing product data"); 2355 ab = (Mat_MatMultDense *)C->product->data; 2356 if (ab->Ae && ab->Ce) { 2357 #if PetscDefined(HAVE_ELEMENTAL) 2358 PetscCall(MatConvert_MPIDense_Elemental(A, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Ae)); 2359 PetscCall(MatConvert_MPIDense_Elemental(B, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Be)); 2360 PetscCall(MatMatMultNumeric_Elemental(ab->Ae, ab->Be, ab->Ce)); 2361 PetscCall(MatConvert(ab->Ce, MATMPIDENSE, MAT_REUSE_MATRIX, &C)); 2362 #else 2363 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "PETSC_HAVE_ELEMENTAL not defined"); 2364 #endif 2365 } else { 2366 const PetscScalar *read; 2367 PetscScalar *write; 2368 PetscInt lda; 2369 2370 PetscCall(MatDenseGetLDA(B, &lda)); 2371 PetscCall(MatDenseGetArrayRead(B, &read)); 2372 PetscCall(MatDenseGetArrayWrite(ab->Be, &write)); 2373 if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); /* cannot be done during the symbolic phase because of possible calls to MatProductReplaceMats() */ 2374 for (PetscInt i = 0; i < C->cmap->N; ++i) { 2375 PetscCall(PetscSFBcastBegin(mdn->Mvctx, MPIU_SCALAR, read + i * lda, write + i * ab->Be->rmap->n, MPI_REPLACE)); 2376 PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, read + i * lda, write + i * ab->Be->rmap->n, MPI_REPLACE)); 2377 } 2378 PetscCall(MatDenseRestoreArrayWrite(ab->Be, &write)); 2379 PetscCall(MatDenseRestoreArrayRead(B, &read)); 2380 PetscCall(MatMatMultNumeric_SeqDense_SeqDense(((Mat_MPIDense *)A->data)->A, ab->Be, ((Mat_MPIDense *)C->data)->A)); 2381 } 2382 PetscFunctionReturn(PETSC_SUCCESS); 2383 } 2384 2385 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C) 2386 { 2387 Mat_Product *product = C->product; 2388 PetscInt alg; 2389 Mat_MatMultDense *ab; 2390 PetscBool flg; 2391 2392 PetscFunctionBegin; 2393 MatCheckProduct(C, 4); 2394 PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 2395 /* check local size of A and B */ 2396 PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ", %" PetscInt_FMT ")", 2397 A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend); 2398 2399 PetscCall(PetscStrcmp(product->alg, "petsc", &flg)); 2400 alg = flg ? 0 : 1; 2401 2402 /* setup C */ 2403 PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N)); 2404 PetscCall(MatSetType(C, MATMPIDENSE)); 2405 PetscCall(MatSetUp(C)); 2406 2407 /* create data structure for reuse Cdense */ 2408 PetscCall(PetscNew(&ab)); 2409 2410 switch (alg) { 2411 case 1: /* alg: "elemental" */ 2412 #if PetscDefined(HAVE_ELEMENTAL) 2413 /* create elemental matrices Ae and Be */ 2414 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &ab->Ae)); 2415 PetscCall(MatSetSizes(ab->Ae, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N)); 2416 PetscCall(MatSetType(ab->Ae, MATELEMENTAL)); 2417 PetscCall(MatSetUp(ab->Ae)); 2418 PetscCall(MatSetOption(ab->Ae, MAT_ROW_ORIENTED, PETSC_FALSE)); 2419 2420 PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &ab->Be)); 2421 PetscCall(MatSetSizes(ab->Be, PETSC_DECIDE, PETSC_DECIDE, B->rmap->N, B->cmap->N)); 2422 PetscCall(MatSetType(ab->Be, MATELEMENTAL)); 2423 PetscCall(MatSetUp(ab->Be)); 2424 PetscCall(MatSetOption(ab->Be, MAT_ROW_ORIENTED, PETSC_FALSE)); 2425 2426 /* compute symbolic Ce = Ae*Be */ 2427 PetscCall(MatCreate(PetscObjectComm((PetscObject)C), &ab->Ce)); 2428 PetscCall(MatMatMultSymbolic_Elemental(ab->Ae, ab->Be, fill, ab->Ce)); 2429 #else 2430 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "PETSC_HAVE_ELEMENTAL not defined"); 2431 #endif 2432 break; 2433 default: /* alg: "petsc" */ 2434 ab->Ae = NULL; 2435 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, A->cmap->N, B->cmap->N, NULL, &ab->Be)); 2436 ab->Ce = NULL; 2437 break; 2438 } 2439 2440 C->product->data = ab; 2441 C->product->destroy = MatDestroy_MatMatMult_MPIDense_MPIDense; 2442 C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense; 2443 PetscFunctionReturn(PETSC_SUCCESS); 2444 } 2445 2446 static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C) 2447 { 2448 Mat_Product *product = C->product; 2449 const char *algTypes[2] = {"petsc", "elemental"}; 2450 PetscInt alg, nalg = PetscDefined(HAVE_ELEMENTAL) ? 2 : 1; 2451 PetscBool flg = PETSC_FALSE; 2452 2453 PetscFunctionBegin; 2454 /* Set default algorithm */ 2455 alg = 0; /* default is petsc */ 2456 PetscCall(PetscStrcmp(product->alg, "default", &flg)); 2457 if (flg) PetscCall(MatProductSetAlgorithm(C, algTypes[alg])); 2458 2459 /* Get runtime option */ 2460 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat"); 2461 PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_AB", algTypes, nalg, algTypes[alg], &alg, &flg)); 2462 PetscOptionsEnd(); 2463 if (flg) PetscCall(MatProductSetAlgorithm(C, algTypes[alg])); 2464 2465 C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense; 2466 C->ops->productsymbolic = MatProductSymbolic_AB; 2467 PetscFunctionReturn(PETSC_SUCCESS); 2468 } 2469 2470 static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C) 2471 { 2472 Mat_Product *product = C->product; 2473 Mat A = product->A, B = product->B; 2474 2475 PetscFunctionBegin; 2476 PetscCheck(A->rmap->rstart == B->rmap->rstart && A->rmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", 2477 A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend); 2478 C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense; 2479 C->ops->productsymbolic = MatProductSymbolic_AtB; 2480 PetscFunctionReturn(PETSC_SUCCESS); 2481 } 2482 2483 static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C) 2484 { 2485 Mat_Product *product = C->product; 2486 const char *algTypes[2] = {"allgatherv", "cyclic"}; 2487 PetscInt alg, nalg = 2; 2488 PetscBool flg = PETSC_FALSE; 2489 2490 PetscFunctionBegin; 2491 /* Set default algorithm */ 2492 alg = 0; /* default is allgatherv */ 2493 PetscCall(PetscStrcmp(product->alg, "default", &flg)); 2494 if (flg) PetscCall(MatProductSetAlgorithm(C, algTypes[alg])); 2495 2496 /* Get runtime option */ 2497 if (product->api_user) { 2498 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatTransposeMult", "Mat"); 2499 PetscCall(PetscOptionsEList("-matmattransmult_mpidense_mpidense_via", "Algorithmic approach", "MatMatTransposeMult", algTypes, nalg, algTypes[alg], &alg, &flg)); 2500 PetscOptionsEnd(); 2501 } else { 2502 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_ABt", "Mat"); 2503 PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_ABt", algTypes, nalg, algTypes[alg], &alg, &flg)); 2504 PetscOptionsEnd(); 2505 } 2506 if (flg) PetscCall(MatProductSetAlgorithm(C, algTypes[alg])); 2507 2508 C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense; 2509 C->ops->productsymbolic = MatProductSymbolic_ABt; 2510 PetscFunctionReturn(PETSC_SUCCESS); 2511 } 2512 2513 static PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C) 2514 { 2515 Mat_Product *product = C->product; 2516 2517 PetscFunctionBegin; 2518 switch (product->type) { 2519 case MATPRODUCT_AB: 2520 PetscCall(MatProductSetFromOptions_MPIDense_AB(C)); 2521 break; 2522 case MATPRODUCT_AtB: 2523 PetscCall(MatProductSetFromOptions_MPIDense_AtB(C)); 2524 break; 2525 case MATPRODUCT_ABt: 2526 PetscCall(MatProductSetFromOptions_MPIDense_ABt(C)); 2527 break; 2528 default: 2529 break; 2530 } 2531 PetscFunctionReturn(PETSC_SUCCESS); 2532 } 2533