/*
   Basic functions for parallel dense matrices.
   Portions of this code are under:
   Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
*/

#include <../src/mat/impls/dense/mpi/mpidense.h> /*I "petscmat.h" I*/
#include <../src/mat/impls/aij/mpi/mpiaij.h>
#include <petscblaslapack.h>
#include <petsc/private/vecimpl.h>

/*@
  MatDenseGetLocalMatrix - For a `MATMPIDENSE` or `MATSEQDENSE` matrix, returns the sequential
  matrix that stores the locally owned rows of the operator. For a sequential matrix it returns
  the matrix itself.

  Input Parameter:
. A - the sequential or MPI `MATDENSE` matrix

  Output Parameter:
. B - the inner matrix

  Level: intermediate

.seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATMPIDENSE`, `MATSEQDENSE`
@*/
PetscErrorCode MatDenseGetLocalMatrix(Mat A, Mat *B)
{
  Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
  PetscBool     flg;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscAssertPointer(B, 2);
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &flg));
  if (flg) *B = mat->A;
  else {
    PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQDENSE, &flg));
    PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)A)->type_name);
    *B = A;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatCopy_MPIDense(Mat A, Mat B, MatStructure s)
{
  Mat_MPIDense *Amat = (Mat_MPIDense *)A->data;
  Mat_MPIDense *Bmat = (Mat_MPIDense *)B->data;

  PetscFunctionBegin;
  PetscCall(MatCopy(Amat->A, Bmat->A, s));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatShift_MPIDense(Mat A, PetscScalar alpha)
{
  Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
  PetscInt      j, lda, rstart = A->rmap->rstart, rend = A->rmap->rend, rend2;
  PetscScalar  *v;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArray(mat->A, &v));
  PetscCall(MatDenseGetLDA(mat->A, &lda));
  rend2 = PetscMin(rend, A->cmap->N);
  if (rend2 > rstart) {
    for (j = rstart; j < rend2; j++) v[j - rstart + j * lda] += alpha;
    PetscCall(PetscLogFlops(rend2 - rstart));
  }
  PetscCall(MatDenseRestoreArray(mat->A, &v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatGetRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
  PetscInt      lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;

  PetscFunctionBegin;
  PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows");
  lrow = row - rstart;
  PetscCall(MatGetRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatRestoreRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
  PetscInt      lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;

  PetscFunctionBegin;
  PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows");
  lrow = row - rstart;
  PetscCall(MatRestoreRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A, Mat *a)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)A->data;
  PetscInt      m = A->rmap->n, rstart = A->rmap->rstart;
  PetscScalar  *array;
  MPI_Comm      comm;
  PetscBool
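/*
  Usage sketch (not part of this implementation) for MatDenseGetLocalMatrix() above. The
  matrix sizes below are hypothetical; the returned inner matrix is a borrowed reference and
  must not be destroyed by the caller.

    Mat      A, Aloc;
    PetscInt lda;

    PetscCall(MatCreateDense(PETSC_COMM_WORLD, PETSC_DECIDE, PETSC_DECIDE, 100, 100, NULL, &A));
    PetscCall(MatSetRandom(A, NULL));
    PetscCall(MatDenseGetLocalMatrix(A, &Aloc));
    PetscCall(MatDenseGetLDA(Aloc, &lda));
    PetscCall(MatDestroy(&A));
*/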
flg; 103 Mat B; 104 105 PetscFunctionBegin; 106 PetscCall(MatHasCongruentLayouts(A, &flg)); 107 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only square matrices supported."); 108 PetscCall(PetscObjectQuery((PetscObject)A, "DiagonalBlock", (PetscObject *)&B)); 109 if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */ 110 #if PetscDefined(HAVE_CUDA) 111 PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSECUDA, &flg)); 112 PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSECUDA); 113 #elif PetscDefined(HAVE_HIP) 114 PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSEHIP, &flg)); 115 PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSEHIP); 116 #endif 117 PetscCall(PetscObjectGetComm((PetscObject)mdn->A, &comm)); 118 PetscCall(MatCreate(comm, &B)); 119 PetscCall(MatSetSizes(B, m, m, m, m)); 120 PetscCall(MatSetType(B, ((PetscObject)mdn->A)->type_name)); 121 PetscCall(MatDenseGetArrayRead(mdn->A, (const PetscScalar **)&array)); 122 PetscCall(MatSeqDenseSetPreallocation(B, array + m * rstart)); 123 PetscCall(MatDenseRestoreArrayRead(mdn->A, (const PetscScalar **)&array)); 124 PetscCall(PetscObjectCompose((PetscObject)A, "DiagonalBlock", (PetscObject)B)); 125 *a = B; 126 PetscCall(MatDestroy(&B)); 127 } else *a = B; 128 PetscFunctionReturn(PETSC_SUCCESS); 129 } 130 131 static PetscErrorCode MatSetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], const PetscScalar v[], InsertMode addv) 132 { 133 Mat_MPIDense *A = (Mat_MPIDense *)mat->data; 134 PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row; 135 PetscBool roworiented = A->roworiented; 136 137 PetscFunctionBegin; 138 for (i = 0; i < m; i++) { 139 if (idxm[i] < 0) continue; 140 PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large"); 141 if (idxm[i] >= rstart && idxm[i] < rend) { 142 row = idxm[i] - rstart; 143 if (roworiented) { 144 PetscCall(MatSetValues(A->A, 1, &row, n, idxn, PetscSafePointerPlusOffset(v, i * n), addv)); 145 } else { 146 for (j = 0; j < n; j++) { 147 if (idxn[j] < 0) continue; 148 PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large"); 149 PetscCall(MatSetValues(A->A, 1, &row, 1, &idxn[j], PetscSafePointerPlusOffset(v, i + j * m), addv)); 150 } 151 } 152 } else if (!A->donotstash) { 153 mat->assembled = PETSC_FALSE; 154 if (roworiented) { 155 PetscCall(MatStashValuesRow_Private(&mat->stash, idxm[i], n, idxn, PetscSafePointerPlusOffset(v, i * n), PETSC_FALSE)); 156 } else { 157 PetscCall(MatStashValuesCol_Private(&mat->stash, idxm[i], n, idxn, PetscSafePointerPlusOffset(v, i), m, PETSC_FALSE)); 158 } 159 } 160 } 161 PetscFunctionReturn(PETSC_SUCCESS); 162 } 163 164 static PetscErrorCode MatGetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[]) 165 { 166 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 167 PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row; 168 169 PetscFunctionBegin; 170 for (i = 0; i < m; i++) { 171 if (idxm[i] < 0) continue; /* negative row */ 172 PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large"); 173 if (idxm[i] >= rstart && idxm[i] < rend) { 174 row = idxm[i] - 
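/*
  Usage sketch for the insertion path implemented above: entries whose rows are owned by
  another process are stashed by MatSetValues() and communicated during assembly. Loop bounds
  are hypothetical; A is assumed to be a preallocated MATMPIDENSE matrix with at least four
  global columns.

    PetscInt    i, j, rstart, rend;
    PetscScalar v = 1.0;

    PetscCall(MatGetOwnershipRange(A, &rstart, &rend));
    for (i = rstart; i < rend; i++)
      for (j = 0; j < 4; j++) PetscCall(MatSetValue(A, i, j, v, INSERT_VALUES));
    if (rstart > 0) PetscCall(MatSetValue(A, rstart - 1, 0, v, INSERT_VALUES));
    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
*/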
rstart; 175 for (j = 0; j < n; j++) { 176 if (idxn[j] < 0) continue; /* negative column */ 177 PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large"); 178 PetscCall(MatGetValues(mdn->A, 1, &row, 1, &idxn[j], v + i * n + j)); 179 } 180 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported"); 181 } 182 PetscFunctionReturn(PETSC_SUCCESS); 183 } 184 185 static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A, PetscInt *lda) 186 { 187 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 188 189 PetscFunctionBegin; 190 PetscCall(MatDenseGetLDA(a->A, lda)); 191 PetscFunctionReturn(PETSC_SUCCESS); 192 } 193 194 static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A, PetscInt lda) 195 { 196 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 197 MatType mtype = MATSEQDENSE; 198 199 PetscFunctionBegin; 200 if (!a->A) { 201 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 202 PetscCall(PetscLayoutSetUp(A->rmap)); 203 PetscCall(PetscLayoutSetUp(A->cmap)); 204 PetscCall(MatCreate(PETSC_COMM_SELF, &a->A)); 205 PetscCall(MatSetSizes(a->A, A->rmap->n, A->cmap->N, A->rmap->n, A->cmap->N)); 206 #if PetscDefined(HAVE_CUDA) 207 PetscBool iscuda; 208 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSECUDA, &iscuda)); 209 if (iscuda) mtype = MATSEQDENSECUDA; 210 #elif PetscDefined(HAVE_HIP) 211 PetscBool iship; 212 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSEHIP, &iship)); 213 if (iship) mtype = MATSEQDENSEHIP; 214 #endif 215 PetscCall(MatSetType(a->A, mtype)); 216 } 217 PetscCall(MatDenseSetLDA(a->A, lda)); 218 PetscFunctionReturn(PETSC_SUCCESS); 219 } 220 221 static PetscErrorCode MatDenseGetArray_MPIDense(Mat A, PetscScalar **array) 222 { 223 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 224 225 PetscFunctionBegin; 226 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 227 PetscCall(MatDenseGetArray(a->A, array)); 228 PetscFunctionReturn(PETSC_SUCCESS); 229 } 230 231 static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A, PetscScalar **array) 232 { 233 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 234 235 PetscFunctionBegin; 236 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 237 PetscCall(MatDenseGetArrayRead(a->A, (const PetscScalar **)array)); 238 PetscFunctionReturn(PETSC_SUCCESS); 239 } 240 241 static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A, PetscScalar **array) 242 { 243 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 244 245 PetscFunctionBegin; 246 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 247 PetscCall(MatDenseGetArrayWrite(a->A, array)); 248 PetscFunctionReturn(PETSC_SUCCESS); 249 } 250 251 static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A, const PetscScalar *array) 252 { 253 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 254 255 PetscFunctionBegin; 256 PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 257 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 258 PetscCall(MatDensePlaceArray(a->A, array)); 259 PetscFunctionReturn(PETSC_SUCCESS); 260 } 261 262 static PetscErrorCode MatDenseResetArray_MPIDense(Mat A) 263 { 264 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 265 
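/*
  Storage sketch for the array accessors above: the local block is stored column-major with a
  leading dimension (lda) that can be larger than the local row count, so local entry (i, j)
  lives at array[i + j * lda]. The loop below fills the locally owned rows of all global
  columns; A is assumed to be a preallocated MATMPIDENSE matrix.

    PetscScalar *a;
    PetscInt     m, N, lda, i, j;

    PetscCall(MatGetLocalSize(A, &m, NULL));
    PetscCall(MatGetSize(A, NULL, &N));
    PetscCall(MatDenseGetLDA(A, &lda));
    PetscCall(MatDenseGetArray(A, &a));
    for (j = 0; j < N; j++)
      for (i = 0; i < m; i++) a[i + j * lda] = (PetscScalar)(i + j);
    PetscCall(MatDenseRestoreArray(A, &a));
*/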
266 PetscFunctionBegin; 267 PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 268 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 269 PetscCall(MatDenseResetArray(a->A)); 270 PetscFunctionReturn(PETSC_SUCCESS); 271 } 272 273 static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A, const PetscScalar *array) 274 { 275 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 276 277 PetscFunctionBegin; 278 PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 279 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 280 PetscCall(MatDenseReplaceArray(a->A, array)); 281 PetscFunctionReturn(PETSC_SUCCESS); 282 } 283 284 static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A, IS isrow, IS iscol, MatReuse scall, Mat *B) 285 { 286 Mat_MPIDense *mat = (Mat_MPIDense *)A->data, *newmatd; 287 PetscInt lda, i, j, rstart, rend, nrows, ncols, Ncols, nlrows, nlcols; 288 const PetscInt *irow, *icol; 289 const PetscScalar *v; 290 PetscScalar *bv; 291 Mat newmat; 292 IS iscol_local; 293 MPI_Comm comm_is, comm_mat; 294 295 PetscFunctionBegin; 296 PetscCall(PetscObjectGetComm((PetscObject)A, &comm_mat)); 297 PetscCall(PetscObjectGetComm((PetscObject)iscol, &comm_is)); 298 PetscCheck(comm_mat == comm_is, PETSC_COMM_SELF, PETSC_ERR_ARG_NOTSAMECOMM, "IS communicator must match matrix communicator"); 299 300 PetscCall(ISAllGather(iscol, &iscol_local)); 301 PetscCall(ISGetIndices(isrow, &irow)); 302 PetscCall(ISGetIndices(iscol_local, &icol)); 303 PetscCall(ISGetLocalSize(isrow, &nrows)); 304 PetscCall(ISGetLocalSize(iscol, &ncols)); 305 PetscCall(ISGetSize(iscol, &Ncols)); /* global number of columns, size of iscol_local */ 306 307 /* No parallel redistribution currently supported! Should really check each index set 308 to confirm that it is OK. ... Currently supports only submatrix same partitioning as 309 original matrix! */ 310 311 PetscCall(MatGetLocalSize(A, &nlrows, &nlcols)); 312 PetscCall(MatGetOwnershipRange(A, &rstart, &rend)); 313 314 /* Check submatrix call */ 315 if (scall == MAT_REUSE_MATRIX) { 316 /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */ 317 /* Really need to test rows and column sizes! 
*/ 318 newmat = *B; 319 } else { 320 /* Create and fill new matrix */ 321 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &newmat)); 322 PetscCall(MatSetSizes(newmat, nrows, ncols, PETSC_DECIDE, Ncols)); 323 PetscCall(MatSetType(newmat, ((PetscObject)A)->type_name)); 324 PetscCall(MatMPIDenseSetPreallocation(newmat, NULL)); 325 } 326 327 /* Now extract the data pointers and do the copy, column at a time */ 328 newmatd = (Mat_MPIDense *)newmat->data; 329 PetscCall(MatDenseGetArray(newmatd->A, &bv)); 330 PetscCall(MatDenseGetArrayRead(mat->A, &v)); 331 PetscCall(MatDenseGetLDA(mat->A, &lda)); 332 for (i = 0; i < Ncols; i++) { 333 const PetscScalar *av = v + lda * icol[i]; 334 for (j = 0; j < nrows; j++) *bv++ = av[irow[j] - rstart]; 335 } 336 PetscCall(MatDenseRestoreArrayRead(mat->A, &v)); 337 PetscCall(MatDenseRestoreArray(newmatd->A, &bv)); 338 339 /* Assemble the matrices so that the correct flags are set */ 340 PetscCall(MatAssemblyBegin(newmat, MAT_FINAL_ASSEMBLY)); 341 PetscCall(MatAssemblyEnd(newmat, MAT_FINAL_ASSEMBLY)); 342 343 /* Free work space */ 344 PetscCall(ISRestoreIndices(isrow, &irow)); 345 PetscCall(ISRestoreIndices(iscol_local, &icol)); 346 PetscCall(ISDestroy(&iscol_local)); 347 *B = newmat; 348 PetscFunctionReturn(PETSC_SUCCESS); 349 } 350 351 static PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A, PetscScalar **array) 352 { 353 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 354 355 PetscFunctionBegin; 356 PetscCall(MatDenseRestoreArray(a->A, array)); 357 PetscFunctionReturn(PETSC_SUCCESS); 358 } 359 360 static PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A, PetscScalar **array) 361 { 362 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 363 364 PetscFunctionBegin; 365 PetscCall(MatDenseRestoreArrayRead(a->A, (const PetscScalar **)array)); 366 PetscFunctionReturn(PETSC_SUCCESS); 367 } 368 369 static PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A, PetscScalar **array) 370 { 371 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 372 373 PetscFunctionBegin; 374 PetscCall(MatDenseRestoreArrayWrite(a->A, array)); 375 PetscFunctionReturn(PETSC_SUCCESS); 376 } 377 378 static PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat, MatAssemblyType mode) 379 { 380 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 381 PetscInt nstash, reallocs; 382 383 PetscFunctionBegin; 384 if (mdn->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS); 385 386 PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range)); 387 PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs)); 388 PetscCall(PetscInfo(mdn->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs)); 389 PetscFunctionReturn(PETSC_SUCCESS); 390 } 391 392 static PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat, MatAssemblyType mode) 393 { 394 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 395 PetscInt i, *row, *col, flg, j, rstart, ncols; 396 PetscMPIInt n; 397 PetscScalar *val; 398 399 PetscFunctionBegin; 400 if (!mdn->donotstash && !mat->nooffprocentries) { 401 /* wait on receives */ 402 while (1) { 403 PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg)); 404 if (!flg) break; 405 406 for (i = 0; i < n;) { 407 /* Now identify the consecutive vals belonging to the same row */ 408 for (j = i, rstart = row[j]; j < n; j++) { 409 if (row[j] != rstart) break; 410 } 411 if (j < n) ncols = j - i; 412 else ncols = n - i; 413 /* Now assemble all these values with a single function call */ 414 PetscCall(MatSetValues_MPIDense(mat, 
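/*
  Usage sketch for MatCreateSubMatrix() above: both index sets must live on the matrix
  communicator and the row IS may only select locally owned rows, since no redistribution is
  performed. The choice of index sets below is hypothetical; A is assumed assembled.

    IS       isrow, iscol;
    Mat      S;
    PetscInt rstart, rend, cstart, cend;

    PetscCall(MatGetOwnershipRange(A, &rstart, &rend));
    PetscCall(MatGetOwnershipRangeColumn(A, &cstart, &cend));
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)A), rend - rstart, rstart, 1, &isrow));
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)A), cend - cstart, cstart, 1, &iscol));
    PetscCall(MatCreateSubMatrix(A, isrow, iscol, MAT_INITIAL_MATRIX, &S));
    PetscCall(MatDestroy(&S));
    PetscCall(ISDestroy(&isrow));
    PetscCall(ISDestroy(&iscol));
*/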
1, row + i, ncols, col + i, val + i, mat->insertmode)); 415 i = j; 416 } 417 } 418 PetscCall(MatStashScatterEnd_Private(&mat->stash)); 419 } 420 421 PetscCall(MatAssemblyBegin(mdn->A, mode)); 422 PetscCall(MatAssemblyEnd(mdn->A, mode)); 423 PetscFunctionReturn(PETSC_SUCCESS); 424 } 425 426 static PetscErrorCode MatZeroEntries_MPIDense(Mat A) 427 { 428 Mat_MPIDense *l = (Mat_MPIDense *)A->data; 429 430 PetscFunctionBegin; 431 PetscCall(MatZeroEntries(l->A)); 432 PetscFunctionReturn(PETSC_SUCCESS); 433 } 434 435 static PetscErrorCode MatZeroRows_MPIDense(Mat A, PetscInt n, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) 436 { 437 Mat_MPIDense *l = (Mat_MPIDense *)A->data; 438 PetscInt i, len, *lrows; 439 440 PetscFunctionBegin; 441 /* get locally owned rows */ 442 PetscCall(PetscLayoutMapLocal(A->rmap, n, rows, &len, &lrows, NULL)); 443 /* fix right-hand side if needed */ 444 if (x && b) { 445 const PetscScalar *xx; 446 PetscScalar *bb; 447 448 PetscCall(VecGetArrayRead(x, &xx)); 449 PetscCall(VecGetArrayWrite(b, &bb)); 450 for (i = 0; i < len; ++i) bb[lrows[i]] = diag * xx[lrows[i]]; 451 PetscCall(VecRestoreArrayRead(x, &xx)); 452 PetscCall(VecRestoreArrayWrite(b, &bb)); 453 } 454 PetscCall(MatZeroRows(l->A, len, lrows, 0.0, NULL, NULL)); 455 if (diag != 0.0) { 456 Vec d; 457 458 PetscCall(MatCreateVecs(A, NULL, &d)); 459 PetscCall(VecSet(d, diag)); 460 PetscCall(MatDiagonalSet(A, d, INSERT_VALUES)); 461 PetscCall(VecDestroy(&d)); 462 } 463 PetscCall(PetscFree(lrows)); 464 PetscFunctionReturn(PETSC_SUCCESS); 465 } 466 467 PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat, Vec, Vec); 468 PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat, Vec, Vec, Vec); 469 PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat, Vec, Vec); 470 PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat, Vec, Vec, Vec); 471 472 static PetscErrorCode MatMult_MPIDense(Mat mat, Vec xx, Vec yy) 473 { 474 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 475 const PetscScalar *ax; 476 PetscScalar *ay; 477 PetscMemType axmtype, aymtype; 478 479 PetscFunctionBegin; 480 if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat)); 481 PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype)); 482 PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype)); 483 PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE)); 484 PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE)); 485 PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay)); 486 PetscCall(VecRestoreArrayReadAndMemType(xx, &ax)); 487 PetscCall((*mdn->A->ops->mult)(mdn->A, mdn->lvec, yy)); 488 PetscFunctionReturn(PETSC_SUCCESS); 489 } 490 491 static PetscErrorCode MatMultAddColumnRange_MPIDense(Mat mat, Vec xx, Vec yy, Vec zz, PetscInt c_start, PetscInt c_end) 492 { 493 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 494 const PetscScalar *ax; 495 PetscScalar *ay; 496 PetscMemType axmtype, aymtype; 497 498 PetscFunctionBegin; 499 if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat)); 500 PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype)); 501 PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype)); 502 PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE)); 503 PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE)); 504 PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay)); 505 PetscCall(VecRestoreArrayReadAndMemType(xx, &ax)); 506 PetscUseMethod(mdn->A, "MatMultAddColumnRange_C", (Mat, Vec, Vec, Vec, PetscInt, 
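/*
  Usage sketch for MatZeroRows() above: the listed rows are zeroed, diag is placed on the
  diagonal, and when x and b are provided b is adjusted so the solution keeps the values in x
  for those rows. The row choice is hypothetical; A, x and b are assumed compatible and
  assembled.

    PetscInt rstart, rend, row;

    PetscCall(MatGetOwnershipRange(A, &rstart, &rend));
    row = rstart;
    PetscCall(MatZeroRows(A, 1, &row, 1.0, x, b));
*/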
PetscInt), (mdn->A, mdn->lvec, yy, zz, c_start, c_end));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMultAdd_MPIDense(Mat mat, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
  PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
  PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
  PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
  PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
  PetscCall((*mdn->A->ops->multadd)(mdn->A, mdn->lvec, yy, zz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMultHermitianTransposeColumnRange_MPIDense(Mat A, Vec xx, Vec yy, PetscInt c_start, PetscInt c_end)
{
  Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
  PetscCall(VecSet(yy, 0.0));
  PetscUseMethod(a->A, "MatMultHermitianTransposeColumnRange_C", (Mat, Vec, Vec, PetscInt, PetscInt), (a->A, xx, a->lvec, c_start, c_end));
  PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(yy, &ay, &aymtype));
  PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
  PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
  PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
  PetscCall(VecRestoreArrayAndMemType(yy, &ay));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMultTransposeKernel_MPIDense(Mat A, Vec xx, Vec yy, PetscBool herm)
{
  Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
  PetscCall(VecSet(yy, 0.0));
  if (herm) PetscCall((*a->A->ops->multhermitiantranspose)(a->A, xx, a->lvec));
  else PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec));
  PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(yy, &ay, &aymtype));
  PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
  PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
  PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
  PetscCall(VecRestoreArrayAndMemType(yy, &ay));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMultHermitianTransposeAddColumnRange_MPIDense(Mat A, Vec xx, Vec yy, Vec zz, PetscInt c_start, PetscInt c_end)
{
  Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
  PetscCall(VecCopy(yy, zz));
  PetscUseMethod(a->A, "MatMultHermitianTransposeColumnRange_C", (Mat, Vec, Vec, PetscInt, PetscInt), (a->A, xx, a->lvec, c_start, c_end));
PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype)); 584 PetscCall(VecGetArrayAndMemType(zz, &ay, &aymtype)); 585 PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM)); 586 PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM)); 587 PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax)); 588 PetscCall(VecRestoreArrayAndMemType(zz, &ay)); 589 PetscFunctionReturn(PETSC_SUCCESS); 590 } 591 592 static PetscErrorCode MatMultTransposeAddKernel_MPIDense(Mat A, Vec xx, Vec yy, Vec zz, PetscBool herm) 593 { 594 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 595 const PetscScalar *ax; 596 PetscScalar *ay; 597 PetscMemType axmtype, aymtype; 598 599 PetscFunctionBegin; 600 if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); 601 PetscCall(VecCopy(yy, zz)); 602 if (herm) PetscCall((*a->A->ops->multhermitiantranspose)(a->A, xx, a->lvec)); 603 else PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec)); 604 PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype)); 605 PetscCall(VecGetArrayAndMemType(zz, &ay, &aymtype)); 606 PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM)); 607 PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM)); 608 PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax)); 609 PetscCall(VecRestoreArrayAndMemType(zz, &ay)); 610 PetscFunctionReturn(PETSC_SUCCESS); 611 } 612 613 static PetscErrorCode MatMultTranspose_MPIDense(Mat A, Vec xx, Vec yy) 614 { 615 PetscFunctionBegin; 616 PetscCall(MatMultTransposeKernel_MPIDense(A, xx, yy, PETSC_FALSE)); 617 PetscFunctionReturn(PETSC_SUCCESS); 618 } 619 620 static PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A, Vec xx, Vec yy, Vec zz) 621 { 622 PetscFunctionBegin; 623 PetscCall(MatMultTransposeAddKernel_MPIDense(A, xx, yy, zz, PETSC_FALSE)); 624 PetscFunctionReturn(PETSC_SUCCESS); 625 } 626 627 static PetscErrorCode MatMultHermitianTranspose_MPIDense(Mat A, Vec xx, Vec yy) 628 { 629 PetscFunctionBegin; 630 PetscCall(MatMultTransposeKernel_MPIDense(A, xx, yy, PETSC_TRUE)); 631 PetscFunctionReturn(PETSC_SUCCESS); 632 } 633 634 static PetscErrorCode MatMultHermitianTransposeAdd_MPIDense(Mat A, Vec xx, Vec yy, Vec zz) 635 { 636 PetscFunctionBegin; 637 PetscCall(MatMultTransposeAddKernel_MPIDense(A, xx, yy, zz, PETSC_TRUE)); 638 PetscFunctionReturn(PETSC_SUCCESS); 639 } 640 641 PetscErrorCode MatGetDiagonal_MPIDense(Mat A, Vec v) 642 { 643 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 644 PetscInt lda, len, i, nl, ng, m = A->rmap->n, radd; 645 PetscScalar *x; 646 const PetscScalar *av; 647 648 PetscFunctionBegin; 649 PetscCall(VecGetArray(v, &x)); 650 PetscCall(VecGetSize(v, &ng)); 651 PetscCheck(ng == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming mat and vec"); 652 PetscCall(VecGetLocalSize(v, &nl)); 653 len = PetscMin(a->A->rmap->n, a->A->cmap->n); 654 radd = A->rmap->rstart * m; 655 PetscCall(MatDenseGetArrayRead(a->A, &av)); 656 PetscCall(MatDenseGetLDA(a->A, &lda)); 657 for (i = 0; i < len; i++) x[i] = av[radd + i * lda + i]; 658 PetscCall(MatDenseRestoreArrayRead(a->A, &av)); 659 if (nl - i > 0) PetscCall(PetscArrayzero(x + i, nl - i)); 660 PetscCall(VecRestoreArray(v, &x)); 661 PetscFunctionReturn(PETSC_SUCCESS); 662 } 663 664 static PetscErrorCode MatDestroy_MPIDense(Mat mat) 665 { 666 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 667 668 PetscFunctionBegin; 669 PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N)); 
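/*
  Usage sketch for the multiplication kernels above: MatMult() broadcasts the input vector
  into the local work vector lvec with a PetscSF and then applies the sequential kernel,
  while MatMultTranspose() applies the local kernel first and sums the partial results with
  an SF reduction. A is assumed to be an assembled MATMPIDENSE matrix.

    Vec x, y, z;

    PetscCall(MatCreateVecs(A, &x, &y));
    PetscCall(VecDuplicate(x, &z));
    PetscCall(VecSet(x, 1.0));
    PetscCall(MatMult(A, x, y));
    PetscCall(MatMultTranspose(A, y, z));
    PetscCall(VecDestroy(&x));
    PetscCall(VecDestroy(&y));
    PetscCall(VecDestroy(&z));
*/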
670 PetscCall(MatStashDestroy_Private(&mat->stash)); 671 PetscCheck(!mdn->vecinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 672 PetscCheck(!mdn->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 673 PetscCall(MatDestroy(&mdn->A)); 674 PetscCall(VecDestroy(&mdn->lvec)); 675 PetscCall(PetscSFDestroy(&mdn->Mvctx)); 676 PetscCall(VecDestroy(&mdn->cvec)); 677 PetscCall(MatDestroy(&mdn->cmat)); 678 679 PetscCall(PetscFree(mat->data)); 680 PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL)); 681 682 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", NULL)); 683 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", NULL)); 684 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", NULL)); 685 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", NULL)); 686 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", NULL)); 687 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", NULL)); 688 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", NULL)); 689 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", NULL)); 690 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", NULL)); 691 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", NULL)); 692 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", NULL)); 693 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL)); 694 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", NULL)); 695 #if defined(PETSC_HAVE_ELEMENTAL) 696 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", NULL)); 697 #endif 698 #if defined(PETSC_HAVE_SCALAPACK) 699 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", NULL)); 700 #endif 701 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", NULL)); 702 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", NULL)); 703 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", NULL)); 704 #if defined(PETSC_HAVE_CUDA) 705 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", NULL)); 706 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", NULL)); 707 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", NULL)); 708 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensecuda_mpidense_C", NULL)); 709 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", NULL)); 710 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", NULL)); 711 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", NULL)); 712 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", NULL)); 713 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArray_C", NULL)); 714 
PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayRead_C", NULL)); 715 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayWrite_C", NULL)); 716 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArray_C", NULL)); 717 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayRead_C", NULL)); 718 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayWrite_C", NULL)); 719 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAPlaceArray_C", NULL)); 720 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAResetArray_C", NULL)); 721 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAReplaceArray_C", NULL)); 722 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDASetPreallocation_C", NULL)); 723 #endif 724 #if defined(PETSC_HAVE_HIP) 725 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", NULL)); 726 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", NULL)); 727 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", NULL)); 728 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensehip_mpidense_C", NULL)); 729 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensehip_C", NULL)); 730 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidensehip_C", NULL)); 731 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaij_C", NULL)); 732 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaijhipsparse_C", NULL)); 733 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArray_C", NULL)); 734 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayRead_C", NULL)); 735 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayWrite_C", NULL)); 736 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArray_C", NULL)); 737 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayRead_C", NULL)); 738 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayWrite_C", NULL)); 739 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPPlaceArray_C", NULL)); 740 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPResetArray_C", NULL)); 741 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPReplaceArray_C", NULL)); 742 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPSetPreallocation_C", NULL)); 743 #endif 744 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", NULL)); 745 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", NULL)); 746 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", NULL)); 747 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", NULL)); 748 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", NULL)); 749 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", NULL)); 750 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", NULL)); 751 
PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", NULL)); 752 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", NULL)); 753 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", NULL)); 754 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultAddColumnRange_C", NULL)); 755 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeColumnRange_C", NULL)); 756 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeAddColumnRange_C", NULL)); 757 758 PetscCall(PetscObjectCompose((PetscObject)mat, "DiagonalBlock", NULL)); 759 PetscFunctionReturn(PETSC_SUCCESS); 760 } 761 762 #include <petscdraw.h> 763 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat, PetscViewer viewer) 764 { 765 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 766 PetscMPIInt rank; 767 PetscViewerType vtype; 768 PetscBool iascii, isdraw; 769 PetscViewer sviewer; 770 PetscViewerFormat format; 771 772 PetscFunctionBegin; 773 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 774 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 775 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 776 if (iascii) { 777 PetscCall(PetscViewerGetType(viewer, &vtype)); 778 PetscCall(PetscViewerGetFormat(viewer, &format)); 779 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 780 MatInfo info; 781 PetscCall(MatGetInfo(mat, MAT_LOCAL, &info)); 782 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 783 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %" PetscInt_FMT " \n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, 784 (PetscInt)info.memory)); 785 PetscCall(PetscViewerFlush(viewer)); 786 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 787 if (mdn->Mvctx) PetscCall(PetscSFView(mdn->Mvctx, viewer)); 788 PetscFunctionReturn(PETSC_SUCCESS); 789 } else if (format == PETSC_VIEWER_ASCII_INFO) { 790 PetscFunctionReturn(PETSC_SUCCESS); 791 } 792 } else if (isdraw) { 793 PetscDraw draw; 794 PetscBool isnull; 795 796 PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw)); 797 PetscCall(PetscDrawIsNull(draw, &isnull)); 798 if (isnull) PetscFunctionReturn(PETSC_SUCCESS); 799 } 800 801 { 802 /* assemble the entire matrix onto first processor. */ 803 Mat A; 804 PetscInt M = mat->rmap->N, N = mat->cmap->N, m, row, i, nz; 805 PetscInt *cols; 806 PetscScalar *vals; 807 808 PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A)); 809 if (rank == 0) { 810 PetscCall(MatSetSizes(A, M, N, M, N)); 811 } else { 812 PetscCall(MatSetSizes(A, 0, 0, M, N)); 813 } 814 /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */ 815 PetscCall(MatSetType(A, MATMPIDENSE)); 816 PetscCall(MatMPIDenseSetPreallocation(A, NULL)); 817 818 /* Copy the matrix ... 
This isn't the most efficient means, 819 but it's quick for now */ 820 A->insertmode = INSERT_VALUES; 821 822 row = mat->rmap->rstart; 823 m = mdn->A->rmap->n; 824 for (i = 0; i < m; i++) { 825 PetscCall(MatGetRow_MPIDense(mat, row, &nz, &cols, &vals)); 826 PetscCall(MatSetValues_MPIDense(A, 1, &row, nz, cols, vals, INSERT_VALUES)); 827 PetscCall(MatRestoreRow_MPIDense(mat, row, &nz, &cols, &vals)); 828 row++; 829 } 830 831 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 832 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 833 PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 834 if (rank == 0) { 835 PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIDense *)A->data)->A, ((PetscObject)mat)->name)); 836 PetscCall(MatView_SeqDense(((Mat_MPIDense *)A->data)->A, sviewer)); 837 } 838 PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 839 PetscCall(MatDestroy(&A)); 840 } 841 PetscFunctionReturn(PETSC_SUCCESS); 842 } 843 844 static PetscErrorCode MatView_MPIDense(Mat mat, PetscViewer viewer) 845 { 846 PetscBool iascii, isbinary, isdraw, issocket; 847 848 PetscFunctionBegin; 849 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 850 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 851 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket)); 852 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 853 854 if (iascii || issocket || isdraw) { 855 PetscCall(MatView_MPIDense_ASCIIorDraworSocket(mat, viewer)); 856 } else if (isbinary) PetscCall(MatView_Dense_Binary(mat, viewer)); 857 PetscFunctionReturn(PETSC_SUCCESS); 858 } 859 860 static PetscErrorCode MatGetInfo_MPIDense(Mat A, MatInfoType flag, MatInfo *info) 861 { 862 Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 863 Mat mdn = mat->A; 864 PetscLogDouble isend[5], irecv[5]; 865 866 PetscFunctionBegin; 867 info->block_size = 1.0; 868 869 PetscCall(MatGetInfo(mdn, MAT_LOCAL, info)); 870 871 isend[0] = info->nz_used; 872 isend[1] = info->nz_allocated; 873 isend[2] = info->nz_unneeded; 874 isend[3] = info->memory; 875 isend[4] = info->mallocs; 876 if (flag == MAT_LOCAL) { 877 info->nz_used = isend[0]; 878 info->nz_allocated = isend[1]; 879 info->nz_unneeded = isend[2]; 880 info->memory = isend[3]; 881 info->mallocs = isend[4]; 882 } else if (flag == MAT_GLOBAL_MAX) { 883 PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)A))); 884 885 info->nz_used = irecv[0]; 886 info->nz_allocated = irecv[1]; 887 info->nz_unneeded = irecv[2]; 888 info->memory = irecv[3]; 889 info->mallocs = irecv[4]; 890 } else if (flag == MAT_GLOBAL_SUM) { 891 PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)A))); 892 893 info->nz_used = irecv[0]; 894 info->nz_allocated = irecv[1]; 895 info->nz_unneeded = irecv[2]; 896 info->memory = irecv[3]; 897 info->mallocs = irecv[4]; 898 } 899 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 900 info->fill_ratio_needed = 0; 901 info->factor_mallocs = 0; 902 PetscFunctionReturn(PETSC_SUCCESS); 903 } 904 905 static PetscErrorCode MatSetOption_MPIDense(Mat A, MatOption op, PetscBool flg) 906 { 907 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 908 909 PetscFunctionBegin; 910 switch (op) { 911 case MAT_NEW_NONZERO_LOCATIONS: 912 case MAT_NEW_NONZERO_LOCATION_ERR: 913 case MAT_NEW_NONZERO_ALLOCATION_ERR: 914 MatCheckPreallocated(A, 1); 915 
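/*
  Usage sketch for the viewer support above: per-process storage information is printed with
  the ASCII "info_detail" format, either from the command line with
  -mat_view ::ascii_info_detail or programmatically as below. A is assumed assembled.

    PetscCall(PetscViewerPushFormat(PETSC_VIEWER_STDOUT_WORLD, PETSC_VIEWER_ASCII_INFO_DETAIL));
    PetscCall(MatView(A, PETSC_VIEWER_STDOUT_WORLD));
    PetscCall(PetscViewerPopFormat(PETSC_VIEWER_STDOUT_WORLD));
*/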
PetscCall(MatSetOption(a->A, op, flg)); 916 break; 917 case MAT_ROW_ORIENTED: 918 MatCheckPreallocated(A, 1); 919 a->roworiented = flg; 920 PetscCall(MatSetOption(a->A, op, flg)); 921 break; 922 case MAT_FORCE_DIAGONAL_ENTRIES: 923 case MAT_KEEP_NONZERO_PATTERN: 924 case MAT_USE_HASH_TABLE: 925 case MAT_SORTED_FULL: 926 PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); 927 break; 928 case MAT_IGNORE_OFF_PROC_ENTRIES: 929 a->donotstash = flg; 930 break; 931 case MAT_SYMMETRIC: 932 case MAT_STRUCTURALLY_SYMMETRIC: 933 case MAT_HERMITIAN: 934 case MAT_SYMMETRY_ETERNAL: 935 case MAT_STRUCTURAL_SYMMETRY_ETERNAL: 936 case MAT_SPD: 937 case MAT_IGNORE_LOWER_TRIANGULAR: 938 case MAT_IGNORE_ZERO_ENTRIES: 939 case MAT_SPD_ETERNAL: 940 /* if the diagonal matrix is square it inherits some of the properties above */ 941 PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); 942 break; 943 default: 944 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %s", MatOptions[op]); 945 } 946 PetscFunctionReturn(PETSC_SUCCESS); 947 } 948 949 static PetscErrorCode MatDiagonalScale_MPIDense(Mat A, Vec ll, Vec rr) 950 { 951 Mat_MPIDense *mdn = (Mat_MPIDense *)A->data; 952 const PetscScalar *l; 953 PetscScalar x, *v, *vv, *r; 954 PetscInt i, j, s2a, s3a, s2, s3, m = mdn->A->rmap->n, n = mdn->A->cmap->n, lda; 955 956 PetscFunctionBegin; 957 PetscCall(MatDenseGetArray(mdn->A, &vv)); 958 PetscCall(MatDenseGetLDA(mdn->A, &lda)); 959 PetscCall(MatGetLocalSize(A, &s2, &s3)); 960 if (ll) { 961 PetscCall(VecGetLocalSize(ll, &s2a)); 962 PetscCheck(s2a == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left scaling vector non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT, s2a, s2); 963 PetscCall(VecGetArrayRead(ll, &l)); 964 for (i = 0; i < m; i++) { 965 x = l[i]; 966 v = vv + i; 967 for (j = 0; j < n; j++) { 968 (*v) *= x; 969 v += lda; 970 } 971 } 972 PetscCall(VecRestoreArrayRead(ll, &l)); 973 PetscCall(PetscLogFlops(1.0 * n * m)); 974 } 975 if (rr) { 976 const PetscScalar *ar; 977 978 PetscCall(VecGetLocalSize(rr, &s3a)); 979 PetscCheck(s3a == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Right scaling vec non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT ".", s3a, s3); 980 PetscCall(VecGetArrayRead(rr, &ar)); 981 if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); 982 PetscCall(VecGetArray(mdn->lvec, &r)); 983 PetscCall(PetscSFBcastBegin(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE)); 984 PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE)); 985 PetscCall(VecRestoreArrayRead(rr, &ar)); 986 for (i = 0; i < n; i++) { 987 x = r[i]; 988 v = vv + i * lda; 989 for (j = 0; j < m; j++) (*v++) *= x; 990 } 991 PetscCall(VecRestoreArray(mdn->lvec, &r)); 992 PetscCall(PetscLogFlops(1.0 * n * m)); 993 } 994 PetscCall(MatDenseRestoreArray(mdn->A, &vv)); 995 PetscFunctionReturn(PETSC_SUCCESS); 996 } 997 998 static PetscErrorCode MatNorm_MPIDense(Mat A, NormType type, PetscReal *nrm) 999 { 1000 Mat_MPIDense *mdn = (Mat_MPIDense *)A->data; 1001 PetscInt i, j; 1002 PetscMPIInt size, iN; 1003 PetscReal sum = 0.0; 1004 const PetscScalar *av, *v; 1005 1006 PetscFunctionBegin; 1007 PetscCall(MatDenseGetArrayRead(mdn->A, &av)); 1008 v = av; 1009 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 1010 if (size == 1) { 1011 PetscCall(MatNorm(mdn->A, type, nrm)); 1012 } else { 1013 if (type == NORM_FROBENIUS) { 1014 for (i = 0; i < mdn->A->cmap->n * mdn->A->rmap->n; i++) { 1015 sum += PetscRealPart(PetscConj(*v) * (*v)); 1016 v++; 1017 } 1018 
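/*
  Usage sketch for MatDiagonalScale() above: A is overwritten with diag(l) A diag(r), where l
  follows the row layout and r follows the column layout of A. The scaling factors are
  hypothetical; A is assumed assembled.

    Vec l, r;

    PetscCall(MatCreateVecs(A, &r, &l));
    PetscCall(VecSet(l, 2.0));
    PetscCall(VecSet(r, 0.5));
    PetscCall(MatDiagonalScale(A, l, r));
    PetscCall(VecDestroy(&l));
    PetscCall(VecDestroy(&r));
*/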
PetscCallMPI(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A))); 1019 *nrm = PetscSqrtReal(*nrm); 1020 PetscCall(PetscLogFlops(2.0 * mdn->A->cmap->n * mdn->A->rmap->n)); 1021 } else if (type == NORM_1) { 1022 PetscReal *tmp, *tmp2; 1023 PetscCall(PetscCalloc2(A->cmap->N, &tmp, A->cmap->N, &tmp2)); 1024 *nrm = 0.0; 1025 v = av; 1026 for (j = 0; j < mdn->A->cmap->n; j++) { 1027 for (i = 0; i < mdn->A->rmap->n; i++) { 1028 tmp[j] += PetscAbsScalar(*v); 1029 v++; 1030 } 1031 } 1032 PetscCall(PetscMPIIntCast(A->cmap->N, &iN)); 1033 PetscCallMPI(MPIU_Allreduce(tmp, tmp2, iN, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A))); 1034 for (j = 0; j < A->cmap->N; j++) { 1035 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 1036 } 1037 PetscCall(PetscFree2(tmp, tmp2)); 1038 PetscCall(PetscLogFlops(A->cmap->n * A->rmap->n)); 1039 } else if (type == NORM_INFINITY) { /* max row norm */ 1040 PetscReal ntemp; 1041 PetscCall(MatNorm(mdn->A, type, &ntemp)); 1042 PetscCallMPI(MPIU_Allreduce(&ntemp, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A))); 1043 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for two norm"); 1044 } 1045 PetscCall(MatDenseRestoreArrayRead(mdn->A, &av)); 1046 PetscFunctionReturn(PETSC_SUCCESS); 1047 } 1048 1049 static PetscErrorCode MatTranspose_MPIDense(Mat A, MatReuse reuse, Mat *matout) 1050 { 1051 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1052 Mat B; 1053 PetscInt M = A->rmap->N, N = A->cmap->N, m, n, *rwork, rstart = A->rmap->rstart; 1054 PetscInt j, i, lda; 1055 PetscScalar *v; 1056 1057 PetscFunctionBegin; 1058 if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout)); 1059 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 1060 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 1061 PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M)); 1062 PetscCall(MatSetType(B, ((PetscObject)A)->type_name)); 1063 PetscCall(MatMPIDenseSetPreallocation(B, NULL)); 1064 } else B = *matout; 1065 1066 m = a->A->rmap->n; 1067 n = a->A->cmap->n; 1068 PetscCall(MatDenseGetArrayRead(a->A, (const PetscScalar **)&v)); 1069 PetscCall(MatDenseGetLDA(a->A, &lda)); 1070 PetscCall(PetscMalloc1(m, &rwork)); 1071 for (i = 0; i < m; i++) rwork[i] = rstart + i; 1072 for (j = 0; j < n; j++) { 1073 PetscCall(MatSetValues(B, 1, &j, m, rwork, v, INSERT_VALUES)); 1074 v = PetscSafePointerPlusOffset(v, lda); 1075 } 1076 PetscCall(MatDenseRestoreArrayRead(a->A, (const PetscScalar **)&v)); 1077 PetscCall(PetscFree(rwork)); 1078 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 1079 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 1080 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 1081 *matout = B; 1082 } else { 1083 PetscCall(MatHeaderMerge(A, &B)); 1084 } 1085 PetscFunctionReturn(PETSC_SUCCESS); 1086 } 1087 1088 static PetscErrorCode MatDuplicate_MPIDense(Mat, MatDuplicateOption, Mat *); 1089 PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat, PetscScalar); 1090 1091 static PetscErrorCode MatSetUp_MPIDense(Mat A) 1092 { 1093 PetscFunctionBegin; 1094 PetscCall(PetscLayoutSetUp(A->rmap)); 1095 PetscCall(PetscLayoutSetUp(A->cmap)); 1096 if (!A->preallocated) PetscCall(MatMPIDenseSetPreallocation(A, NULL)); 1097 PetscFunctionReturn(PETSC_SUCCESS); 1098 } 1099 1100 static PetscErrorCode MatAXPY_MPIDense(Mat Y, PetscScalar alpha, Mat X, MatStructure str) 1101 { 1102 Mat_MPIDense *A = (Mat_MPIDense *)Y->data, *B = (Mat_MPIDense *)X->data; 1103 1104 PetscFunctionBegin; 1105 
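/*
  Usage sketch for the routines above: MatNorm() supports NORM_1, NORM_FROBENIUS and
  NORM_INFINITY for this type (the two norm is rejected), and MatTranspose() builds the
  parallel transpose by re-inserting local columns as rows. A is assumed assembled.

    PetscReal nrm;
    Mat       At;

    PetscCall(MatNorm(A, NORM_FROBENIUS, &nrm));
    PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &At));
    PetscCall(MatDestroy(&At));
*/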
PetscCall(MatAXPY(A->A, alpha, B->A, str)); 1106 PetscFunctionReturn(PETSC_SUCCESS); 1107 } 1108 1109 static PetscErrorCode MatConjugate_MPIDense(Mat mat) 1110 { 1111 Mat_MPIDense *a = (Mat_MPIDense *)mat->data; 1112 1113 PetscFunctionBegin; 1114 PetscCall(MatConjugate(a->A)); 1115 PetscFunctionReturn(PETSC_SUCCESS); 1116 } 1117 1118 static PetscErrorCode MatRealPart_MPIDense(Mat A) 1119 { 1120 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1121 1122 PetscFunctionBegin; 1123 PetscCall(MatRealPart(a->A)); 1124 PetscFunctionReturn(PETSC_SUCCESS); 1125 } 1126 1127 static PetscErrorCode MatImaginaryPart_MPIDense(Mat A) 1128 { 1129 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1130 1131 PetscFunctionBegin; 1132 PetscCall(MatImaginaryPart(a->A)); 1133 PetscFunctionReturn(PETSC_SUCCESS); 1134 } 1135 1136 static PetscErrorCode MatGetColumnVector_MPIDense(Mat A, Vec v, PetscInt col) 1137 { 1138 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1139 1140 PetscFunctionBegin; 1141 PetscCheck(a->A, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing local matrix"); 1142 PetscCheck(a->A->ops->getcolumnvector, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing get column operation"); 1143 PetscCall((*a->A->ops->getcolumnvector)(a->A, v, col)); 1144 PetscFunctionReturn(PETSC_SUCCESS); 1145 } 1146 1147 PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat, PetscInt, PetscReal *); 1148 1149 static PetscErrorCode MatGetColumnReductions_MPIDense(Mat A, PetscInt type, PetscReal *reductions) 1150 { 1151 PetscInt i, m, n; 1152 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1153 PetscReal *work; 1154 PetscMPIInt in; 1155 1156 PetscFunctionBegin; 1157 PetscCall(MatGetSize(A, &m, &n)); 1158 PetscCall(PetscMalloc1(n, &work)); 1159 if (type == REDUCTION_MEAN_REALPART) { 1160 PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_REALPART, work)); 1161 } else if (type == REDUCTION_MEAN_IMAGINARYPART) { 1162 PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_IMAGINARYPART, work)); 1163 } else { 1164 PetscCall(MatGetColumnReductions_SeqDense(a->A, type, work)); 1165 } 1166 if (type == NORM_2) { 1167 for (i = 0; i < n; i++) work[i] *= work[i]; 1168 } 1169 PetscCall(PetscMPIIntCast(n, &in)); 1170 if (type == NORM_INFINITY) { 1171 PetscCallMPI(MPIU_Allreduce(work, reductions, in, MPIU_REAL, MPIU_MAX, A->hdr.comm)); 1172 } else { 1173 PetscCallMPI(MPIU_Allreduce(work, reductions, in, MPIU_REAL, MPIU_SUM, A->hdr.comm)); 1174 } 1175 PetscCall(PetscFree(work)); 1176 if (type == NORM_2) { 1177 for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]); 1178 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 1179 for (i = 0; i < n; i++) reductions[i] /= m; 1180 } 1181 PetscFunctionReturn(PETSC_SUCCESS); 1182 } 1183 1184 static PetscErrorCode MatSetRandom_MPIDense(Mat x, PetscRandom rctx) 1185 { 1186 Mat_MPIDense *d = (Mat_MPIDense *)x->data; 1187 1188 PetscFunctionBegin; 1189 PetscCall(MatSetRandom(d->A, rctx)); 1190 #if defined(PETSC_HAVE_DEVICE) 1191 x->offloadmask = d->A->offloadmask; 1192 #endif 1193 PetscFunctionReturn(PETSC_SUCCESS); 1194 } 1195 1196 static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A, PetscBool *missing, PetscInt *d) 1197 { 1198 PetscFunctionBegin; 1199 *missing = PETSC_FALSE; 1200 PetscFunctionReturn(PETSC_SUCCESS); 1201 } 1202 1203 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat); 1204 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat); 1205 static 
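/*
  Usage sketch for the column reductions above: they are normally reached through public
  entry points such as MatGetColumnNorms(), which reduce the per-process contributions. The
  allocation below is hypothetical; A is assumed to be an assembled matrix with N global
  columns.

    PetscReal *norms;
    PetscInt   N;

    PetscCall(MatGetSize(A, NULL, &N));
    PetscCall(PetscMalloc1(N, &norms));
    PetscCall(MatGetColumnNorms(A, NORM_2, norms));
    PetscCall(PetscFree(norms));
*/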
PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat); 1206 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat); 1207 static PetscErrorCode MatEqual_MPIDense(Mat, Mat, PetscBool *); 1208 static PetscErrorCode MatLoad_MPIDense(Mat, PetscViewer); 1209 static PetscErrorCode MatProductSetFromOptions_MPIDense(Mat); 1210 1211 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense, 1212 MatGetRow_MPIDense, 1213 MatRestoreRow_MPIDense, 1214 MatMult_MPIDense, 1215 /* 4*/ MatMultAdd_MPIDense, 1216 MatMultTranspose_MPIDense, 1217 MatMultTransposeAdd_MPIDense, 1218 NULL, 1219 NULL, 1220 NULL, 1221 /* 10*/ NULL, 1222 NULL, 1223 NULL, 1224 NULL, 1225 MatTranspose_MPIDense, 1226 /* 15*/ MatGetInfo_MPIDense, 1227 MatEqual_MPIDense, 1228 MatGetDiagonal_MPIDense, 1229 MatDiagonalScale_MPIDense, 1230 MatNorm_MPIDense, 1231 /* 20*/ MatAssemblyBegin_MPIDense, 1232 MatAssemblyEnd_MPIDense, 1233 MatSetOption_MPIDense, 1234 MatZeroEntries_MPIDense, 1235 /* 24*/ MatZeroRows_MPIDense, 1236 NULL, 1237 NULL, 1238 NULL, 1239 NULL, 1240 /* 29*/ MatSetUp_MPIDense, 1241 NULL, 1242 NULL, 1243 MatGetDiagonalBlock_MPIDense, 1244 NULL, 1245 /* 34*/ MatDuplicate_MPIDense, 1246 NULL, 1247 NULL, 1248 NULL, 1249 NULL, 1250 /* 39*/ MatAXPY_MPIDense, 1251 MatCreateSubMatrices_MPIDense, 1252 NULL, 1253 MatGetValues_MPIDense, 1254 MatCopy_MPIDense, 1255 /* 44*/ NULL, 1256 MatScale_MPIDense, 1257 MatShift_MPIDense, 1258 NULL, 1259 NULL, 1260 /* 49*/ MatSetRandom_MPIDense, 1261 NULL, 1262 NULL, 1263 NULL, 1264 NULL, 1265 /* 54*/ NULL, 1266 NULL, 1267 NULL, 1268 NULL, 1269 NULL, 1270 /* 59*/ MatCreateSubMatrix_MPIDense, 1271 MatDestroy_MPIDense, 1272 MatView_MPIDense, 1273 NULL, 1274 NULL, 1275 /* 64*/ NULL, 1276 NULL, 1277 NULL, 1278 NULL, 1279 NULL, 1280 /* 69*/ NULL, 1281 NULL, 1282 NULL, 1283 NULL, 1284 NULL, 1285 /* 74*/ NULL, 1286 NULL, 1287 NULL, 1288 NULL, 1289 NULL, 1290 /* 79*/ NULL, 1291 NULL, 1292 NULL, 1293 NULL, 1294 /* 83*/ MatLoad_MPIDense, 1295 NULL, 1296 NULL, 1297 NULL, 1298 NULL, 1299 NULL, 1300 /* 89*/ NULL, 1301 NULL, 1302 NULL, 1303 NULL, 1304 NULL, 1305 /* 94*/ NULL, 1306 NULL, 1307 MatMatTransposeMultSymbolic_MPIDense_MPIDense, 1308 MatMatTransposeMultNumeric_MPIDense_MPIDense, 1309 NULL, 1310 /* 99*/ MatProductSetFromOptions_MPIDense, 1311 NULL, 1312 NULL, 1313 MatConjugate_MPIDense, 1314 NULL, 1315 /*104*/ NULL, 1316 MatRealPart_MPIDense, 1317 MatImaginaryPart_MPIDense, 1318 NULL, 1319 NULL, 1320 /*109*/ NULL, 1321 NULL, 1322 NULL, 1323 MatGetColumnVector_MPIDense, 1324 MatMissingDiagonal_MPIDense, 1325 /*114*/ NULL, 1326 NULL, 1327 NULL, 1328 NULL, 1329 NULL, 1330 /*119*/ NULL, 1331 NULL, 1332 MatMultHermitianTranspose_MPIDense, 1333 MatMultHermitianTransposeAdd_MPIDense, 1334 NULL, 1335 /*124*/ NULL, 1336 MatGetColumnReductions_MPIDense, 1337 NULL, 1338 NULL, 1339 NULL, 1340 /*129*/ NULL, 1341 NULL, 1342 MatTransposeMatMultSymbolic_MPIDense_MPIDense, 1343 MatTransposeMatMultNumeric_MPIDense_MPIDense, 1344 NULL, 1345 /*134*/ NULL, 1346 NULL, 1347 NULL, 1348 NULL, 1349 NULL, 1350 /*139*/ NULL, 1351 NULL, 1352 NULL, 1353 NULL, 1354 NULL, 1355 MatCreateMPIMatConcatenateSeqMat_MPIDense, 1356 /*145*/ NULL, 1357 NULL, 1358 NULL, 1359 NULL, 1360 NULL, 1361 /*150*/ NULL, 1362 NULL, 1363 NULL, 1364 NULL, 1365 NULL, 1366 NULL}; 1367 1368 static PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat, PetscScalar *data) 1369 { 1370 Mat_MPIDense *a = (Mat_MPIDense *)mat->data; 1371 MatType mtype = MATSEQDENSE; 1372 1373 PetscFunctionBegin; 1374 
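/*
  Usage sketch for this preallocation routine: it is normally reached through
  MatCreateDense() or by setting the type explicitly, as below. Passing NULL lets PETSc
  allocate the local m-by-N column-major array; a user-provided array may be supplied
  instead. The global size 100 x 50 is hypothetical.

    Mat A;

    PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
    PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, 100, 50));
    PetscCall(MatSetType(A, MATMPIDENSE));
    PetscCall(MatMPIDenseSetPreallocation(A, NULL));
*/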
PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1375 PetscCall(PetscLayoutSetUp(mat->rmap)); 1376 PetscCall(PetscLayoutSetUp(mat->cmap)); 1377 if (!a->A) { 1378 PetscCall(MatCreate(PETSC_COMM_SELF, &a->A)); 1379 PetscCall(MatSetSizes(a->A, mat->rmap->n, mat->cmap->N, mat->rmap->n, mat->cmap->N)); 1380 } 1381 #if defined(PETSC_HAVE_CUDA) 1382 PetscBool iscuda; 1383 PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSECUDA, &iscuda)); 1384 if (iscuda) mtype = MATSEQDENSECUDA; 1385 #endif 1386 #if defined(PETSC_HAVE_HIP) 1387 PetscBool iship; 1388 PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSEHIP, &iship)); 1389 if (iship) mtype = MATSEQDENSEHIP; 1390 #endif 1391 PetscCall(MatSetType(a->A, mtype)); 1392 PetscCall(MatSeqDenseSetPreallocation(a->A, data)); 1393 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 1394 mat->offloadmask = a->A->offloadmask; 1395 #endif 1396 mat->preallocated = PETSC_TRUE; 1397 mat->assembled = PETSC_TRUE; 1398 PetscFunctionReturn(PETSC_SUCCESS); 1399 } 1400 1401 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIDense(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 1402 { 1403 Mat B, C; 1404 1405 PetscFunctionBegin; 1406 PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &C)); 1407 PetscCall(MatConvert_SeqAIJ_SeqDense(C, MATSEQDENSE, MAT_INITIAL_MATRIX, &B)); 1408 PetscCall(MatDestroy(&C)); 1409 if (reuse == MAT_REUSE_MATRIX) { 1410 C = *newmat; 1411 } else C = NULL; 1412 PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C)); 1413 PetscCall(MatDestroy(&B)); 1414 if (reuse == MAT_INPLACE_MATRIX) { 1415 PetscCall(MatHeaderReplace(A, &C)); 1416 } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C; 1417 PetscFunctionReturn(PETSC_SUCCESS); 1418 } 1419 1420 static PetscErrorCode MatConvert_MPIDense_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 1421 { 1422 Mat B, C; 1423 1424 PetscFunctionBegin; 1425 PetscCall(MatDenseGetLocalMatrix(A, &C)); 1426 PetscCall(MatConvert_SeqDense_SeqAIJ(C, MATSEQAIJ, MAT_INITIAL_MATRIX, &B)); 1427 if (reuse == MAT_REUSE_MATRIX) { 1428 C = *newmat; 1429 } else C = NULL; 1430 PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? 
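/*
  Usage sketch for the conversions above: a parallel AIJ matrix can be densified, and a dense
  matrix sparsified, with MatConvert(). A is assumed to be an assembled MATMPIAIJ matrix; B
  is created by the call.

    Mat B;

    PetscCall(MatConvert(A, MATMPIDENSE, MAT_INITIAL_MATRIX, &B));
    PetscCall(MatDestroy(&B));
*/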
MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C)); 1431 PetscCall(MatDestroy(&B)); 1432 if (reuse == MAT_INPLACE_MATRIX) { 1433 PetscCall(MatHeaderReplace(A, &C)); 1434 } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C; 1435 PetscFunctionReturn(PETSC_SUCCESS); 1436 } 1437 1438 #if defined(PETSC_HAVE_ELEMENTAL) 1439 PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 1440 { 1441 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1442 Mat mat_elemental; 1443 PetscScalar *v; 1444 PetscInt m = A->rmap->n, N = A->cmap->N, rstart = A->rmap->rstart, i, *rows, *cols, lda; 1445 1446 PetscFunctionBegin; 1447 if (reuse == MAT_REUSE_MATRIX) { 1448 mat_elemental = *newmat; 1449 PetscCall(MatZeroEntries(*newmat)); 1450 } else { 1451 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental)); 1452 PetscCall(MatSetSizes(mat_elemental, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N)); 1453 PetscCall(MatSetType(mat_elemental, MATELEMENTAL)); 1454 PetscCall(MatSetUp(mat_elemental)); 1455 PetscCall(MatSetOption(mat_elemental, MAT_ROW_ORIENTED, PETSC_FALSE)); 1456 } 1457 1458 PetscCall(PetscMalloc2(m, &rows, N, &cols)); 1459 for (i = 0; i < N; i++) cols[i] = i; 1460 for (i = 0; i < m; i++) rows[i] = rstart + i; 1461 1462 /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */ 1463 PetscCall(MatDenseGetArray(A, &v)); 1464 PetscCall(MatDenseGetLDA(a->A, &lda)); 1465 if (lda == m) PetscCall(MatSetValues(mat_elemental, m, rows, N, cols, v, ADD_VALUES)); 1466 else { 1467 for (i = 0; i < N; i++) PetscCall(MatSetValues(mat_elemental, m, rows, 1, &i, v + lda * i, ADD_VALUES)); 1468 } 1469 PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY)); 1470 PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY)); 1471 PetscCall(MatDenseRestoreArray(A, &v)); 1472 PetscCall(PetscFree2(rows, cols)); 1473 1474 if (reuse == MAT_INPLACE_MATRIX) { 1475 PetscCall(MatHeaderReplace(A, &mat_elemental)); 1476 } else { 1477 *newmat = mat_elemental; 1478 } 1479 PetscFunctionReturn(PETSC_SUCCESS); 1480 } 1481 #endif 1482 1483 static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A, PetscInt col, PetscScalar **vals) 1484 { 1485 Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 1486 1487 PetscFunctionBegin; 1488 PetscCall(MatDenseGetColumn(mat->A, col, vals)); 1489 PetscFunctionReturn(PETSC_SUCCESS); 1490 } 1491 1492 static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A, PetscScalar **vals) 1493 { 1494 Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 1495 1496 PetscFunctionBegin; 1497 PetscCall(MatDenseRestoreColumn(mat->A, vals)); 1498 PetscFunctionReturn(PETSC_SUCCESS); 1499 } 1500 1501 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 1502 { 1503 Mat_MPIDense *mat; 1504 PetscInt m, nloc, N; 1505 1506 PetscFunctionBegin; 1507 PetscCall(MatGetSize(inmat, &m, &N)); 1508 PetscCall(MatGetLocalSize(inmat, NULL, &nloc)); 1509 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 1510 PetscInt sum; 1511 1512 if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N)); 1513 /* Check sum(n) = N */ 1514 PetscCallMPI(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm)); 1515 PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N); 1516 1517 PetscCall(MatCreateDense(comm, m, n, PETSC_DETERMINE, N, NULL, outmat)); 1518 PetscCall(MatSetOption(*outmat, 
MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 1519 } 1520 1521 /* numeric phase */ 1522 mat = (Mat_MPIDense *)(*outmat)->data; 1523 PetscCall(MatCopy(inmat, mat->A, SAME_NONZERO_PATTERN)); 1524 PetscFunctionReturn(PETSC_SUCCESS); 1525 } 1526 1527 PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A, PetscInt col, Vec *v) 1528 { 1529 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1530 PetscInt lda; 1531 1532 PetscFunctionBegin; 1533 PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1534 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1535 if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec)); 1536 a->vecinuse = col + 1; 1537 PetscCall(MatDenseGetLDA(a->A, &lda)); 1538 PetscCall(MatDenseGetArray(a->A, (PetscScalar **)&a->ptrinuse)); 1539 PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)lda))); 1540 *v = a->cvec; 1541 PetscFunctionReturn(PETSC_SUCCESS); 1542 } 1543 1544 PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A, PetscInt col, Vec *v) 1545 { 1546 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1547 1548 PetscFunctionBegin; 1549 PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first"); 1550 PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector"); 1551 VecCheckAssembled(a->cvec); 1552 a->vecinuse = 0; 1553 PetscCall(MatDenseRestoreArray(a->A, (PetscScalar **)&a->ptrinuse)); 1554 PetscCall(VecResetArray(a->cvec)); 1555 if (v) *v = NULL; 1556 PetscFunctionReturn(PETSC_SUCCESS); 1557 } 1558 1559 PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v) 1560 { 1561 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1562 PetscInt lda; 1563 1564 PetscFunctionBegin; 1565 PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1566 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1567 if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec)); 1568 a->vecinuse = col + 1; 1569 PetscCall(MatDenseGetLDA(a->A, &lda)); 1570 PetscCall(MatDenseGetArrayRead(a->A, &a->ptrinuse)); 1571 PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)lda))); 1572 PetscCall(VecLockReadPush(a->cvec)); 1573 *v = a->cvec; 1574 PetscFunctionReturn(PETSC_SUCCESS); 1575 } 1576 1577 PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v) 1578 { 1579 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1580 1581 PetscFunctionBegin; 1582 PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first"); 1583 PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector"); 1584 VecCheckAssembled(a->cvec); 1585 a->vecinuse = 0; 1586 PetscCall(MatDenseRestoreArrayRead(a->A, &a->ptrinuse)); 1587 PetscCall(VecLockReadPop(a->cvec)); 1588 PetscCall(VecResetArray(a->cvec)); 1589 if (v) *v = NULL; 1590 PetscFunctionReturn(PETSC_SUCCESS); 1591 } 1592 1593 PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v) 1594 { 1595 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1596 PetscInt lda; 1597 1598 PetscFunctionBegin; 1599 PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() 
first"); 1600 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1601 if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec)); 1602 a->vecinuse = col + 1; 1603 PetscCall(MatDenseGetLDA(a->A, &lda)); 1604 PetscCall(MatDenseGetArrayWrite(a->A, (PetscScalar **)&a->ptrinuse)); 1605 PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)lda))); 1606 *v = a->cvec; 1607 PetscFunctionReturn(PETSC_SUCCESS); 1608 } 1609 1610 PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v) 1611 { 1612 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1613 1614 PetscFunctionBegin; 1615 PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first"); 1616 PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector"); 1617 VecCheckAssembled(a->cvec); 1618 a->vecinuse = 0; 1619 PetscCall(MatDenseRestoreArrayWrite(a->A, (PetscScalar **)&a->ptrinuse)); 1620 PetscCall(VecResetArray(a->cvec)); 1621 if (v) *v = NULL; 1622 PetscFunctionReturn(PETSC_SUCCESS); 1623 } 1624 1625 static PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A, PetscInt rbegin, PetscInt rend, PetscInt cbegin, PetscInt cend, Mat *v) 1626 { 1627 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1628 Mat_MPIDense *c; 1629 MPI_Comm comm; 1630 PetscInt pbegin, pend; 1631 1632 PetscFunctionBegin; 1633 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 1634 PetscCheck(!a->vecinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1635 PetscCheck(!a->matinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1636 pbegin = PetscMax(0, PetscMin(A->rmap->rend, rbegin) - A->rmap->rstart); 1637 pend = PetscMin(A->rmap->n, PetscMax(0, rend - A->rmap->rstart)); 1638 if (!a->cmat) { 1639 PetscCall(MatCreate(comm, &a->cmat)); 1640 PetscCall(MatSetType(a->cmat, ((PetscObject)A)->type_name)); 1641 if (rend - rbegin == A->rmap->N) PetscCall(PetscLayoutReference(A->rmap, &a->cmat->rmap)); 1642 else { 1643 PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin)); 1644 PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin)); 1645 PetscCall(PetscLayoutSetUp(a->cmat->rmap)); 1646 } 1647 PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin)); 1648 PetscCall(PetscLayoutSetUp(a->cmat->cmap)); 1649 } else { 1650 PetscBool same = (PetscBool)(rend - rbegin == a->cmat->rmap->N); 1651 if (same && a->cmat->rmap->N != A->rmap->N) { 1652 same = (PetscBool)(pend - pbegin == a->cmat->rmap->n); 1653 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &same, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A))); 1654 } 1655 if (!same) { 1656 PetscCall(PetscLayoutDestroy(&a->cmat->rmap)); 1657 PetscCall(PetscLayoutCreate(comm, &a->cmat->rmap)); 1658 PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin)); 1659 PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin)); 1660 PetscCall(PetscLayoutSetUp(a->cmat->rmap)); 1661 } 1662 if (cend - cbegin != a->cmat->cmap->N) { 1663 PetscCall(PetscLayoutDestroy(&a->cmat->cmap)); 1664 PetscCall(PetscLayoutCreate(comm, &a->cmat->cmap)); 1665 PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin)); 1666 PetscCall(PetscLayoutSetUp(a->cmat->cmap)); 1667 } 1668 } 1669 c = (Mat_MPIDense *)a->cmat->data; 1670 PetscCheck(!c->A, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1671 PetscCall(MatDenseGetSubMatrix(a->A, pbegin, 
pend, cbegin, cend, &c->A)); 1672 1673 a->cmat->preallocated = PETSC_TRUE; 1674 a->cmat->assembled = PETSC_TRUE; 1675 #if defined(PETSC_HAVE_DEVICE) 1676 a->cmat->offloadmask = c->A->offloadmask; 1677 #endif 1678 a->matinuse = cbegin + 1; 1679 *v = a->cmat; 1680 PetscFunctionReturn(PETSC_SUCCESS); 1681 } 1682 1683 static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A, Mat *v) 1684 { 1685 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1686 Mat_MPIDense *c; 1687 1688 PetscFunctionBegin; 1689 PetscCheck(a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetSubMatrix() first"); 1690 PetscCheck(a->cmat, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal matrix"); 1691 PetscCheck(*v == a->cmat, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Not the matrix obtained from MatDenseGetSubMatrix()"); 1692 a->matinuse = 0; 1693 c = (Mat_MPIDense *)a->cmat->data; 1694 PetscCall(MatDenseRestoreSubMatrix(a->A, &c->A)); 1695 if (v) *v = NULL; 1696 #if defined(PETSC_HAVE_DEVICE) 1697 A->offloadmask = a->A->offloadmask; 1698 #endif 1699 PetscFunctionReturn(PETSC_SUCCESS); 1700 } 1701 1702 /*MC 1703 MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices. 1704 1705 Options Database Key: 1706 . -mat_type mpidense - sets the matrix type to `MATMPIDENSE` during a call to `MatSetFromOptions()` 1707 1708 Level: beginner 1709 1710 .seealso: [](ch_matrices), `Mat`, `MatCreateDense()`, `MATSEQDENSE`, `MATDENSE` 1711 M*/ 1712 PetscErrorCode MatCreate_MPIDense(Mat mat) 1713 { 1714 Mat_MPIDense *a; 1715 1716 PetscFunctionBegin; 1717 PetscCall(PetscNew(&a)); 1718 mat->data = (void *)a; 1719 mat->ops[0] = MatOps_Values; 1720 1721 mat->insertmode = NOT_SET_VALUES; 1722 1723 /* build cache for off array entries formed */ 1724 a->donotstash = PETSC_FALSE; 1725 1726 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)mat), 1, &mat->stash)); 1727 1728 /* stuff used for matrix vector multiply */ 1729 a->lvec = NULL; 1730 a->Mvctx = NULL; 1731 a->roworiented = PETSC_TRUE; 1732 1733 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", MatDenseGetLDA_MPIDense)); 1734 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", MatDenseSetLDA_MPIDense)); 1735 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", MatDenseGetArray_MPIDense)); 1736 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", MatDenseRestoreArray_MPIDense)); 1737 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", MatDenseGetArrayRead_MPIDense)); 1738 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", MatDenseRestoreArrayRead_MPIDense)); 1739 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", MatDenseGetArrayWrite_MPIDense)); 1740 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", MatDenseRestoreArrayWrite_MPIDense)); 1741 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", MatDensePlaceArray_MPIDense)); 1742 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", MatDenseResetArray_MPIDense)); 1743 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", MatDenseReplaceArray_MPIDense)); 1744 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDense)); 1745 PetscCall(PetscObjectComposeFunction((PetscObject)mat, 
"MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDense)); 1746 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDense)); 1747 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDense)); 1748 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDense)); 1749 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDense)); 1750 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", MatDenseGetSubMatrix_MPIDense)); 1751 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", MatDenseRestoreSubMatrix_MPIDense)); 1752 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense)); 1753 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", MatConvert_MPIDense_MPIAIJ)); 1754 #if defined(PETSC_HAVE_ELEMENTAL) 1755 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", MatConvert_MPIDense_Elemental)); 1756 #endif 1757 #if defined(PETSC_HAVE_SCALAPACK) 1758 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", MatConvert_Dense_ScaLAPACK)); 1759 #endif 1760 #if defined(PETSC_HAVE_CUDA) 1761 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", MatConvert_MPIDense_MPIDenseCUDA)); 1762 #endif 1763 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", MatMPIDenseSetPreallocation_MPIDense)); 1764 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense)); 1765 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ)); 1766 #if defined(PETSC_HAVE_CUDA) 1767 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense)); 1768 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ)); 1769 #endif 1770 #if defined(PETSC_HAVE_HIP) 1771 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", MatConvert_MPIDense_MPIDenseHIP)); 1772 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense)); 1773 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ)); 1774 #endif 1775 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", MatDenseGetColumn_MPIDense)); 1776 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", MatDenseRestoreColumn_MPIDense)); 1777 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultAddColumnRange_C", MatMultAddColumnRange_MPIDense)); 1778 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeColumnRange_C", MatMultHermitianTransposeColumnRange_MPIDense)); 1779 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeAddColumnRange_C", 
MatMultHermitianTransposeAddColumnRange_MPIDense)); 1780 PetscCall(PetscObjectChangeTypeName((PetscObject)mat, MATMPIDENSE)); 1781 PetscFunctionReturn(PETSC_SUCCESS); 1782 } 1783 1784 /*MC 1785 MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices. 1786 1787 This matrix type is identical to `MATSEQDENSE` when constructed with a single process communicator, 1788 and `MATMPIDENSE` otherwise. 1789 1790 Options Database Key: 1791 . -mat_type dense - sets the matrix type to `MATDENSE` during a call to `MatSetFromOptions()` 1792 1793 Level: beginner 1794 1795 .seealso: [](ch_matrices), `Mat`, `MATSEQDENSE`, `MATMPIDENSE`, `MATDENSECUDA`, `MATDENSEHIP` 1796 M*/ 1797 1798 /*@ 1799 MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries 1800 1801 Collective 1802 1803 Input Parameters: 1804 + B - the matrix 1805 - data - optional location of matrix data. Set to `NULL` for PETSc 1806 to control all matrix memory allocation. 1807 1808 Level: intermediate 1809 1810 Notes: 1811 The dense format is fully compatible with standard Fortran 1812 storage by columns. 1813 1814 The data input variable is intended primarily for Fortran programmers 1815 who wish to allocate their own matrix memory space. Most users should 1816 set `data` to `NULL`. 1817 1818 .seealso: [](ch_matrices), `Mat`, `MATMPIDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()` 1819 @*/ 1820 PetscErrorCode MatMPIDenseSetPreallocation(Mat B, PetscScalar *data) 1821 { 1822 PetscFunctionBegin; 1823 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 1824 PetscTryMethod(B, "MatMPIDenseSetPreallocation_C", (Mat, PetscScalar *), (B, data)); 1825 PetscFunctionReturn(PETSC_SUCCESS); 1826 } 1827 1828 /*@ 1829 MatDensePlaceArray - Allows one to replace the array in a `MATDENSE` matrix with an 1830 array provided by the user. This is useful to avoid copying an array 1831 into a matrix 1832 1833 Not Collective 1834 1835 Input Parameters: 1836 + mat - the matrix 1837 - array - the array in column major order 1838 1839 Level: developer 1840 1841 Note: 1842 You can return to the original array with a call to `MatDenseResetArray()`. The user is responsible for freeing this array; it will not be 1843 freed when the matrix is destroyed. 1844 1845 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDenseResetArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`, 1846 `MatDenseReplaceArray()` 1847 @*/ 1848 PetscErrorCode MatDensePlaceArray(Mat mat, const PetscScalar *array) 1849 { 1850 PetscFunctionBegin; 1851 PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 1852 PetscUseMethod(mat, "MatDensePlaceArray_C", (Mat, const PetscScalar *), (mat, array)); 1853 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 1854 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 1855 mat->offloadmask = PETSC_OFFLOAD_CPU; 1856 #endif 1857 PetscFunctionReturn(PETSC_SUCCESS); 1858 } 1859 1860 /*@ 1861 MatDenseResetArray - Resets the matrix array to that it previously had before the call to `MatDensePlaceArray()` 1862 1863 Not Collective 1864 1865 Input Parameter: 1866 . 
mat - the matrix 1867 1868 Level: developer 1869 1870 Note: 1871 You can only call this after a call to `MatDensePlaceArray()` 1872 1873 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDensePlaceArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()` 1874 @*/ 1875 PetscErrorCode MatDenseResetArray(Mat mat) 1876 { 1877 PetscFunctionBegin; 1878 PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 1879 PetscUseMethod(mat, "MatDenseResetArray_C", (Mat), (mat)); 1880 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 1881 PetscFunctionReturn(PETSC_SUCCESS); 1882 } 1883 1884 /*@ 1885 MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an 1886 array provided by the user. This is useful to avoid copying an array 1887 into a matrix 1888 1889 Not Collective 1890 1891 Input Parameters: 1892 + mat - the matrix 1893 - array - the array in column major order 1894 1895 Level: developer 1896 1897 Note: 1898 The memory passed in MUST be obtained with `PetscMalloc()` and CANNOT be 1899 freed by the user. It will be freed when the matrix is destroyed. 1900 1901 .seealso: [](ch_matrices), `Mat`, `MatDensePlaceArray()`, `MatDenseGetArray()`, `VecReplaceArray()` 1902 @*/ 1903 PetscErrorCode MatDenseReplaceArray(Mat mat, const PetscScalar *array) 1904 { 1905 PetscFunctionBegin; 1906 PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 1907 PetscUseMethod(mat, "MatDenseReplaceArray_C", (Mat, const PetscScalar *), (mat, array)); 1908 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 1909 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 1910 mat->offloadmask = PETSC_OFFLOAD_CPU; 1911 #endif 1912 PetscFunctionReturn(PETSC_SUCCESS); 1913 } 1914 1915 /*@ 1916 MatCreateDense - Creates a matrix in `MATDENSE` format. 1917 1918 Collective 1919 1920 Input Parameters: 1921 + comm - MPI communicator 1922 . m - number of local rows (or `PETSC_DECIDE` to have calculated if `M` is given) 1923 . n - number of local columns (or `PETSC_DECIDE` to have calculated if `N` is given) 1924 . M - number of global rows (or `PETSC_DECIDE` to have calculated if `m` is given) 1925 . N - number of global columns (or `PETSC_DECIDE` to have calculated if `n` is given) 1926 - data - optional location of matrix data. Set data to `NULL` (`PETSC_NULL_SCALAR` for Fortran users) for PETSc 1927 to control all matrix memory allocation. 1928 1929 Output Parameter: 1930 . A - the matrix 1931 1932 Level: intermediate 1933 1934 Notes: 1935 The dense format is fully compatible with standard Fortran 1936 storage by columns. 1937 1938 Although local portions of the matrix are stored in column-major 1939 order, the matrix is partitioned across MPI ranks by row. 1940 1941 The data input variable is intended primarily for Fortran programmers 1942 who wish to allocate their own matrix memory space. Most users should 1943 set `data` to `NULL` (`PETSC_NULL_SCALAR` for Fortran users). 1944 1945 The user MUST specify either the local or global matrix dimensions 1946 (possibly both). 
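
   Example Usage:
   A minimal sketch (the sizes below are purely illustrative; with `data` set to `NULL`, PETSc allocates the storage):
.vb
   Mat A;

   PetscCall(MatCreateDense(PETSC_COMM_WORLD, PETSC_DECIDE, PETSC_DECIDE, 100, 50, NULL, &A));
   /* insert entries with MatSetValues(), then assemble with MatAssemblyBegin()/MatAssemblyEnd() */
   PetscCall(MatDestroy(&A));
.ve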
1947 1948 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()` 1949 @*/ 1950 PetscErrorCode MatCreateDense(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar *data, Mat *A) 1951 { 1952 PetscFunctionBegin; 1953 PetscCall(MatCreate(comm, A)); 1954 PetscCall(MatSetSizes(*A, m, n, M, N)); 1955 PetscCall(MatSetType(*A, MATDENSE)); 1956 PetscCall(MatSeqDenseSetPreallocation(*A, data)); 1957 PetscCall(MatMPIDenseSetPreallocation(*A, data)); 1958 PetscFunctionReturn(PETSC_SUCCESS); 1959 } 1960 1961 static PetscErrorCode MatDuplicate_MPIDense(Mat A, MatDuplicateOption cpvalues, Mat *newmat) 1962 { 1963 Mat mat; 1964 Mat_MPIDense *a, *oldmat = (Mat_MPIDense *)A->data; 1965 1966 PetscFunctionBegin; 1967 *newmat = NULL; 1968 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat)); 1969 PetscCall(MatSetSizes(mat, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 1970 PetscCall(MatSetType(mat, ((PetscObject)A)->type_name)); 1971 a = (Mat_MPIDense *)mat->data; 1972 1973 mat->factortype = A->factortype; 1974 mat->assembled = PETSC_TRUE; 1975 mat->preallocated = PETSC_TRUE; 1976 1977 mat->insertmode = NOT_SET_VALUES; 1978 a->donotstash = oldmat->donotstash; 1979 1980 PetscCall(PetscLayoutReference(A->rmap, &mat->rmap)); 1981 PetscCall(PetscLayoutReference(A->cmap, &mat->cmap)); 1982 1983 PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A)); 1984 1985 *newmat = mat; 1986 PetscFunctionReturn(PETSC_SUCCESS); 1987 } 1988 1989 static PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer) 1990 { 1991 PetscBool isbinary; 1992 #if defined(PETSC_HAVE_HDF5) 1993 PetscBool ishdf5; 1994 #endif 1995 1996 PetscFunctionBegin; 1997 PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1); 1998 PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2); 1999 /* force binary viewer to load .info file if it has not yet done so */ 2000 PetscCall(PetscViewerSetUp(viewer)); 2001 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 2002 #if defined(PETSC_HAVE_HDF5) 2003 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5)); 2004 #endif 2005 if (isbinary) { 2006 PetscCall(MatLoad_Dense_Binary(newMat, viewer)); 2007 #if defined(PETSC_HAVE_HDF5) 2008 } else if (ishdf5) { 2009 PetscCall(MatLoad_Dense_HDF5(newMat, viewer)); 2010 #endif 2011 } else SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name); 2012 PetscFunctionReturn(PETSC_SUCCESS); 2013 } 2014 2015 static PetscErrorCode MatEqual_MPIDense(Mat A, Mat B, PetscBool *flag) 2016 { 2017 Mat_MPIDense *matB = (Mat_MPIDense *)B->data, *matA = (Mat_MPIDense *)A->data; 2018 Mat a, b; 2019 2020 PetscFunctionBegin; 2021 a = matA->A; 2022 b = matB->A; 2023 PetscCall(MatEqual(a, b, flag)); 2024 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A))); 2025 PetscFunctionReturn(PETSC_SUCCESS); 2026 } 2027 2028 static PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data) 2029 { 2030 Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data; 2031 2032 PetscFunctionBegin; 2033 PetscCall(PetscFree2(atb->sendbuf, atb->recvcounts)); 2034 PetscCall(MatDestroy(&atb->atb)); 2035 PetscCall(PetscFree(atb)); 2036 PetscFunctionReturn(PETSC_SUCCESS); 2037 } 2038 2039 static PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data) 2040 { 2041 
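  /* Release the send/receive buffers and the Allgatherv counts/displacements that the ABt
     symbolic phase attached to the product, then free the context itself. */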
Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data; 2042 2043 PetscFunctionBegin; 2044 PetscCall(PetscFree2(abt->buf[0], abt->buf[1])); 2045 PetscCall(PetscFree2(abt->recvcounts, abt->recvdispls)); 2046 PetscCall(PetscFree(abt)); 2047 PetscFunctionReturn(PETSC_SUCCESS); 2048 } 2049 2050 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) 2051 { 2052 Mat_MPIDense *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data; 2053 Mat_TransMatMultDense *atb; 2054 MPI_Comm comm; 2055 PetscMPIInt size, *recvcounts; 2056 PetscScalar *carray, *sendbuf; 2057 const PetscScalar *atbarray; 2058 PetscInt i, cN = C->cmap->N, proc, k, j, lda; 2059 const PetscInt *ranges; 2060 2061 PetscFunctionBegin; 2062 MatCheckProduct(C, 3); 2063 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 2064 atb = (Mat_TransMatMultDense *)C->product->data; 2065 recvcounts = atb->recvcounts; 2066 sendbuf = atb->sendbuf; 2067 2068 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 2069 PetscCallMPI(MPI_Comm_size(comm, &size)); 2070 2071 /* compute atbarray = aseq^T * bseq */ 2072 PetscCall(MatTransposeMatMult(a->A, b->A, atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX, PETSC_DETERMINE, &atb->atb)); 2073 2074 PetscCall(MatGetOwnershipRanges(C, &ranges)); 2075 2076 /* arrange atbarray into sendbuf */ 2077 PetscCall(MatDenseGetArrayRead(atb->atb, &atbarray)); 2078 PetscCall(MatDenseGetLDA(atb->atb, &lda)); 2079 for (proc = 0, k = 0; proc < size; proc++) { 2080 for (j = 0; j < cN; j++) { 2081 for (i = ranges[proc]; i < ranges[proc + 1]; i++) sendbuf[k++] = atbarray[i + j * lda]; 2082 } 2083 } 2084 PetscCall(MatDenseRestoreArrayRead(atb->atb, &atbarray)); 2085 2086 /* sum all atbarray to local values of C */ 2087 PetscCall(MatDenseGetArrayWrite(c->A, &carray)); 2088 PetscCallMPI(MPI_Reduce_scatter(sendbuf, carray, recvcounts, MPIU_SCALAR, MPIU_SUM, comm)); 2089 PetscCall(MatDenseRestoreArrayWrite(c->A, &carray)); 2090 PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 2091 PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 2092 PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 2093 PetscFunctionReturn(PETSC_SUCCESS); 2094 } 2095 2096 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C) 2097 { 2098 MPI_Comm comm; 2099 PetscMPIInt size; 2100 PetscInt cm = A->cmap->n, cM, cN = B->cmap->N; 2101 Mat_TransMatMultDense *atb; 2102 PetscBool cisdense = PETSC_FALSE; 2103 const PetscInt *ranges; 2104 2105 PetscFunctionBegin; 2106 MatCheckProduct(C, 4); 2107 PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 2108 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 2109 if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) { 2110 SETERRQ(comm, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend); 2111 } 2112 2113 /* create matrix product C */ 2114 PetscCall(MatSetSizes(C, cm, B->cmap->n, A->cmap->N, B->cmap->N)); 2115 #if defined(PETSC_HAVE_CUDA) 2116 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, "")); 2117 #endif 2118 #if defined(PETSC_HAVE_HIP) 2119 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSEHIP, "")); 2120 #endif 2121 if 
(!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 2122 PetscCall(MatSetUp(C)); 2123 2124 /* create data structure for reuse C */ 2125 PetscCallMPI(MPI_Comm_size(comm, &size)); 2126 PetscCall(PetscNew(&atb)); 2127 cM = C->rmap->N; 2128 PetscCall(PetscMalloc2(cM * cN, &atb->sendbuf, size, &atb->recvcounts)); 2129 PetscCall(MatGetOwnershipRanges(C, &ranges)); 2130 for (PetscMPIInt i = 0; i < size; i++) PetscCall(PetscMPIIntCast((ranges[i + 1] - ranges[i]) * cN, &atb->recvcounts[i])); 2131 C->product->data = atb; 2132 C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense; 2133 PetscFunctionReturn(PETSC_SUCCESS); 2134 } 2135 2136 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C) 2137 { 2138 MPI_Comm comm; 2139 PetscMPIInt i, size; 2140 PetscInt maxRows, bufsiz; 2141 PetscMPIInt tag; 2142 PetscInt alg; 2143 Mat_MatTransMultDense *abt; 2144 Mat_Product *product = C->product; 2145 PetscBool flg; 2146 2147 PetscFunctionBegin; 2148 MatCheckProduct(C, 4); 2149 PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 2150 /* check local size of A and B */ 2151 PetscCheck(A->cmap->n == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local column dimensions are incompatible, A (%" PetscInt_FMT ") != B (%" PetscInt_FMT ")", A->cmap->n, B->cmap->n); 2152 2153 PetscCall(PetscStrcmp(product->alg, "allgatherv", &flg)); 2154 alg = flg ? 0 : 1; 2155 2156 /* setup matrix product C */ 2157 PetscCall(MatSetSizes(C, A->rmap->n, B->rmap->n, A->rmap->N, B->rmap->N)); 2158 PetscCall(MatSetType(C, MATMPIDENSE)); 2159 PetscCall(MatSetUp(C)); 2160 PetscCall(PetscObjectGetNewTag((PetscObject)C, &tag)); 2161 2162 /* create data structure for reuse C */ 2163 PetscCall(PetscObjectGetComm((PetscObject)C, &comm)); 2164 PetscCallMPI(MPI_Comm_size(comm, &size)); 2165 PetscCall(PetscNew(&abt)); 2166 abt->tag = tag; 2167 abt->alg = alg; 2168 switch (alg) { 2169 case 1: /* alg: "cyclic" */ 2170 for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i])); 2171 bufsiz = A->cmap->N * maxRows; 2172 PetscCall(PetscMalloc2(bufsiz, &abt->buf[0], bufsiz, &abt->buf[1])); 2173 break; 2174 default: /* alg: "allgatherv" */ 2175 PetscCall(PetscMalloc2(B->rmap->n * B->cmap->N, &abt->buf[0], B->rmap->N * B->cmap->N, &abt->buf[1])); 2176 PetscCall(PetscMalloc2(size, &abt->recvcounts, size + 1, &abt->recvdispls)); 2177 for (i = 0; i <= size; i++) PetscCall(PetscMPIIntCast(B->rmap->range[i] * A->cmap->N, &abt->recvdispls[i])); 2178 for (i = 0; i < size; i++) PetscCall(PetscMPIIntCast(abt->recvdispls[i + 1] - abt->recvdispls[i], &abt->recvcounts[i])); 2179 break; 2180 } 2181 2182 C->product->data = abt; 2183 C->product->destroy = MatDestroy_MatMatTransMult_MPIDense_MPIDense; 2184 C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense; 2185 PetscFunctionReturn(PETSC_SUCCESS); 2186 } 2187 2188 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C) 2189 { 2190 Mat_MPIDense *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data; 2191 Mat_MatTransMultDense *abt; 2192 MPI_Comm comm; 2193 PetscMPIInt rank, size, sendto, recvfrom, recvisfrom; 2194 PetscScalar *sendbuf, *recvbuf = NULL, *cv; 2195 PetscInt i, cK = A->cmap->N, sendsiz, recvsiz, k, j, bn; 2196 PetscScalar _DOne = 1.0, _DZero = 0.0; 2197 const PetscScalar *av, *bv; 2198 PetscBLASInt cm, cn, ck, alda, blda = 
0, clda; 2199 MPI_Request reqs[2]; 2200 const PetscInt *ranges; 2201 2202 PetscFunctionBegin; 2203 MatCheckProduct(C, 3); 2204 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 2205 abt = (Mat_MatTransMultDense *)C->product->data; 2206 PetscCall(PetscObjectGetComm((PetscObject)C, &comm)); 2207 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 2208 PetscCallMPI(MPI_Comm_size(comm, &size)); 2209 PetscCall(MatDenseGetArrayRead(a->A, &av)); 2210 PetscCall(MatDenseGetArrayRead(b->A, &bv)); 2211 PetscCall(MatDenseGetArrayWrite(c->A, &cv)); 2212 PetscCall(MatDenseGetLDA(a->A, &i)); 2213 PetscCall(PetscBLASIntCast(i, &alda)); 2214 PetscCall(MatDenseGetLDA(b->A, &i)); 2215 PetscCall(PetscBLASIntCast(i, &blda)); 2216 PetscCall(MatDenseGetLDA(c->A, &i)); 2217 PetscCall(PetscBLASIntCast(i, &clda)); 2218 PetscCall(MatGetOwnershipRanges(B, &ranges)); 2219 bn = B->rmap->n; 2220 if (blda == bn) { 2221 sendbuf = (PetscScalar *)bv; 2222 } else { 2223 sendbuf = abt->buf[0]; 2224 for (k = 0, i = 0; i < cK; i++) { 2225 for (j = 0; j < bn; j++, k++) sendbuf[k] = bv[i * blda + j]; 2226 } 2227 } 2228 if (size > 1) { 2229 sendto = (rank + size - 1) % size; 2230 recvfrom = (rank + size + 1) % size; 2231 } else { 2232 sendto = recvfrom = 0; 2233 } 2234 PetscCall(PetscBLASIntCast(cK, &ck)); 2235 PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm)); 2236 recvisfrom = rank; 2237 for (i = 0; i < size; i++) { 2238 /* we have finished receiving in sending, bufs can be read/modified */ 2239 PetscMPIInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */ 2240 PetscInt nextbn = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom]; 2241 2242 if (nextrecvisfrom != rank) { 2243 /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */ 2244 sendsiz = cK * bn; 2245 recvsiz = cK * nextbn; 2246 recvbuf = (i & 1) ? 
abt->buf[0] : abt->buf[1]; 2247 PetscCallMPI(MPIU_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0])); 2248 PetscCallMPI(MPIU_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1])); 2249 } 2250 2251 /* local aseq * sendbuf^T */ 2252 PetscCall(PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn)); 2253 if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &cm, &cn, &ck, &_DOne, av, &alda, sendbuf, &cn, &_DZero, cv + clda * ranges[recvisfrom], &clda)); 2254 2255 if (nextrecvisfrom != rank) { 2256 /* wait for the sends and receives to complete, swap sendbuf and recvbuf */ 2257 PetscCallMPI(MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE)); 2258 } 2259 bn = nextbn; 2260 recvisfrom = nextrecvisfrom; 2261 sendbuf = recvbuf; 2262 } 2263 PetscCall(MatDenseRestoreArrayRead(a->A, &av)); 2264 PetscCall(MatDenseRestoreArrayRead(b->A, &bv)); 2265 PetscCall(MatDenseRestoreArrayWrite(c->A, &cv)); 2266 PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 2267 PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 2268 PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 2269 PetscFunctionReturn(PETSC_SUCCESS); 2270 } 2271 2272 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C) 2273 { 2274 Mat_MPIDense *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data; 2275 Mat_MatTransMultDense *abt; 2276 MPI_Comm comm; 2277 PetscMPIInt size, ibn; 2278 PetscScalar *cv, *sendbuf, *recvbuf; 2279 const PetscScalar *av, *bv; 2280 PetscInt blda, i, cK = A->cmap->N, k, j, bn; 2281 PetscScalar _DOne = 1.0, _DZero = 0.0; 2282 PetscBLASInt cm, cn, ck, alda, clda; 2283 2284 PetscFunctionBegin; 2285 MatCheckProduct(C, 3); 2286 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 2287 abt = (Mat_MatTransMultDense *)C->product->data; 2288 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 2289 PetscCallMPI(MPI_Comm_size(comm, &size)); 2290 PetscCall(MatDenseGetArrayRead(a->A, &av)); 2291 PetscCall(MatDenseGetArrayRead(b->A, &bv)); 2292 PetscCall(MatDenseGetArrayWrite(c->A, &cv)); 2293 PetscCall(MatDenseGetLDA(a->A, &i)); 2294 PetscCall(PetscBLASIntCast(i, &alda)); 2295 PetscCall(MatDenseGetLDA(b->A, &blda)); 2296 PetscCall(MatDenseGetLDA(c->A, &i)); 2297 PetscCall(PetscBLASIntCast(i, &clda)); 2298 /* copy transpose of B into buf[0] */ 2299 bn = B->rmap->n; 2300 sendbuf = abt->buf[0]; 2301 recvbuf = abt->buf[1]; 2302 for (k = 0, j = 0; j < bn; j++) { 2303 for (i = 0; i < cK; i++, k++) sendbuf[k] = bv[i * blda + j]; 2304 } 2305 PetscCall(MatDenseRestoreArrayRead(b->A, &bv)); 2306 PetscCall(PetscMPIIntCast(bn * cK, &ibn)); 2307 PetscCallMPI(MPI_Allgatherv(sendbuf, ibn, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm)); 2308 PetscCall(PetscBLASIntCast(cK, &ck)); 2309 PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm)); 2310 PetscCall(PetscBLASIntCast(c->A->cmap->n, &cn)); 2311 if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &cm, &cn, &ck, &_DOne, av, &alda, recvbuf, &ck, &_DZero, cv, &clda)); 2312 PetscCall(MatDenseRestoreArrayRead(a->A, &av)); 2313 PetscCall(MatDenseRestoreArrayRead(b->A, &bv)); 2314 PetscCall(MatDenseRestoreArrayWrite(c->A, &cv)); 2315 PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 2316 PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 2317 PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 2318 PetscFunctionReturn(PETSC_SUCCESS); 2319 } 2320 2321 static 
PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) 2322 { 2323 Mat_MatTransMultDense *abt; 2324 2325 PetscFunctionBegin; 2326 MatCheckProduct(C, 3); 2327 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 2328 abt = (Mat_MatTransMultDense *)C->product->data; 2329 switch (abt->alg) { 2330 case 1: 2331 PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C)); 2332 break; 2333 default: 2334 PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C)); 2335 break; 2336 } 2337 PetscFunctionReturn(PETSC_SUCCESS); 2338 } 2339 2340 static PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data) 2341 { 2342 Mat_MatMultDense *ab = (Mat_MatMultDense *)data; 2343 2344 PetscFunctionBegin; 2345 PetscCall(MatDestroy(&ab->Ce)); 2346 PetscCall(MatDestroy(&ab->Ae)); 2347 PetscCall(MatDestroy(&ab->Be)); 2348 PetscCall(PetscFree(ab)); 2349 PetscFunctionReturn(PETSC_SUCCESS); 2350 } 2351 2352 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) 2353 { 2354 Mat_MatMultDense *ab; 2355 Mat_MPIDense *mdn = (Mat_MPIDense *)A->data; 2356 2357 PetscFunctionBegin; 2358 MatCheckProduct(C, 3); 2359 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Missing product data"); 2360 ab = (Mat_MatMultDense *)C->product->data; 2361 if (ab->Ae && ab->Ce) { 2362 #if PetscDefined(HAVE_ELEMENTAL) 2363 PetscCall(MatConvert_MPIDense_Elemental(A, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Ae)); 2364 PetscCall(MatConvert_MPIDense_Elemental(B, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Be)); 2365 PetscCall(MatMatMultNumeric_Elemental(ab->Ae, ab->Be, ab->Ce)); 2366 PetscCall(MatConvert(ab->Ce, MATMPIDENSE, MAT_REUSE_MATRIX, &C)); 2367 #else 2368 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "PETSC_HAVE_ELEMENTAL not defined"); 2369 #endif 2370 } else { 2371 const PetscScalar *read; 2372 PetscScalar *write; 2373 PetscInt lda; 2374 2375 PetscCall(MatDenseGetLDA(B, &lda)); 2376 PetscCall(MatDenseGetArrayRead(B, &read)); 2377 PetscCall(MatDenseGetArrayWrite(ab->Be, &write)); 2378 if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); /* cannot be done during the symbolic phase because of possible calls to MatProductReplaceMats() */ 2379 for (PetscInt i = 0; i < C->cmap->N; ++i) { 2380 PetscCall(PetscSFBcastBegin(mdn->Mvctx, MPIU_SCALAR, read + i * lda, write + i * ab->Be->rmap->n, MPI_REPLACE)); 2381 PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, read + i * lda, write + i * ab->Be->rmap->n, MPI_REPLACE)); 2382 } 2383 PetscCall(MatDenseRestoreArrayWrite(ab->Be, &write)); 2384 PetscCall(MatDenseRestoreArrayRead(B, &read)); 2385 PetscCall(MatMatMultNumeric_SeqDense_SeqDense(((Mat_MPIDense *)A->data)->A, ab->Be, ((Mat_MPIDense *)C->data)->A)); 2386 } 2387 PetscFunctionReturn(PETSC_SUCCESS); 2388 } 2389 2390 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C) 2391 { 2392 Mat_Product *product = C->product; 2393 PetscInt alg; 2394 Mat_MatMultDense *ab; 2395 PetscBool flg; 2396 2397 PetscFunctionBegin; 2398 MatCheckProduct(C, 4); 2399 PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 2400 /* check local size of A and B */ 2401 PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ", %" 
PetscInt_FMT ")", 2402 A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend); 2403 2404 PetscCall(PetscStrcmp(product->alg, "petsc", &flg)); 2405 alg = flg ? 0 : 1; 2406 2407 /* setup C */ 2408 PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N)); 2409 PetscCall(MatSetType(C, MATMPIDENSE)); 2410 PetscCall(MatSetUp(C)); 2411 2412 /* create data structure for reuse Cdense */ 2413 PetscCall(PetscNew(&ab)); 2414 2415 switch (alg) { 2416 case 1: /* alg: "elemental" */ 2417 #if PetscDefined(HAVE_ELEMENTAL) 2418 /* create elemental matrices Ae and Be */ 2419 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &ab->Ae)); 2420 PetscCall(MatSetSizes(ab->Ae, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N)); 2421 PetscCall(MatSetType(ab->Ae, MATELEMENTAL)); 2422 PetscCall(MatSetUp(ab->Ae)); 2423 PetscCall(MatSetOption(ab->Ae, MAT_ROW_ORIENTED, PETSC_FALSE)); 2424 2425 PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &ab->Be)); 2426 PetscCall(MatSetSizes(ab->Be, PETSC_DECIDE, PETSC_DECIDE, B->rmap->N, B->cmap->N)); 2427 PetscCall(MatSetType(ab->Be, MATELEMENTAL)); 2428 PetscCall(MatSetUp(ab->Be)); 2429 PetscCall(MatSetOption(ab->Be, MAT_ROW_ORIENTED, PETSC_FALSE)); 2430 2431 /* compute symbolic Ce = Ae*Be */ 2432 PetscCall(MatCreate(PetscObjectComm((PetscObject)C), &ab->Ce)); 2433 PetscCall(MatMatMultSymbolic_Elemental(ab->Ae, ab->Be, fill, ab->Ce)); 2434 #else 2435 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "PETSC_HAVE_ELEMENTAL not defined"); 2436 #endif 2437 break; 2438 default: /* alg: "petsc" */ 2439 ab->Ae = NULL; 2440 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, A->cmap->N, B->cmap->N, NULL, &ab->Be)); 2441 ab->Ce = NULL; 2442 break; 2443 } 2444 2445 C->product->data = ab; 2446 C->product->destroy = MatDestroy_MatMatMult_MPIDense_MPIDense; 2447 C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense; 2448 PetscFunctionReturn(PETSC_SUCCESS); 2449 } 2450 2451 static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C) 2452 { 2453 Mat_Product *product = C->product; 2454 const char *algTypes[2] = {"petsc", "elemental"}; 2455 PetscInt alg, nalg = PetscDefined(HAVE_ELEMENTAL) ? 
2 : 1; 2456 PetscBool flg = PETSC_FALSE; 2457 2458 PetscFunctionBegin; 2459 /* Set default algorithm */ 2460 alg = 0; /* default is petsc */ 2461 PetscCall(PetscStrcmp(product->alg, "default", &flg)); 2462 if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg])); 2463 2464 /* Get runtime option */ 2465 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat"); 2466 PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_AB", algTypes, nalg, algTypes[alg], &alg, &flg)); 2467 PetscOptionsEnd(); 2468 if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg])); 2469 2470 C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense; 2471 C->ops->productsymbolic = MatProductSymbolic_AB; 2472 PetscFunctionReturn(PETSC_SUCCESS); 2473 } 2474 2475 static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C) 2476 { 2477 Mat_Product *product = C->product; 2478 Mat A = product->A, B = product->B; 2479 2480 PetscFunctionBegin; 2481 PetscCheck(A->rmap->rstart == B->rmap->rstart && A->rmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", 2482 A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend); 2483 C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense; 2484 C->ops->productsymbolic = MatProductSymbolic_AtB; 2485 PetscFunctionReturn(PETSC_SUCCESS); 2486 } 2487 2488 static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C) 2489 { 2490 Mat_Product *product = C->product; 2491 const char *algTypes[2] = {"allgatherv", "cyclic"}; 2492 PetscInt alg, nalg = 2; 2493 PetscBool flg = PETSC_FALSE; 2494 2495 PetscFunctionBegin; 2496 /* Set default algorithm */ 2497 alg = 0; /* default is allgatherv */ 2498 PetscCall(PetscStrcmp(product->alg, "default", &flg)); 2499 if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg])); 2500 2501 /* Get runtime option */ 2502 if (product->api_user) { 2503 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatTransposeMult", "Mat"); 2504 PetscCall(PetscOptionsEList("-matmattransmult_mpidense_mpidense_via", "Algorithmic approach", "MatMatTransposeMult", algTypes, nalg, algTypes[alg], &alg, &flg)); 2505 PetscOptionsEnd(); 2506 } else { 2507 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_ABt", "Mat"); 2508 PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_ABt", algTypes, nalg, algTypes[alg], &alg, &flg)); 2509 PetscOptionsEnd(); 2510 } 2511 if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg])); 2512 2513 C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense; 2514 C->ops->productsymbolic = MatProductSymbolic_ABt; 2515 PetscFunctionReturn(PETSC_SUCCESS); 2516 } 2517 2518 static PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C) 2519 { 2520 Mat_Product *product = C->product; 2521 2522 PetscFunctionBegin; 2523 switch (product->type) { 2524 case MATPRODUCT_AB: 2525 PetscCall(MatProductSetFromOptions_MPIDense_AB(C)); 2526 break; 2527 case MATPRODUCT_AtB: 2528 PetscCall(MatProductSetFromOptions_MPIDense_AtB(C)); 2529 break; 2530 case MATPRODUCT_ABt: 2531 PetscCall(MatProductSetFromOptions_MPIDense_ABt(C)); 2532 break; 2533 default: 2534 break; 2535 } 2536 PetscFunctionReturn(PETSC_SUCCESS); 
2537 } 2538
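
/*
   Illustrative sketch (not part of the library source): the ABt kernels above are normally reached
   through the MatProduct interface, with the algorithm choice parsed in
   MatProductSetFromOptions_MPIDense_ABt(). Assuming A and B are assembled MATMPIDENSE matrices with
   compatible local column layouts, a caller could do

     Mat C;
     PetscCall(MatProductCreate(A, B, NULL, &C));
     PetscCall(MatProductSetType(C, MATPRODUCT_ABt));
     PetscCall(MatProductSetAlgorithm(C, "cyclic"));   (or "allgatherv", the default; also settable with -mat_product_algorithm)
     PetscCall(MatProductSetFromOptions(C));
     PetscCall(MatProductSymbolic(C));
     PetscCall(MatProductNumeric(C));
     PetscCall(MatDestroy(&C));
*/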