/*
   Basic functions for basic parallel dense matrices.
   Portions of this code are under:
   Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
*/

#include <../src/mat/impls/dense/mpi/mpidense.h> /*I "petscmat.h" I*/
#include <../src/mat/impls/aij/mpi/mpiaij.h>
#include <petscblaslapack.h>

/*@
  MatDenseGetLocalMatrix - For a `MATMPIDENSE` or `MATSEQDENSE` matrix returns the sequential
  matrix that represents the operator. For sequential matrices it returns itself.

  Input Parameter:
. A - the sequential or MPI `MATDENSE` matrix

  Output Parameter:
. B - the inner matrix

  Level: intermediate

.seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATMPIDENSE`, `MATSEQDENSE`
@*/
PetscErrorCode MatDenseGetLocalMatrix(Mat A, Mat *B)
{
  Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
  PetscBool     flg;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscAssertPointer(B, 2);
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &flg));
  if (flg) *B = mat->A; /* MPI case: hand back the per-rank sequential block */
  else {
    PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQDENSE, &flg));
    PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)A)->type_name);
    *B = A; /* sequential case: the matrix is its own local matrix */
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Copy A into B by delegating to the per-rank sequential blocks */
static PetscErrorCode MatCopy_MPIDense(Mat A, Mat B, MatStructure s)
{
  Mat_MPIDense *Amat = (Mat_MPIDense *)A->data;
  Mat_MPIDense *Bmat = (Mat_MPIDense *)B->data;

  PetscFunctionBegin;
  PetscCall(MatCopy(Amat->A, Bmat->A, s));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Add alpha to the locally owned portion of the global diagonal of A */
PetscErrorCode MatShift_MPIDense(Mat A, PetscScalar alpha)
{
  Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
  PetscInt      j, lda, rstart = A->rmap->rstart, rend = A->rmap->rend, rend2;
  PetscScalar  *v;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArray(mat->A, &v));
  PetscCall(MatDenseGetLDA(mat->A, &lda));
  /* for rectangular matrices the diagonal stops at the last global column */
  rend2 = PetscMin(rend, A->cmap->N);
  if (rend2 > rstart) {
    /* local row j - rstart, global column j (local storage holds all N columns) */
    for (j = rstart; j < rend2; j++) v[j - rstart + j * lda] += alpha;
    PetscCall(PetscLogFlops(rend2 - rstart));
  }
  PetscCall(MatDenseRestoreArray(mat->A, &v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Access one row; only locally owned rows are supported */
static PetscErrorCode MatGetRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
  PetscInt      lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;

  PetscFunctionBegin;
  PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows");
  lrow = row - rstart;
  PetscCall(MatGetRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Release a row obtained with MatGetRow_MPIDense() */
static PetscErrorCode MatRestoreRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
  PetscInt      lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;

  PetscFunctionBegin;
  PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows");
  lrow = row - rstart;
  PetscCall(MatRestoreRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Return a sequential matrix aliasing the locally owned diagonal block; it is cached on A
   under the name "DiagonalBlock" so repeated calls reuse the same object */
static PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A, Mat *a)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)A->data;
  PetscInt      m = A->rmap->n, rstart = A->rmap->rstart;
  PetscScalar  *array;
  MPI_Comm      comm;
  PetscBool     flg;
  Mat           B;

  PetscFunctionBegin;
  PetscCall(MatHasCongruentLayouts(A, &flg));
  PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only square matrices supported.");
  PetscCall(PetscObjectQuery((PetscObject)A, "DiagonalBlock", (PetscObject *)&B));
  if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */
#if PetscDefined(HAVE_CUDA)
    PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSECUDA, &flg));
    PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSECUDA);
#elif PetscDefined(HAVE_HIP)
    PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSEHIP, &flg));
    PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSEHIP);
#endif
    PetscCall(PetscObjectGetComm((PetscObject)mdn->A, &comm));
    PetscCall(MatCreate(comm, &B));
    PetscCall(MatSetSizes(B, m, m, m, m));
    PetscCall(MatSetType(B, ((PetscObject)mdn->A)->type_name));
    /* B aliases the local storage starting at column offset rstart -- no copy is made */
    PetscCall(MatDenseGetArrayRead(mdn->A, (const PetscScalar **)&array));
    PetscCall(MatSeqDenseSetPreallocation(B, array + m * rstart));
    PetscCall(MatDenseRestoreArrayRead(mdn->A, (const PetscScalar **)&array));
    PetscCall(PetscObjectCompose((PetscObject)A, "DiagonalBlock", (PetscObject)B));
    *a = B;
    PetscCall(MatDestroy(&B)); /* the composition above holds its own reference */
  } else *a = B;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Insert/add values: locally owned rows go directly into the sequential block,
   off-process rows are stashed (unless donotstash) for communication at assembly time */
static PetscErrorCode MatSetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIDense *A = (Mat_MPIDense *)mat->data;
  PetscInt      i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;
  PetscBool     roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative rows are ignored by convention */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        PetscCall(MatSetValues(A->A, 1, &row, n, idxn, v ? v + i * n : NULL, addv));
      } else {
        /* column-oriented input: v is m x n in column-major order */
        for (j = 0; j < n; j++) {
          if (idxn[j] < 0) continue;
          PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large");
          PetscCall(MatSetValues(A->A, 1, &row, 1, &idxn[j], v ? v + i + j * m : NULL, addv));
        }
      }
    } else if (!A->donotstash) {
      mat->assembled = PETSC_FALSE; /* off-process entries force a new assembly */
      if (roworiented) {
        PetscCall(MatStashValuesRow_Private(&mat->stash, idxm[i], n, idxn, v ? v + i * n : NULL, PETSC_FALSE));
      } else {
        PetscCall(MatStashValuesCol_Private(&mat->stash, idxm[i], n, idxn, v ? v + i : NULL, m, PETSC_FALSE));
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Extract values into v (row-major); only locally owned entries may be queried */
static PetscErrorCode MatGetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
  PetscInt      i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j = 0; j < n; j++) {
        if (idxn[j] < 0) continue; /* negative column */
        PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large");
        PetscCall(MatGetValues(mdn->A, 1, &row, 1, &idxn[j], v + i * n + j));
      }
    } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Leading dimension of the per-rank dense storage */
static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A, PetscInt *lda)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseGetLDA(a->A, lda));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Set the leading dimension, creating the inner sequential block first if it does not exist yet */
static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A, PetscInt lda)
{
  Mat_MPIDense *a     = (Mat_MPIDense *)A->data;
  MatType       mtype = MATSEQDENSE;

  PetscFunctionBegin;
  if (!a->A) {
    /* lazily create the inner sequential matrix, matching the device type of A */
    PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
    PetscCall(PetscLayoutSetUp(A->rmap));
    PetscCall(PetscLayoutSetUp(A->cmap));
    PetscCall(MatCreate(PETSC_COMM_SELF, &a->A));
    /* the local block holds all N global columns for the locally owned rows */
    PetscCall(MatSetSizes(a->A, A->rmap->n, A->cmap->N, A->rmap->n, A->cmap->N));
#if PetscDefined(HAVE_CUDA)
    PetscBool iscuda;
    PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSECUDA, &iscuda));
    if (iscuda) mtype = MATSEQDENSECUDA;
#elif PetscDefined(HAVE_HIP)
    PetscBool iship;
    PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSEHIP, &iship));
    if (iship) mtype = MATSEQDENSEHIP;
#endif
    PetscCall(MatSetType(a->A, mtype));
  }
  PetscCall(MatDenseSetLDA(a->A, lda));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Read/write access to the raw local array (delegates to the sequential block) */
static PetscErrorCode MatDenseGetArray_MPIDense(Mat A, PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseGetArray(a->A, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Read-only access to the raw local array */
static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A, PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseGetArrayRead(a->A, (const PetscScalar **)array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Write-only access to the raw local array (previous contents need not be preserved) */
static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A, PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseGetArrayWrite(a->A, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Temporarily substitute a user-provided array for the local storage */
static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A, const PetscScalar *array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDensePlaceArray(a->A, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Undo MatDensePlaceArray(), restoring the original storage */
static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseResetArray(a->A));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Permanently replace the local storage with a user-provided array (ownership transfers) */
static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A, const PetscScalar *array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseReplaceArray(a->A, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Extract a submatrix; the row distribution of the result must match that of A
   (no parallel redistribution is performed) */
static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A, IS isrow, IS iscol, MatReuse scall, Mat *B)
{
  Mat_MPIDense   *mat = (Mat_MPIDense *)A->data, *newmatd;
  PetscInt        lda, i, j, rstart, rend, nrows, ncols, Ncols, nlrows, nlcols;
  const PetscInt *irow, *icol;
  const
  PetscScalar *v;
  PetscScalar *bv;
  Mat          newmat;
  IS           iscol_local;
  MPI_Comm     comm_is, comm_mat;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm_mat));
  PetscCall(PetscObjectGetComm((PetscObject)iscol, &comm_is));
  PetscCheck(comm_mat == comm_is, PETSC_COMM_SELF, PETSC_ERR_ARG_NOTSAMECOMM, "IS communicator must match matrix communicator");

  /* every rank needs all selected columns since the local block stores full rows */
  PetscCall(ISAllGather(iscol, &iscol_local));
  PetscCall(ISGetIndices(isrow, &irow));
  PetscCall(ISGetIndices(iscol_local, &icol));
  PetscCall(ISGetLocalSize(isrow, &nrows));
  PetscCall(ISGetLocalSize(iscol, &ncols));
  PetscCall(ISGetSize(iscol, &Ncols)); /* global number of columns, size of iscol_local */

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK. ... Currently supports only submatrix same partitioning as
     original matrix! */

  PetscCall(MatGetLocalSize(A, &nlrows, &nlcols));
  PetscCall(MatGetOwnershipRange(A, &rstart, &rend));

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &newmat));
    PetscCall(MatSetSizes(newmat, nrows, ncols, PETSC_DECIDE, Ncols));
    PetscCall(MatSetType(newmat, ((PetscObject)A)->type_name));
    PetscCall(MatMPIDenseSetPreallocation(newmat, NULL));
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense *)newmat->data;
  PetscCall(MatDenseGetArray(newmatd->A, &bv));
  PetscCall(MatDenseGetArrayRead(mat->A, &v));
  PetscCall(MatDenseGetLDA(mat->A, &lda));
  for (i = 0; i < Ncols; i++) {
    const PetscScalar *av = v + lda * icol[i]; /* source column icol[i] */
    for (j = 0; j < nrows; j++) *bv++ = av[irow[j] - rstart];
  }
  PetscCall(MatDenseRestoreArrayRead(mat->A, &v));
  PetscCall(MatDenseRestoreArray(newmatd->A, &bv));

  /* Assemble the matrices so that the correct flags are set */
  PetscCall(MatAssemblyBegin(newmat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(newmat, MAT_FINAL_ASSEMBLY));

  /* Free work space */
  PetscCall(ISRestoreIndices(isrow, &irow));
  PetscCall(ISRestoreIndices(iscol_local, &icol));
  PetscCall(ISDestroy(&iscol_local));
  *B = newmat;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Release an array obtained with MatDenseGetArray_MPIDense() */
static PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A, PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseRestoreArray(a->A, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Release an array obtained with MatDenseGetArrayRead_MPIDense() */
static PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A, PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseRestoreArrayRead(a->A, (const PetscScalar **)array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Release an array obtained with MatDenseGetArrayWrite_MPIDense() */
static PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A, PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseRestoreArrayWrite(a->A, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Begin scattering stashed off-process entries to their owners */
static PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat, MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
  PetscInt      nstash, reallocs;

  PetscFunctionBegin;
  if (mdn->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
  PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
  PetscCall(PetscInfo(mdn->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Receive stashed entries, insert them locally, then assemble the sequential block */
static PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat, MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
  PetscInt      i, *row, *col, flg, j, rstart, ncols;
  PetscMPIInt   n;
  PetscScalar  *val;

  PetscFunctionBegin;
  if (!mdn->donotstash && !mat->nooffprocentries) {
    /* wait on receives */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIDense(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
  }

  PetscCall(MatAssemblyBegin(mdn->A, mode));
  PetscCall(MatAssemblyEnd(mdn->A, mode));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Zero all entries of the local block */
static PetscErrorCode MatZeroEntries_MPIDense(Mat A)
{
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatZeroEntries(l->A));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Zero the given (global) rows, optionally placing diag on the diagonal and
   fixing b so that x satisfies the zeroed equations */
static PetscErrorCode MatZeroRows_MPIDense(Mat A, PetscInt n, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;
  PetscInt      i, len, *lrows;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(PetscLayoutMapLocal(A->rmap, n, rows, &len, &lrows, NULL));
  /* fix right-hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArrayWrite(b, &bb));
    for (i = 0; i < len; ++i) bb[lrows[i]] = diag * xx[lrows[i]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArrayWrite(b, &bb));
  }
  PetscCall(MatZeroRows(l->A, len, lrows, 0.0, NULL, NULL));
  if (diag != 0.0) {
    Vec d;

    PetscCall(MatCreateVecs(A, NULL, &d));
    PetscCall(VecSet(d, diag));
    PetscCall(MatDiagonalSet(A, d, INSERT_VALUES));
    PetscCall(VecDestroy(&d));
  }
  PetscCall(PetscFree(lrows));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat, Vec, Vec);
PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat, Vec, Vec, Vec);
PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat, Vec, Vec);
PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat, Vec, Vec, Vec);

/* yy = A xx: broadcast the full xx into the local work vector, then multiply locally */
static PetscErrorCode MatMult_MPIDense(Mat mat, Vec xx, Vec yy)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
  PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
  PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(mdn->Mvctx,
                          MPIU_SCALAR, ax, ay, MPI_REPLACE));
  PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
  PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
  PetscCall((*mdn->A->ops->mult)(mdn->A, mdn->lvec, yy));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* zz = yy + A[:, c_start:c_end] restriction of the product; xx is broadcast first */
static PetscErrorCode MatMultAddColumnRange_MPIDense(Mat mat, Vec xx, Vec yy, Vec zz, PetscInt c_start, PetscInt c_end)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
  PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
  PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
  PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
  PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
  PetscUseMethod(mdn->A, "MatMultAddColumnRange_C", (Mat, Vec, Vec, Vec, PetscInt, PetscInt), (mdn->A, mdn->lvec, yy, zz, c_start, c_end));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* zz = yy + A xx: broadcast xx, then local multiply-add */
static PetscErrorCode MatMultAdd_MPIDense(Mat mat, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
  PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
  PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
  PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
  PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
  PetscCall((*mdn->A->ops->multadd)(mdn->A, mdn->lvec, yy, zz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* yy = A[:, c_start:c_end]^H xx: local multiply into lvec, then sum-reduce across ranks */
static PetscErrorCode MatMultHermitianTransposeColumnRange_MPIDense(Mat A, Vec xx, Vec yy, PetscInt c_start, PetscInt c_end)
{
  Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
  PetscCall(VecSet(yy, 0.0));
  PetscUseMethod(a->A, "MatMultHermitianTransposeColumnRange_C", (Mat, Vec, Vec, PetscInt, PetscInt), (a->A, xx, a->lvec, c_start, c_end));
  PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(yy, &ay, &aymtype));
  PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
  PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
  PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
  PetscCall(VecRestoreArrayAndMemType(yy, &ay));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* yy = A^T xx (herm == PETSC_FALSE) or A^H xx (herm == PETSC_TRUE):
   local (Hermitian) transpose multiply into lvec, then sum-reduce into yy */
static PetscErrorCode MatMultTransposeKernel_MPIDense(Mat A, Vec xx, Vec yy, PetscBool herm)
{
  Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
  PetscCall(VecSet(yy, 0.0));
  if (herm) PetscCall((*a->A->ops->multhermitiantranspose)(a->A, xx, a->lvec));
  else PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec));
  PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(yy, &ay, &aymtype));
  PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
  PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
  PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
PetscCall(VecRestoreArrayAndMemType(yy, &ay)); 566 PetscFunctionReturn(PETSC_SUCCESS); 567 } 568 569 static PetscErrorCode MatMultHermitianTransposeAddColumnRange_MPIDense(Mat A, Vec xx, Vec yy, Vec zz, PetscInt c_start, PetscInt c_end) 570 { 571 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 572 const PetscScalar *ax; 573 PetscScalar *ay; 574 PetscMemType axmtype, aymtype; 575 576 PetscFunctionBegin; 577 if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); 578 PetscCall(VecCopy(yy, zz)); 579 PetscMPIInt rank; 580 PetscCallMPI(MPI_Comm_rank(MPI_COMM_WORLD, &rank)); 581 PetscUseMethod(a->A, "MatMultHermitianTransposeColumnRange_C", (Mat, Vec, Vec, PetscInt, PetscInt), (a->A, xx, a->lvec, c_start, c_end)); 582 PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype)); 583 PetscCall(VecGetArrayAndMemType(zz, &ay, &aymtype)); 584 PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM)); 585 PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM)); 586 PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax)); 587 PetscCall(VecRestoreArrayAndMemType(zz, &ay)); 588 PetscFunctionReturn(PETSC_SUCCESS); 589 } 590 591 static PetscErrorCode MatMultTransposeAddKernel_MPIDense(Mat A, Vec xx, Vec yy, Vec zz, PetscBool herm) 592 { 593 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 594 const PetscScalar *ax; 595 PetscScalar *ay; 596 PetscMemType axmtype, aymtype; 597 598 PetscFunctionBegin; 599 if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); 600 PetscCall(VecCopy(yy, zz)); 601 if (herm) PetscCall((*a->A->ops->multhermitiantranspose)(a->A, xx, a->lvec)); 602 else PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec)); 603 PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype)); 604 PetscCall(VecGetArrayAndMemType(zz, &ay, &aymtype)); 605 PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM)); 606 PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM)); 607 
PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax)); 608 PetscCall(VecRestoreArrayAndMemType(zz, &ay)); 609 PetscFunctionReturn(PETSC_SUCCESS); 610 } 611 612 static PetscErrorCode MatMultTranspose_MPIDense(Mat A, Vec xx, Vec yy) 613 { 614 PetscFunctionBegin; 615 PetscCall(MatMultTransposeKernel_MPIDense(A, xx, yy, PETSC_FALSE)); 616 PetscFunctionReturn(PETSC_SUCCESS); 617 } 618 619 static PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A, Vec xx, Vec yy, Vec zz) 620 { 621 PetscFunctionBegin; 622 PetscCall(MatMultTransposeAddKernel_MPIDense(A, xx, yy, zz, PETSC_FALSE)); 623 PetscFunctionReturn(PETSC_SUCCESS); 624 } 625 626 static PetscErrorCode MatMultHermitianTranspose_MPIDense(Mat A, Vec xx, Vec yy) 627 { 628 PetscFunctionBegin; 629 PetscCall(MatMultTransposeKernel_MPIDense(A, xx, yy, PETSC_TRUE)); 630 PetscFunctionReturn(PETSC_SUCCESS); 631 } 632 633 static PetscErrorCode MatMultHermitianTransposeAdd_MPIDense(Mat A, Vec xx, Vec yy, Vec zz) 634 { 635 PetscFunctionBegin; 636 PetscCall(MatMultTransposeAddKernel_MPIDense(A, xx, yy, zz, PETSC_TRUE)); 637 PetscFunctionReturn(PETSC_SUCCESS); 638 } 639 640 PetscErrorCode MatGetDiagonal_MPIDense(Mat A, Vec v) 641 { 642 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 643 PetscInt lda, len, i, nl, ng, m = A->rmap->n, radd; 644 PetscScalar *x; 645 const PetscScalar *av; 646 647 PetscFunctionBegin; 648 PetscCall(VecGetArray(v, &x)); 649 PetscCall(VecGetSize(v, &ng)); 650 PetscCheck(ng == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming mat and vec"); 651 PetscCall(VecGetLocalSize(v, &nl)); 652 len = PetscMin(a->A->rmap->n, a->A->cmap->n); 653 radd = A->rmap->rstart * m; 654 PetscCall(MatDenseGetArrayRead(a->A, &av)); 655 PetscCall(MatDenseGetLDA(a->A, &lda)); 656 for (i = 0; i < len; i++) x[i] = av[radd + i * lda + i]; 657 PetscCall(MatDenseRestoreArrayRead(a->A, &av)); 658 if (nl - i > 0) PetscCall(PetscArrayzero(x + i, nl - i)); 659 PetscCall(VecRestoreArray(v, &x)); 660 PetscFunctionReturn(PETSC_SUCCESS); 661 } 
/* Destroy the MPI dense matrix: release inner objects and clear every composed method */
static PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;

  PetscFunctionBegin;
  PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
  PetscCall(MatStashDestroy_Private(&mat->stash));
  PetscCheck(!mdn->vecinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!mdn->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDestroy(&mdn->A));
  PetscCall(VecDestroy(&mdn->lvec));
  PetscCall(PetscSFDestroy(&mdn->Mvctx));
  PetscCall(VecDestroy(&mdn->cvec));
  PetscCall(MatDestroy(&mdn->cmat));

  PetscCall(PetscFree(mat->data));
  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));

  /* remove every composed method so a subsequent MatSetType() starts clean */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", NULL));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", NULL));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", NULL));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensecuda_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAPlaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAResetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAReplaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDASetPreallocation_C", NULL));
#endif
#if defined(PETSC_HAVE_HIP)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensehip_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensehip_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidensehip_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaijhipsparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPPlaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPResetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPReplaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPSetPreallocation_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultAddColumnRange_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeColumnRange_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeAddColumnRange_C", NULL));

  /* drop the cached diagonal block created by MatGetDiagonalBlock_MPIDense() */
  PetscCall(PetscObjectCompose((PetscObject)mat, "DiagonalBlock", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

#include <petscdraw.h>
/* View for ASCII, draw, and socket viewers (definition continues past this chunk) */
static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
{
  Mat_MPIDense     *mdn = (Mat_MPIDense *)mat->data;
  PetscMPIInt       rank;
  PetscViewerType   vtype;
  PetscBool         iascii, isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  if (iascii) {
    PetscCall(PetscViewerGetType(viewer, &vtype));
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %" PetscInt_FMT " \n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                   (PetscInt)info.memory));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      if (mdn->Mvctx) PetscCall(PetscSFView(mdn->Mvctx, viewer));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(PETSC_SUCCESS);
    }
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;

    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawIsNull(draw, &isnull));
    if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
  }

  {
    /* assemble the entire matrix onto first processor.
*/
    Mat          A;
    PetscInt     M = mat->rmap->N, N = mat->cmap->N, m, row, i, nz;
    PetscInt    *cols;
    PetscScalar *vals;

    /* Gather a copy of the entire matrix onto rank 0; all other ranks own 0 rows of the copy */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
    if (rank == 0) {
      PetscCall(MatSetSizes(A, M, N, M, N));
    } else {
      PetscCall(MatSetSizes(A, 0, 0, M, N));
    }
    /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
    PetscCall(MatSetType(A, MATMPIDENSE));
    PetscCall(MatMPIDenseSetPreallocation(A, NULL));

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    A->insertmode = INSERT_VALUES;

    row = mat->rmap->rstart;
    m   = mdn->A->rmap->n;
    /* Insert the locally owned rows one at a time; off-process values travel through the stash */
    for (i = 0; i < m; i++) {
      PetscCall(MatGetRow_MPIDense(mat, row, &nz, &cols, &vals));
      PetscCall(MatSetValues_MPIDense(A, 1, &row, nz, cols, vals, INSERT_VALUES));
      PetscCall(MatRestoreRow_MPIDense(mat, row, &nz, &cols, &vals));
      row++;
    }

    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    if (rank == 0) {
      /* Rank 0 now holds the whole matrix in its inner sequential block; view that */
      PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIDense *)A->data)->A, ((PetscObject)mat)->name));
      PetscCall(MatView_SeqDense(((Mat_MPIDense *)A->data)->A, sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatView_MPIDense - viewer-type dispatch: ASCII, socket, and draw viewers go through
   the gather-on-rank-0 path above; binary viewers use the dense binary writer.
   Other viewer types are silently ignored. */
static PetscErrorCode MatView_MPIDense(Mat mat, PetscViewer viewer)
{
  PetscBool iascii, isbinary, isdraw, issocket;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));

  if (iascii || issocket || isdraw) {
    PetscCall(MatView_MPIDense_ASCIIorDraworSocket(mat, viewer));
  } else if (isbinary) PetscCall(MatView_Dense_Binary(mat, viewer));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatGetInfo_MPIDense - collect statistics from the local dense block and combine
   them across ranks according to flag (MAT_LOCAL, MAT_GLOBAL_MAX, or MAT_GLOBAL_SUM). */
static PetscErrorCode MatGetInfo_MPIDense(Mat A, MatInfoType flag, MatInfo *info)
{
  Mat_MPIDense  *mat = (Mat_MPIDense *)A->data;
  Mat            mdn = mat->A;
  PetscLogDouble isend[5], irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;

  /* Statistics of the local (sequential) block only */
  PetscCall(MatGetInfo(mdn, MAT_LOCAL, info));

  isend[0] = info->nz_used;
  isend[1] = info->nz_allocated;
  isend[2] = info->nz_unneeded;
  isend[3] = info->memory;
  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    /* local values were already placed in info by the call above; copy-back is a no-op kept for symmetry */
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)A)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)A)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatSetOption_MPIDense - forward applicable options to the local block, record the
   stash/row-orientation flags on this struct, and ignore options with no meaning
   for a dense storage format. */
static PetscErrorCode MatSetOption_MPIDense(Mat A, MatOption op, PetscBool flg)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  switch (op) {
  case
MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
    break;
  case MAT_ROW_ORIENTED:
    /* remembered locally so MatSetValues interprets incoming blocks correctly */
    MatCheckPreallocated(A, 1);
    a->roworiented = flg;
    PetscCall(MatSetOption(a->A, op, flg));
    break;
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_USE_HASH_TABLE:
  case MAT_SORTED_FULL:
    PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    /* disables the stash: off-process MatSetValues entries will be dropped */
    a->donotstash = flg;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
  case MAT_SPD:
  case MAT_IGNORE_LOWER_TRIANGULAR:
  case MAT_IGNORE_ZERO_ENTRIES:
  case MAT_SPD_ETERNAL:
    /* if the diagonal matrix is square it inherits some of the properties above */
    PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
    break;
  default:
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %s", MatOptions[op]);
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatDiagonalScale_MPIDense - compute A = diag(ll) * A * diag(rr) in place.
   ll is distributed like the rows of A; rr is broadcast into the local work
   vector lvec via the Mvctx star forest so each rank has all column scalings. */
static PetscErrorCode MatDiagonalScale_MPIDense(Mat A, Vec ll, Vec rr)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)A->data;
  const PetscScalar *l;
  PetscScalar        x, *v, *vv, *r;
  PetscInt           i, j, s2a, s3a, s2, s3, m = mdn->A->rmap->n, n = mdn->A->cmap->n, lda;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArray(mdn->A, &vv));
  PetscCall(MatDenseGetLDA(mdn->A, &lda));
  PetscCall(MatGetLocalSize(A, &s2, &s3));
  if (ll) {
    PetscCall(VecGetLocalSize(ll, &s2a));
    PetscCheck(s2a == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left scaling vector non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT, s2a, s2);
    PetscCall(VecGetArrayRead(ll, &l));
    /* scale row i: stride by lda to walk along the row of the column-major array */
    for (i = 0; i < m; i++) {
      x = l[i];
      v = vv + i;
      for (j = 0; j < n; j++) {
        (*v) *= x;
        v += lda;
      }
    }
    PetscCall(VecRestoreArrayRead(ll, &l));
    PetscCall(PetscLogFlops(1.0 * n * m));
  }
  if (rr) {
    const PetscScalar *ar;

    PetscCall(VecGetLocalSize(rr, &s3a));
    PetscCheck(s3a == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Right scaling vec non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT ".", s3a, s3);
    PetscCall(VecGetArrayRead(rr, &ar));
    /* lazily build the communication pattern used to replicate rr on every rank */
    if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
    PetscCall(VecGetArray(mdn->lvec, &r));
    PetscCall(PetscSFBcastBegin(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE));
    PetscCall(VecRestoreArrayRead(rr, &ar));
    /* scale column i of the local block */
    for (i = 0; i < n; i++) {
      x = r[i];
      v = vv + i * lda;
      for (j = 0; j < m; j++) (*v++) *= x;
    }
    PetscCall(VecRestoreArray(mdn->lvec, &r));
    PetscCall(PetscLogFlops(1.0 * n * m));
  }
  PetscCall(MatDenseRestoreArray(mdn->A, &vv));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatNorm_MPIDense - Frobenius, one, and infinity norms of the distributed matrix;
   the two norm is unsupported. On one rank it delegates entirely to the local block.
   NOTE(review): the Frobenius and one-norm loops walk the local array contiguously,
   which assumes the local block's lda equals its local row count -- confirm whether
   MatDenseSetLDA() can invalidate that here. */
static PetscErrorCode MatNorm_MPIDense(Mat A, NormType type, PetscReal *nrm)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)A->data;
  PetscInt           i, j;
  PetscMPIInt        size;
  PetscReal          sum = 0.0;
  const PetscScalar *av, *v;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArrayRead(mdn->A, &av));
  v = av;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    PetscCall(MatNorm(mdn->A, type, nrm));
  } else {
    if (type == NORM_FROBENIUS) {
      /* sum of |a_ij|^2 over the local block, then global sum and sqrt */
      for (i = 0; i < mdn->A->cmap->n * mdn->A->rmap->n; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
      *nrm = PetscSqrtReal(*nrm);
      PetscCall(PetscLogFlops(2.0 * mdn->A->cmap->n * mdn->A->rmap->n));
    } else if (type == NORM_1) {
      /* max column sum: accumulate local column sums, reduce, take the max */
      PetscReal *tmp, *tmp2;
      PetscCall(PetscCalloc2(A->cmap->N, &tmp, A->cmap->N, &tmp2));
      *nrm = 0.0;
      v    = av;
      for (j = 0; j < mdn->A->cmap->n; j++) {
        for (i = 0; i < mdn->A->rmap->n; i++) {
          tmp[j] += PetscAbsScalar(*v);
          v++;
        }
      }
      PetscCall(MPIU_Allreduce(tmp, tmp2, A->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
      for (j = 0; j < A->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      PetscCall(PetscFree2(tmp, tmp2));
      PetscCall(PetscLogFlops(A->cmap->n * A->rmap->n));
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp;
      PetscCall(MatNorm(mdn->A, type, &ntemp));
      PetscCall(MPIU_Allreduce(&ntemp, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
    } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for two norm");
  }
  PetscCall(MatDenseRestoreArrayRead(mdn->A, &av));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatTranspose_MPIDense - build the transpose by inserting each local column of A
   as a (global) row of B through MatSetValues; off-process entries are forwarded
   by the stash during assembly. */
static PetscErrorCode MatTranspose_MPIDense(Mat A, MatReuse reuse, Mat *matout)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat           B;
  PetscInt      M = A->rmap->N, N = A->cmap->N, m, n, *rwork, rstart = A->rmap->rstart;
  PetscInt      j, i, lda;
  PetscScalar  *v;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
    PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
    PetscCall(MatMPIDenseSetPreallocation(B, NULL));
  } else B = *matout;

  m = a->A->rmap->n;
  n = a->A->cmap->n;
  PetscCall(MatDenseGetArrayRead(a->A, (const PetscScalar **)&v));
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(PetscMalloc1(m, &rwork));
  /* rwork holds the global row indices of the locally owned rows of A,
     which become the column indices of B */
  for (i = 0; i < m; i++) rwork[i] = rstart + i;
  for (j = 0; j < n; j++) {
    PetscCall(MatSetValues(B, 1, &j,
m, rwork, v, INSERT_VALUES)); 1072 v = PetscSafePointerPlusOffset(v, lda); 1073 } 1074 PetscCall(MatDenseRestoreArrayRead(a->A, (const PetscScalar **)&v)); 1075 PetscCall(PetscFree(rwork)); 1076 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 1077 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 1078 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 1079 *matout = B; 1080 } else { 1081 PetscCall(MatHeaderMerge(A, &B)); 1082 } 1083 PetscFunctionReturn(PETSC_SUCCESS); 1084 } 1085 1086 static PetscErrorCode MatDuplicate_MPIDense(Mat, MatDuplicateOption, Mat *); 1087 PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat, PetscScalar); 1088 1089 static PetscErrorCode MatSetUp_MPIDense(Mat A) 1090 { 1091 PetscFunctionBegin; 1092 PetscCall(PetscLayoutSetUp(A->rmap)); 1093 PetscCall(PetscLayoutSetUp(A->cmap)); 1094 if (!A->preallocated) PetscCall(MatMPIDenseSetPreallocation(A, NULL)); 1095 PetscFunctionReturn(PETSC_SUCCESS); 1096 } 1097 1098 static PetscErrorCode MatAXPY_MPIDense(Mat Y, PetscScalar alpha, Mat X, MatStructure str) 1099 { 1100 Mat_MPIDense *A = (Mat_MPIDense *)Y->data, *B = (Mat_MPIDense *)X->data; 1101 1102 PetscFunctionBegin; 1103 PetscCall(MatAXPY(A->A, alpha, B->A, str)); 1104 PetscFunctionReturn(PETSC_SUCCESS); 1105 } 1106 1107 static PetscErrorCode MatConjugate_MPIDense(Mat mat) 1108 { 1109 Mat_MPIDense *a = (Mat_MPIDense *)mat->data; 1110 1111 PetscFunctionBegin; 1112 PetscCall(MatConjugate(a->A)); 1113 PetscFunctionReturn(PETSC_SUCCESS); 1114 } 1115 1116 static PetscErrorCode MatRealPart_MPIDense(Mat A) 1117 { 1118 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1119 1120 PetscFunctionBegin; 1121 PetscCall(MatRealPart(a->A)); 1122 PetscFunctionReturn(PETSC_SUCCESS); 1123 } 1124 1125 static PetscErrorCode MatImaginaryPart_MPIDense(Mat A) 1126 { 1127 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1128 1129 PetscFunctionBegin; 1130 PetscCall(MatImaginaryPart(a->A)); 1131 PetscFunctionReturn(PETSC_SUCCESS); 1132 } 1133 1134 static PetscErrorCode 
MatGetColumnVector_MPIDense(Mat A, Vec v, PetscInt col) 1135 { 1136 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1137 1138 PetscFunctionBegin; 1139 PetscCheck(a->A, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing local matrix"); 1140 PetscCheck(a->A->ops->getcolumnvector, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing get column operation"); 1141 PetscCall((*a->A->ops->getcolumnvector)(a->A, v, col)); 1142 PetscFunctionReturn(PETSC_SUCCESS); 1143 } 1144 1145 PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat, PetscInt, PetscReal *); 1146 1147 static PetscErrorCode MatGetColumnReductions_MPIDense(Mat A, PetscInt type, PetscReal *reductions) 1148 { 1149 PetscInt i, m, n; 1150 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1151 PetscReal *work; 1152 1153 PetscFunctionBegin; 1154 PetscCall(MatGetSize(A, &m, &n)); 1155 PetscCall(PetscMalloc1(n, &work)); 1156 if (type == REDUCTION_MEAN_REALPART) { 1157 PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_REALPART, work)); 1158 } else if (type == REDUCTION_MEAN_IMAGINARYPART) { 1159 PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_IMAGINARYPART, work)); 1160 } else { 1161 PetscCall(MatGetColumnReductions_SeqDense(a->A, type, work)); 1162 } 1163 if (type == NORM_2) { 1164 for (i = 0; i < n; i++) work[i] *= work[i]; 1165 } 1166 if (type == NORM_INFINITY) { 1167 PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, A->hdr.comm)); 1168 } else { 1169 PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, A->hdr.comm)); 1170 } 1171 PetscCall(PetscFree(work)); 1172 if (type == NORM_2) { 1173 for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]); 1174 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 1175 for (i = 0; i < n; i++) reductions[i] /= m; 1176 } 1177 PetscFunctionReturn(PETSC_SUCCESS); 1178 } 1179 1180 static PetscErrorCode MatSetRandom_MPIDense(Mat x, PetscRandom rctx) 1181 { 1182 
Mat_MPIDense *d = (Mat_MPIDense *)x->data; 1183 1184 PetscFunctionBegin; 1185 PetscCall(MatSetRandom(d->A, rctx)); 1186 #if defined(PETSC_HAVE_DEVICE) 1187 x->offloadmask = d->A->offloadmask; 1188 #endif 1189 PetscFunctionReturn(PETSC_SUCCESS); 1190 } 1191 1192 static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A, PetscBool *missing, PetscInt *d) 1193 { 1194 PetscFunctionBegin; 1195 *missing = PETSC_FALSE; 1196 PetscFunctionReturn(PETSC_SUCCESS); 1197 } 1198 1199 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat); 1200 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat); 1201 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat); 1202 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat); 1203 static PetscErrorCode MatEqual_MPIDense(Mat, Mat, PetscBool *); 1204 static PetscErrorCode MatLoad_MPIDense(Mat, PetscViewer); 1205 static PetscErrorCode MatProductSetFromOptions_MPIDense(Mat); 1206 1207 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense, 1208 MatGetRow_MPIDense, 1209 MatRestoreRow_MPIDense, 1210 MatMult_MPIDense, 1211 /* 4*/ MatMultAdd_MPIDense, 1212 MatMultTranspose_MPIDense, 1213 MatMultTransposeAdd_MPIDense, 1214 NULL, 1215 NULL, 1216 NULL, 1217 /* 10*/ NULL, 1218 NULL, 1219 NULL, 1220 NULL, 1221 MatTranspose_MPIDense, 1222 /* 15*/ MatGetInfo_MPIDense, 1223 MatEqual_MPIDense, 1224 MatGetDiagonal_MPIDense, 1225 MatDiagonalScale_MPIDense, 1226 MatNorm_MPIDense, 1227 /* 20*/ MatAssemblyBegin_MPIDense, 1228 MatAssemblyEnd_MPIDense, 1229 MatSetOption_MPIDense, 1230 MatZeroEntries_MPIDense, 1231 /* 24*/ MatZeroRows_MPIDense, 1232 NULL, 1233 NULL, 1234 NULL, 1235 NULL, 1236 /* 29*/ MatSetUp_MPIDense, 1237 NULL, 1238 NULL, 1239 MatGetDiagonalBlock_MPIDense, 1240 NULL, 1241 /* 34*/ MatDuplicate_MPIDense, 1242 NULL, 1243 NULL, 1244 NULL, 1245 NULL, 1246 /* 39*/ MatAXPY_MPIDense, 1247 MatCreateSubMatrices_MPIDense, 
NULL,
                                       MatGetValues_MPIDense,
                                       MatCopy_MPIDense,
                                       /* 44*/ NULL,
                                       MatScale_MPIDense,
                                       MatShift_MPIDense,
                                       NULL,
                                       NULL,
                                       /* 49*/ MatSetRandom_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 54*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 59*/ MatCreateSubMatrix_MPIDense,
                                       MatDestroy_MPIDense,
                                       MatView_MPIDense,
                                       NULL,
                                       NULL,
                                       /* 64*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 69*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 74*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 79*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 83*/ MatLoad_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 89*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 94*/ NULL,
                                       NULL,
                                       MatMatTransposeMultSymbolic_MPIDense_MPIDense,
                                       MatMatTransposeMultNumeric_MPIDense_MPIDense,
                                       NULL,
                                       /* 99*/ MatProductSetFromOptions_MPIDense,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPIDense,
                                       NULL,
                                       /*104*/ NULL,
                                       MatRealPart_MPIDense,
                                       MatImaginaryPart_MPIDense,
                                       NULL,
                                       NULL,
                                       /*109*/ NULL,
                                       NULL,
                                       NULL,
                                       MatGetColumnVector_MPIDense,
                                       MatMissingDiagonal_MPIDense,
                                       /*114*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*119*/ NULL,
                                       NULL,
                                       MatMultHermitianTranspose_MPIDense,
                                       MatMultHermitianTransposeAdd_MPIDense,
                                       NULL,
                                       /*124*/ NULL,
                                       MatGetColumnReductions_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*129*/ NULL,
                                       NULL,
                                       MatTransposeMatMultSymbolic_MPIDense_MPIDense,
                                       MatTransposeMatMultNumeric_MPIDense_MPIDense,
                                       NULL,
                                       /*134*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*139*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatCreateMPIMatConcatenateSeqMat_MPIDense,
                                       /*145*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*150*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL};

/* MatMPIDenseSetPreallocation_MPIDense - create (if needed) and preallocate the
   sequential local block a->A, whose type is matched to the parallel type
   (CUDA/HIP aware).  data, if non-NULL, becomes the local storage. */
static PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat, PetscScalar *data)
{
  Mat_MPIDense *a     = (Mat_MPIDense *)mat->data;
  MatType       mtype = MATSEQDENSE;

  PetscFunctionBegin;
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));
  if (!a->A) {
    /* the local block holds all N global columns of the locally owned rows */
    PetscCall(MatCreate(PETSC_COMM_SELF, &a->A));
    PetscCall(MatSetSizes(a->A, mat->rmap->n, mat->cmap->N, mat->rmap->n, mat->cmap->N));
  }
#if defined(PETSC_HAVE_CUDA)
  PetscBool iscuda;
  PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSECUDA, &iscuda));
  if (iscuda) mtype = MATSEQDENSECUDA;
#endif
#if defined(PETSC_HAVE_HIP)
  PetscBool iship;
  PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSEHIP, &iship));
  if (iship) mtype = MATSEQDENSEHIP;
#endif
  PetscCall(MatSetType(a->A, mtype));
  PetscCall(MatSeqDenseSetPreallocation(a->A, data));
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
  mat->offloadmask = a->A->offloadmask;
#endif
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatConvert_MPIAIJ_MPIDense - convert a parallel AIJ matrix to parallel dense:
   merge the local AIJ parts into one sequential matrix, densify it, then
   concatenate the per-rank dense blocks into the parallel result. */
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIDense(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat B, C;

  PetscFunctionBegin;
  PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &C));
  PetscCall(MatConvert_SeqAIJ_SeqDense(C, MATSEQDENSE, MAT_INITIAL_MATRIX, &B));
  PetscCall(MatDestroy(&C));
  if (reuse == MAT_REUSE_MATRIX) {
    C = *newmat;
  } else C = NULL;
  PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ?
MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C));
  PetscCall(MatDestroy(&B));
  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A, &C));
  } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatConvert_MPIDense_MPIAIJ - convert parallel dense to parallel AIJ: sparsify
   the local block, then concatenate the per-rank sequential matrices. */
static PetscErrorCode MatConvert_MPIDense_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat B, C;

  PetscFunctionBegin;
  /* C is a borrowed reference to the inner sequential block; do not destroy it */
  PetscCall(MatDenseGetLocalMatrix(A, &C));
  PetscCall(MatConvert_SeqDense_SeqAIJ(C, MATSEQAIJ, MAT_INITIAL_MATRIX, &B));
  if (reuse == MAT_REUSE_MATRIX) {
    C = *newmat;
  } else C = NULL;
  PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C));
  PetscCall(MatDestroy(&B));
  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A, &C));
  } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
  PetscFunctionReturn(PETSC_SUCCESS);
}

#if defined(PETSC_HAVE_ELEMENTAL)
/* MatConvert_MPIDense_Elemental - convert to Elemental's 2D block-cyclic layout by
   re-inserting all locally owned rows through MatSetValues. */
PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat           mat_elemental;
  PetscScalar  *v;
  PetscInt      m = A->rmap->n, N = A->cmap->N, rstart = A->rmap->rstart, i, *rows, *cols, lda;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    mat_elemental = *newmat;
    PetscCall(MatZeroEntries(*newmat));
  } else {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental));
    PetscCall(MatSetSizes(mat_elemental, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N));
    PetscCall(MatSetType(mat_elemental, MATELEMENTAL));
    PetscCall(MatSetUp(mat_elemental));
    PetscCall(MatSetOption(mat_elemental, MAT_ROW_ORIENTED, PETSC_FALSE));
  }

  PetscCall(PetscMalloc2(m, &rows, N, &cols));
  for (i = 0; i < N; i++) cols[i] = i;
  for (i = 0; i < m; i++) rows[i] = rstart + i;

  /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
  PetscCall(MatDenseGetArray(A, &v));
  PetscCall(MatDenseGetLDA(a->A, &lda));
  /* contiguous storage can be inserted in one call; otherwise go column by column */
  if (lda == m) PetscCall(MatSetValues(mat_elemental, m, rows, N, cols, v, ADD_VALUES));
  else {
    for (i = 0; i < N; i++) PetscCall(MatSetValues(mat_elemental, m, rows, 1, &i, v + lda * i, ADD_VALUES));
  }
  PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY));
  PetscCall(MatDenseRestoreArray(A, &v));
  PetscCall(PetscFree2(rows, cols));

  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A, &mat_elemental));
  } else {
    *newmat = mat_elemental;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif

/* MatDenseGetColumn_MPIDense - direct pointer access to a column of the local block. */
static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A, PetscInt col, PetscScalar **vals)
{
  Mat_MPIDense *mat = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseGetColumn(mat->A, col, vals));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatDenseRestoreColumn_MPIDense - release a pointer obtained with MatDenseGetColumn_MPIDense. */
static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A, PetscScalar **vals)
{
  Mat_MPIDense *mat = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseRestoreColumn(mat->A, vals));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatCreateMPIMatConcatenateSeqMat_MPIDense - stack each rank's sequential dense
   matrix into one parallel dense matrix. MAT_INITIAL_MATRIX creates the result
   (symbolic phase); both paths then copy the local data (numeric phase). */
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
{
  Mat_MPIDense *mat;
  PetscInt      m, nloc, N;

  PetscFunctionBegin;
  PetscCall(MatGetSize(inmat, &m, &N));
  PetscCall(MatGetLocalSize(inmat, NULL, &nloc));
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    PetscInt sum;

    if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N));
    /* Check sum(n) = N */
    PetscCall(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM,
comm));
    PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N);

    PetscCall(MatCreateDense(comm, m, n, PETSC_DETERMINE, N, NULL, outmat));
    PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  }

  /* numeric phase */
  mat = (Mat_MPIDense *)(*outmat)->data;
  PetscCall(MatCopy(inmat, mat->A, SAME_NONZERO_PATTERN));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatDenseGetColumnVec_MPIDense - expose global column col as a Vec that aliases
   the local block's storage (read/write).  Only one column vector may be
   outstanding at a time; a->vecinuse records col+1 so 0 means "not in use". */
PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscInt      lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
  a->vecinuse = col + 1;
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(MatDenseGetArray(a->A, (PetscScalar **)&a->ptrinuse));
  /* column col starts lda entries after column col-1 in the column-major array */
  PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)lda)));
  *v = a->cvec;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatDenseRestoreColumnVec_MPIDense - return the column Vec and release the array access. */
PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector");
  a->vecinuse = 0;
  PetscCall(MatDenseRestoreArray(a->A, (PetscScalar **)&a->ptrinuse));
  PetscCall(VecResetArray(a->cvec));
  if (v) *v = NULL;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatDenseGetColumnVecRead_MPIDense - read-only variant: the returned Vec is read-locked. */
PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscInt      lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
  a->vecinuse = col + 1;
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(MatDenseGetArrayRead(a->A, &a->ptrinuse));
  PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)lda)));
  PetscCall(VecLockReadPush(a->cvec));
  *v = a->cvec;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatDenseRestoreColumnVecRead_MPIDense - pop the read lock and release the read-only array. */
PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
  a->vecinuse = 0;
  PetscCall(MatDenseRestoreArrayRead(a->A, &a->ptrinuse));
  PetscCall(VecLockReadPop(a->cvec));
  PetscCall(VecResetArray(a->cvec));
  if (v) *v = NULL;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatDenseGetColumnVecWrite_MPIDense - write-only variant (previous contents need not be valid). */
PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscInt      lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
  a->vecinuse = col + 1;
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(MatDenseGetArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
  PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)lda)));
  *v = a->cvec;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatDenseRestoreColumnVecWrite_MPIDense - return the write-only column Vec. */
PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
  a->vecinuse = 0;
  PetscCall(MatDenseRestoreArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
  PetscCall(VecResetArray(a->cvec));
  if (v) *v = NULL;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatDenseGetSubMatrix_MPIDense - expose rows [rbegin,rend) x columns [cbegin,cend)
   as a matrix aliasing this one's storage.  The cached a->cmat is reused when its
   layouts match the requested block; a->matinuse marks the view outstanding. */
static PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A, PetscInt rbegin, PetscInt rend, PetscInt cbegin, PetscInt cend, Mat *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat_MPIDense *c;
  MPI_Comm      comm;
  PetscInt      pbegin, pend;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCheck(!a->vecinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  /* clip the requested global row range against this rank's ownership range */
  pbegin = PetscMax(0, PetscMin(A->rmap->rend, rbegin) - A->rmap->rstart);
  pend   = PetscMin(A->rmap->n, PetscMax(0, rend - A->rmap->rstart));
  if (!a->cmat) {
    PetscCall(MatCreate(comm, &a->cmat));
    PetscCall(MatSetType(a->cmat, ((PetscObject)A)->type_name));
    if (rend - rbegin == A->rmap->N) PetscCall(PetscLayoutReference(A->rmap, &a->cmat->rmap));
    else {
      PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin));
      PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin));
      PetscCall(PetscLayoutSetUp(a->cmat->rmap));
    }
    PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin));
    PetscCall(PetscLayoutSetUp(a->cmat->cmap));
  } else {
    /* decide collectively whether the cached layouts can be reused */
    PetscBool same = (PetscBool)(rend - rbegin == a->cmat->rmap->N);
    if (same && a->cmat->rmap->N != A->rmap->N) {
      same = (PetscBool)(pend - pbegin == a->cmat->rmap->n);
      PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &same, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
    }
    if (!same) {
      PetscCall(PetscLayoutDestroy(&a->cmat->rmap));
      PetscCall(PetscLayoutCreate(comm, &a->cmat->rmap));
      PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin));
      PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin));
      PetscCall(PetscLayoutSetUp(a->cmat->rmap));
    }
    if (cend - cbegin != a->cmat->cmap->N) {
      PetscCall(PetscLayoutDestroy(&a->cmat->cmap));
      PetscCall(PetscLayoutCreate(comm, &a->cmat->cmap));
      PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin));
      PetscCall(PetscLayoutSetUp(a->cmat->cmap));
    }
  }
  c = (Mat_MPIDense *)a->cmat->data;
  PetscCheck(!c->A, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseGetSubMatrix(a->A, pbegin, pend, cbegin, cend, &c->A));

  a->cmat->preallocated = PETSC_TRUE;
  a->cmat->assembled    = PETSC_TRUE;
#if defined(PETSC_HAVE_DEVICE)
  a->cmat->offloadmask = c->A->offloadmask;
#endif
  a->matinuse = cbegin + 1;
  *v          = a->cmat;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatDenseRestoreSubMatrix_MPIDense - return the view obtained with
   MatDenseGetSubMatrix_MPIDense and release the inner sub-block. */
static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A, Mat *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat_MPIDense *c;

  PetscFunctionBegin;
  PetscCheck(a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetSubMatrix() first");
  PetscCheck(a->cmat,
PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal matrix");
  PetscCheck(*v == a->cmat, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Not the matrix obtained from MatDenseGetSubMatrix()");
  a->matinuse = 0;
  c           = (Mat_MPIDense *)a->cmat->data;
  PetscCall(MatDenseRestoreSubMatrix(a->A, &c->A));
  if (v) *v = NULL;
#if defined(PETSC_HAVE_DEVICE)
  A->offloadmask = a->A->offloadmask;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*MC
  MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.

  Options Database Key:
. -mat_type mpidense - sets the matrix type to `MATMPIDENSE` during a call to `MatSetFromOptions()`

  Level: beginner

.seealso: [](ch_matrices), `Mat`, `MatCreateDense()`, `MATSEQDENSE`, `MATDENSE`
M*/
/* Type constructor: allocates the Mat_MPIDense data, installs the function table, and
   registers all type-specific methods and conversions via PetscObjectComposeFunction() */
PetscErrorCode MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense *a;

  PetscFunctionBegin;
  PetscCall(PetscNew(&a));
  mat->data   = (void *)a;
  mat->ops[0] = MatOps_Values;

  mat->insertmode = NOT_SET_VALUES;

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;

  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)mat), 1, &mat->stash));

  /* stuff used for matrix vector multiply */
  a->lvec        = NULL;
  a->Mvctx       = NULL;
  a->roworiented = PETSC_TRUE;

  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", MatDenseGetLDA_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", MatDenseSetLDA_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", MatDenseGetArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", MatDenseRestoreArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", MatDenseGetArrayRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", MatDenseRestoreArrayRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", MatDenseGetArrayWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", MatDenseRestoreArrayWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", MatDensePlaceArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", MatDenseResetArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", MatDenseReplaceArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", MatDenseGetSubMatrix_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", MatDenseRestoreSubMatrix_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", MatConvert_MPIDense_MPIAIJ));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", MatConvert_MPIDense_Elemental));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", MatConvert_Dense_ScaLAPACK));
#endif
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", MatConvert_MPIDense_MPIDenseCUDA));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", MatMPIDenseSetPreallocation_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
#endif
#if defined(PETSC_HAVE_HIP)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", MatConvert_MPIDense_MPIDenseHIP));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", MatDenseGetColumn_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", MatDenseRestoreColumn_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultAddColumnRange_C", MatMultAddColumnRange_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeColumnRange_C", MatMultHermitianTransposeColumnRange_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeAddColumnRange_C", MatMultHermitianTransposeAddColumnRange_MPIDense));
  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, MATMPIDENSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*MC
  MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.

  This matrix type is identical to `MATSEQDENSE` when constructed with a single process communicator,
  and `MATMPIDENSE` otherwise.

  Options Database Key:
. -mat_type dense - sets the matrix type to `MATDENSE` during a call to `MatSetFromOptions()`

  Level: beginner

.seealso: [](ch_matrices), `Mat`, `MATSEQDENSE`, `MATMPIDENSE`, `MATDENSECUDA`, `MATDENSEHIP`
M*/

/*@
  MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

  Collective

  Input Parameters:
+ B    - the matrix
- data - optional location of matrix data. Set to `NULL` for PETSc
         to control all matrix memory allocation.

  Level: intermediate

  Notes:
  The dense format is fully compatible with standard Fortran
  storage by columns.

  The data input variable is intended primarily for Fortran programmers
  who wish to allocate their own matrix memory space. Most users should
  set `data` to `NULL`.
.seealso: [](ch_matrices), `Mat`, `MATMPIDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
@*/
PetscErrorCode MatMPIDenseSetPreallocation(Mat B, PetscScalar *data)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  /* PetscTryMethod: silently a no-op when B is not a MATMPIDENSE */
  PetscTryMethod(B, "MatMPIDenseSetPreallocation_C", (Mat, PetscScalar *), (B, data));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatDensePlaceArray - Allows one to replace the array in a `MATDENSE` matrix with an
  array provided by the user. This is useful to avoid copying an array
  into a matrix

  Not Collective

  Input Parameters:
+ mat   - the matrix
- array - the array in column major order

  Level: developer

  Note:
  You can return to the original array with a call to `MatDenseResetArray()`. The user is responsible for freeing this array; it will not be
  freed when the matrix is destroyed.

.seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDenseResetArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`,
          `MatDenseReplaceArray()`
@*/
PetscErrorCode MatDensePlaceArray(Mat mat, const PetscScalar *array)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  PetscUseMethod(mat, "MatDensePlaceArray_C", (Mat, const PetscScalar *), (mat, array));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
  /* the user-provided array lives on the host, so the CPU copy is now authoritative */
  mat->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatDenseResetArray - Resets the matrix array to that it previously had before the call to `MatDensePlaceArray()`

  Not Collective

  Input Parameter:
. mat - the matrix

  Level: developer

  Note:
  You can only call this after a call to `MatDensePlaceArray()`

.seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDensePlaceArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`
@*/
PetscErrorCode MatDenseResetArray(Mat mat)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  PetscUseMethod(mat, "MatDenseResetArray_C", (Mat), (mat));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an
  array provided by the user. This is useful to avoid copying an array
  into a matrix

  Not Collective

  Input Parameters:
+ mat   - the matrix
- array - the array in column major order

  Level: developer

  Note:
  The memory passed in MUST be obtained with `PetscMalloc()` and CANNOT be
  freed by the user. It will be freed when the matrix is destroyed.

.seealso: [](ch_matrices), `Mat`, `MatDensePlaceArray()`, `MatDenseGetArray()`, `VecReplaceArray()`
@*/
PetscErrorCode MatDenseReplaceArray(Mat mat, const PetscScalar *array)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  PetscUseMethod(mat, "MatDenseReplaceArray_C", (Mat, const PetscScalar *), (mat, array));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
  mat->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatCreateDense - Creates a matrix in `MATDENSE` format.

  Collective

  Input Parameters:
+ comm - MPI communicator
. m    - number of local rows (or `PETSC_DECIDE` to have calculated if `M` is given)
. n    - number of local columns (or `PETSC_DECIDE` to have calculated if `N` is given)
. M    - number of global rows (or `PETSC_DECIDE` to have calculated if `m` is given)
. N    - number of global columns (or `PETSC_DECIDE` to have calculated if `n` is given)
- data - optional location of matrix data. Set data to `NULL` (`PETSC_NULL_SCALAR` for Fortran users) for PETSc
         to control all matrix memory allocation.

  Output Parameter:
. A - the matrix

  Level: intermediate

  Notes:
  The dense format is fully compatible with standard Fortran
  storage by columns.

  Although local portions of the matrix are stored in column-major
  order, the matrix is partitioned across MPI ranks by row.

  The data input variable is intended primarily for Fortran programmers
  who wish to allocate their own matrix memory space. Most users should
  set `data` to `NULL` (`PETSC_NULL_SCALAR` for Fortran users).

  The user MUST specify either the local or global matrix dimensions
  (possibly both).

.seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
@*/
PetscErrorCode MatCreateDense(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar *data, Mat *A)
{
  PetscFunctionBegin;
  PetscCall(MatCreate(comm, A));
  PetscCall(MatSetSizes(*A, m, n, M, N));
  PetscCall(MatSetType(*A, MATDENSE));
  /* both preallocation routines are try-methods; only the one matching the actual
     type of *A (seq on one rank, mpi otherwise) takes effect */
  PetscCall(MatSeqDenseSetPreallocation(*A, data));
  PetscCall(MatMPIDenseSetPreallocation(*A, data));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Duplicate an MPIDENSE matrix (layouts are shared by reference; the local
   dense block is duplicated, optionally copying the values) */
static PetscErrorCode MatDuplicate_MPIDense(Mat A, MatDuplicateOption cpvalues, Mat *newmat)
{
  Mat           mat;
  Mat_MPIDense *a, *oldmat = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  *newmat = NULL;
  PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat));
  PetscCall(MatSetSizes(mat, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
  PetscCall(MatSetType(mat, ((PetscObject)A)->type_name));
  a = (Mat_MPIDense *)mat->data;

  mat->factortype   = A->factortype;
  mat->assembled    = PETSC_TRUE;
  mat->preallocated = PETSC_TRUE;

  mat->insertmode = NOT_SET_VALUES;
  a->donotstash   = oldmat->donotstash;

  PetscCall(PetscLayoutReference(A->rmap, &mat->rmap));
  PetscCall(PetscLayoutReference(A->cmap, &mat->cmap));

  PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));

  *newmat = mat;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Load an MPIDENSE matrix from a binary or (when available) HDF5 viewer */
static PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
{
  PetscBool isbinary;
#if defined(PETSC_HAVE_HDF5)
  PetscBool ishdf5;
#endif

  PetscFunctionBegin;
  PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
  PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
  /* force binary viewer to load .info file if it has not yet done so */
  PetscCall(PetscViewerSetUp(viewer));
PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
#if defined(PETSC_HAVE_HDF5)
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
#endif
  if (isbinary) {
    PetscCall(MatLoad_Dense_Binary(newMat, viewer));
#if defined(PETSC_HAVE_HDF5)
  } else if (ishdf5) {
    PetscCall(MatLoad_Dense_HDF5(newMat, viewer));
#endif
  } else SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Equality test: compare the local blocks, then AND the result over the communicator */
static PetscErrorCode MatEqual_MPIDense(Mat A, Mat B, PetscBool *flag)
{
  Mat_MPIDense *matB = (Mat_MPIDense *)B->data, *matA = (Mat_MPIDense *)A->data;
  Mat           a, b;

  PetscFunctionBegin;
  a = matA->A;
  b = matB->A;
  PetscCall(MatEqual(a, b, flag));
  PetscCall(MPIU_Allreduce(MPI_IN_PLACE, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Destructor for the A^T*B product data attached to C */
static PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data)
{
  Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data;

  PetscFunctionBegin;
  PetscCall(PetscFree2(atb->sendbuf, atb->recvcounts));
  PetscCall(MatDestroy(&atb->atb));
  PetscCall(PetscFree(atb));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Destructor for the A*B^T product data attached to C */
static PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data)
{
  Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data;

  PetscFunctionBegin;
  PetscCall(PetscFree2(abt->buf[0], abt->buf[1]));
  PetscCall(PetscFree2(abt->recvcounts, abt->recvdispls));
  PetscCall(PetscFree(abt));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Numeric phase of C = A^T * B: each rank forms its local aseq^T * bseq and the
   per-rank row blocks of C are summed with MPI_Reduce_scatter */
static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
{
  Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
  Mat_TransMatMultDense *atb;
  MPI_Comm               comm;
  PetscMPIInt            size, *recvcounts;
  PetscScalar           *carray, *sendbuf;
  const PetscScalar     *atbarray;
  PetscInt               i, cN = C->cmap->N, proc, k, j, lda;
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C, 3);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
  atb        = (Mat_TransMatMultDense *)C->product->data;
  recvcounts = atb->recvcounts;
  sendbuf    = atb->sendbuf;

  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));

  /* compute atbarray = aseq^T * bseq */
  PetscCall(MatTransposeMatMult(a->A, b->A, atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX, PETSC_DEFAULT, &atb->atb));

  PetscCall(MatGetOwnershipRanges(C, &ranges));

  /* arrange atbarray into sendbuf: the block destined for each rank is packed contiguously */
  PetscCall(MatDenseGetArrayRead(atb->atb, &atbarray));
  PetscCall(MatDenseGetLDA(atb->atb, &lda));
  for (proc = 0, k = 0; proc < size; proc++) {
    for (j = 0; j < cN; j++) {
      for (i = ranges[proc]; i < ranges[proc + 1]; i++) sendbuf[k++] = atbarray[i + j * lda];
    }
  }
  PetscCall(MatDenseRestoreArrayRead(atb->atb, &atbarray));

  /* sum all atbarray to local values of C */
  PetscCall(MatDenseGetArrayWrite(c->A, &carray));
  PetscCallMPI(MPI_Reduce_scatter(sendbuf, carray, recvcounts, MPIU_SCALAR, MPIU_SUM, comm));
  PetscCall(MatDenseRestoreArrayWrite(c->A, &carray));
  PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Symbolic phase of C = A^T * B: sets the sizes/type of C and allocates the
   send buffer and receive counts reused by every numeric call */
static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
{
  MPI_Comm               comm;
  PetscMPIInt            size;
  PetscInt               cm = A->cmap->n, cM, cN = B->cmap->N;
  Mat_TransMatMultDense *atb;
  PetscBool              cisdense = PETSC_FALSE;
  PetscInt               i;
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C, 4);
  PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
    SETERRQ(comm, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
  }

  /* create matrix product C */
  PetscCall(MatSetSizes(C, cm, B->cmap->n, A->cmap->N, B->cmap->N));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, ""));
#endif
#if defined(PETSC_HAVE_HIP)
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSEHIP, ""));
#endif
  if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
  PetscCall(MatSetUp(C));

  /* create data structure for reuse C */
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCall(PetscNew(&atb));
  cM = C->rmap->N;
  PetscCall(PetscMalloc2(cM * cN, &atb->sendbuf, size, &atb->recvcounts));
  PetscCall(MatGetOwnershipRanges(C, &ranges));
  for (i = 0; i < size; i++) atb->recvcounts[i] = (ranges[i + 1] - ranges[i]) * cN;

  C->product->data    = atb;
  C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Symbolic phase of C = A * B^T: chooses between the "allgatherv" and "cyclic"
   communication algorithms and allocates the buffers each one needs */
static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
{
  MPI_Comm    comm;
  PetscMPIInt i, size;
  PetscInt    maxRows, bufsiz;
  PetscMPIInt tag;
PetscInt alg;
  Mat_MatTransMultDense *abt;
  Mat_Product           *product = C->product;
  PetscBool              flg;

  PetscFunctionBegin;
  MatCheckProduct(C, 4);
  PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
  /* check local size of A and B */
  PetscCheck(A->cmap->n == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local column dimensions are incompatible, A (%" PetscInt_FMT ") != B (%" PetscInt_FMT ")", A->cmap->n, B->cmap->n);

  PetscCall(PetscStrcmp(product->alg, "allgatherv", &flg));
  alg = flg ? 0 : 1; /* 0 = allgatherv, 1 = cyclic */

  /* setup matrix product C */
  PetscCall(MatSetSizes(C, A->rmap->n, B->rmap->n, A->rmap->N, B->rmap->N));
  PetscCall(MatSetType(C, MATMPIDENSE));
  PetscCall(MatSetUp(C));
  PetscCall(PetscObjectGetNewTag((PetscObject)C, &tag));

  /* create data structure for reuse C */
  PetscCall(PetscObjectGetComm((PetscObject)C, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCall(PetscNew(&abt));
  abt->tag = tag;
  abt->alg = alg;
  switch (alg) {
  case 1: /* alg: "cyclic" — two ping-pong buffers sized for the largest row block of B */
    for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i]));
    bufsiz = A->cmap->N * maxRows;
    PetscCall(PetscMalloc2(bufsiz, &abt->buf[0], bufsiz, &abt->buf[1]));
    break;
  default: /* alg: "allgatherv" — send buffer for local B^T, receive buffer for all of B^T */
    PetscCall(PetscMalloc2(B->rmap->n * B->cmap->N, &abt->buf[0], B->rmap->N * B->cmap->N, &abt->buf[1]));
    PetscCall(PetscMalloc2(size, &abt->recvcounts, size + 1, &abt->recvdispls));
    for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N;
    for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i];
    break;
  }

  C->product->data                = abt;
  C->product->destroy             = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
  C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Numeric phase of C = A * B^T, cyclic algorithm: the (transposed) local block of B is
   passed around the ring with nonblocking sends/receives, overlapping each local gemm
   with the communication for the next block */
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
{
  Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
  Mat_MatTransMultDense *abt;
  MPI_Comm               comm;
  PetscMPIInt            rank, size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom;
  PetscScalar           *sendbuf, *recvbuf = NULL, *cv;
  PetscInt               i, cK = A->cmap->N, k, j, bn;
  PetscScalar            _DOne = 1.0, _DZero = 0.0;
  const PetscScalar     *av, *bv;
  PetscBLASInt           cm, cn, ck, alda, blda = 0, clda;
  MPI_Request            reqs[2];
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C, 3);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
  abt = (Mat_MatTransMultDense *)C->product->data;
  PetscCall(PetscObjectGetComm((PetscObject)C, &comm));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCall(MatDenseGetArrayRead(a->A, &av));
  PetscCall(MatDenseGetArrayRead(b->A, &bv));
  PetscCall(MatDenseGetArrayWrite(c->A, &cv));
  PetscCall(MatDenseGetLDA(a->A, &i));
  PetscCall(PetscBLASIntCast(i, &alda));
  PetscCall(MatDenseGetLDA(b->A, &i));
  PetscCall(PetscBLASIntCast(i, &blda));
  PetscCall(MatDenseGetLDA(c->A, &i));
  PetscCall(PetscBLASIntCast(i, &clda));
  PetscCall(MatGetOwnershipRanges(B, &ranges));
  bn = B->rmap->n;
  if (blda == bn) {
    /* B's local array is contiguous: send it as-is */
    sendbuf = (PetscScalar *)bv;
  } else {
    /* pack B's local columns contiguously into the first ping-pong buffer */
    sendbuf = abt->buf[0];
    for (k = 0, i = 0; i < cK; i++) {
      for (j = 0; j < bn; j++, k++) sendbuf[k] = bv[i * blda + j];
    }
  }
  if (size > 1) {
    sendto   = (rank + size - 1) % size;
    recvfrom = (rank + size + 1) % size;
  } else {
    sendto = recvfrom = 0;
  }
  PetscCall(PetscBLASIntCast(cK, &ck));
  PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm));
  recvisfrom = rank;
  for (i = 0; i < size; i++) {
    /* we have finished receiving in sending, bufs can be read/modified */
    PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */
    PetscInt nextbn         = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];

    if (nextrecvisfrom != rank) {
      /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */
      sendsiz = cK * bn;
      recvsiz = cK * nextbn;
      recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1];
      PetscCallMPI(MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]));
      PetscCallMPI(MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]));
    }

    /* local aseq * sendbuf^T */
    PetscCall(PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn));
    if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &cm, &cn, &ck, &_DOne, av, &alda, sendbuf, &cn, &_DZero, cv + clda * ranges[recvisfrom], &clda));

    if (nextrecvisfrom != rank) {
      /* wait for the sends and receives to complete, swap sendbuf and recvbuf */
      PetscCallMPI(MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE));
    }
    bn         = nextbn;
    recvisfrom = nextrecvisfrom;
    sendbuf    = recvbuf;
  }
  PetscCall(MatDenseRestoreArrayRead(a->A, &av));
  PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
  PetscCall(MatDenseRestoreArrayWrite(c->A, &cv));
  PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Numeric phase of C = A * B^T, allgatherv algorithm: every rank gathers all of B^T,
   then performs a single local gemm */
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
Mat_MatTransMultDense *abt; 2269 MPI_Comm comm; 2270 PetscMPIInt size; 2271 PetscScalar *cv, *sendbuf, *recvbuf; 2272 const PetscScalar *av, *bv; 2273 PetscInt blda, i, cK = A->cmap->N, k, j, bn; 2274 PetscScalar _DOne = 1.0, _DZero = 0.0; 2275 PetscBLASInt cm, cn, ck, alda, clda; 2276 2277 PetscFunctionBegin; 2278 MatCheckProduct(C, 3); 2279 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 2280 abt = (Mat_MatTransMultDense *)C->product->data; 2281 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 2282 PetscCallMPI(MPI_Comm_size(comm, &size)); 2283 PetscCall(MatDenseGetArrayRead(a->A, &av)); 2284 PetscCall(MatDenseGetArrayRead(b->A, &bv)); 2285 PetscCall(MatDenseGetArrayWrite(c->A, &cv)); 2286 PetscCall(MatDenseGetLDA(a->A, &i)); 2287 PetscCall(PetscBLASIntCast(i, &alda)); 2288 PetscCall(MatDenseGetLDA(b->A, &blda)); 2289 PetscCall(MatDenseGetLDA(c->A, &i)); 2290 PetscCall(PetscBLASIntCast(i, &clda)); 2291 /* copy transpose of B into buf[0] */ 2292 bn = B->rmap->n; 2293 sendbuf = abt->buf[0]; 2294 recvbuf = abt->buf[1]; 2295 for (k = 0, j = 0; j < bn; j++) { 2296 for (i = 0; i < cK; i++, k++) sendbuf[k] = bv[i * blda + j]; 2297 } 2298 PetscCall(MatDenseRestoreArrayRead(b->A, &bv)); 2299 PetscCallMPI(MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm)); 2300 PetscCall(PetscBLASIntCast(cK, &ck)); 2301 PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm)); 2302 PetscCall(PetscBLASIntCast(c->A->cmap->n, &cn)); 2303 if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &cm, &cn, &ck, &_DOne, av, &alda, recvbuf, &ck, &_DZero, cv, &clda)); 2304 PetscCall(MatDenseRestoreArrayRead(a->A, &av)); 2305 PetscCall(MatDenseRestoreArrayRead(b->A, &bv)); 2306 PetscCall(MatDenseRestoreArrayWrite(c->A, &cv)); 2307 PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 2308 PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 2309 PetscCall(MatAssemblyEnd(C, 
MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Dispatch C = A * B^T to the algorithm chosen during the symbolic phase */
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
{
  Mat_MatTransMultDense *abt;

  PetscFunctionBegin;
  MatCheckProduct(C, 3);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
  abt = (Mat_MatTransMultDense *)C->product->data;
  switch (abt->alg) {
  case 1:
    PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C));
    break;
  default:
    PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C));
    break;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Destructor for the A*B product data attached to C */
static PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data)
{
  Mat_MatMultDense *ab = (Mat_MatMultDense *)data;

  PetscFunctionBegin;
  PetscCall(MatDestroy(&ab->Ce));
  PetscCall(MatDestroy(&ab->Ae));
  PetscCall(MatDestroy(&ab->Be));
  PetscCall(PetscFree(ab));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Numeric phase of C = A * B: either via Elemental (when Ae/Ce were created in the
   symbolic phase) or by broadcasting B's columns with an SF and a local seq product */
static PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
{
  Mat_MatMultDense *ab;
  Mat_MPIDense     *mdn = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  MatCheckProduct(C, 3);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Missing product data");
  ab = (Mat_MatMultDense *)C->product->data;
  if (ab->Ae && ab->Ce) {
#if PetscDefined(HAVE_ELEMENTAL)
    PetscCall(MatConvert_MPIDense_Elemental(A, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Ae));
    PetscCall(MatConvert_MPIDense_Elemental(B, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Be));
    PetscCall(MatMatMultNumeric_Elemental(ab->Ae, ab->Be, ab->Ce));
    PetscCall(MatConvert(ab->Ce, MATMPIDENSE, MAT_REUSE_MATRIX, &C));
#else
    SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "PETSC_HAVE_ELEMENTAL not defined");
#endif
  } else {
    const PetscScalar *read;
    PetscScalar       *write;
    PetscInt           lda;

    PetscCall(MatDenseGetLDA(B, &lda));
    PetscCall(MatDenseGetArrayRead(B, &read));
    PetscCall(MatDenseGetArrayWrite(ab->Be, &write));
    if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); /* cannot be done during the symbolic phase because of possible calls to MatProductReplaceMats() */
    /* broadcast each column of B so every rank holds the rows of B it needs */
    for (PetscInt i = 0; i < C->cmap->N; ++i) {
      PetscCall(PetscSFBcastBegin(mdn->Mvctx, MPIU_SCALAR, read + i * lda, write + i * ab->Be->rmap->n, MPI_REPLACE));
      PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, read + i * lda, write + i * ab->Be->rmap->n, MPI_REPLACE));
    }
    PetscCall(MatDenseRestoreArrayWrite(ab->Be, &write));
    PetscCall(MatDenseRestoreArrayRead(B, &read));
    PetscCall(MatMatMultNumeric_SeqDense_SeqDense(((Mat_MPIDense *)A->data)->A, ab->Be, ((Mat_MPIDense *)C->data)->A));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Symbolic phase of C = A * B: sets up C and, depending on the algorithm, either the
   Elemental mirror matrices or the sequential buffer for the broadcast-based product */
static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
{
  Mat_Product      *product = C->product;
  PetscInt          alg;
  Mat_MatMultDense *ab;
  PetscBool         flg;

  PetscFunctionBegin;
  MatCheckProduct(C, 4);
  PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
  /* check local size of A and B */
  PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ", %" PetscInt_FMT ")",
             A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);

  PetscCall(PetscStrcmp(product->alg, "petsc", &flg));
  alg = flg ? 0 : 1; /* 0 = petsc, 1 = elemental */

  /* setup C */
  PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N));
  PetscCall(MatSetType(C, MATMPIDENSE));
  PetscCall(MatSetUp(C));

  /* create data structure for reuse Cdense */
  PetscCall(PetscNew(&ab));

  switch (alg) {
  case 1: /* alg: "elemental" */
#if PetscDefined(HAVE_ELEMENTAL)
    /* create elemental matrices Ae and Be */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &ab->Ae));
    PetscCall(MatSetSizes(ab->Ae, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N));
    PetscCall(MatSetType(ab->Ae, MATELEMENTAL));
    PetscCall(MatSetUp(ab->Ae));
    PetscCall(MatSetOption(ab->Ae, MAT_ROW_ORIENTED, PETSC_FALSE));

    PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &ab->Be));
    PetscCall(MatSetSizes(ab->Be, PETSC_DECIDE, PETSC_DECIDE, B->rmap->N, B->cmap->N));
    PetscCall(MatSetType(ab->Be, MATELEMENTAL));
    PetscCall(MatSetUp(ab->Be));
    PetscCall(MatSetOption(ab->Be, MAT_ROW_ORIENTED, PETSC_FALSE));

    /* compute symbolic Ce = Ae*Be */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)C), &ab->Ce));
    PetscCall(MatMatMultSymbolic_Elemental(ab->Ae, ab->Be, fill, ab->Ce));
#else
    SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "PETSC_HAVE_ELEMENTAL not defined");
#endif
    break;
  default: /* alg: "petsc" */
    ab->Ae = NULL;
    PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, A->cmap->N, B->cmap->N, NULL, &ab->Be));
    ab->Ce = NULL;
    break;
  }

  C->product->data       = ab;
  C->product->destroy    = MatDestroy_MatMatMult_MPIDense_MPIDense;
  C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C)
{
  Mat_Product *product     = C->product;
  const char  *algTypes[2] = {"petsc", "elemental"};
  PetscInt     alg, nalg = PetscDefined(HAVE_ELEMENTAL) ?
2 : 1; 2448 PetscBool flg = PETSC_FALSE; 2449 2450 PetscFunctionBegin; 2451 /* Set default algorithm */ 2452 alg = 0; /* default is petsc */ 2453 PetscCall(PetscStrcmp(product->alg, "default", &flg)); 2454 if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg])); 2455 2456 /* Get runtime option */ 2457 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat"); 2458 PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_AB", algTypes, nalg, algTypes[alg], &alg, &flg)); 2459 PetscOptionsEnd(); 2460 if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg])); 2461 2462 C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense; 2463 C->ops->productsymbolic = MatProductSymbolic_AB; 2464 PetscFunctionReturn(PETSC_SUCCESS); 2465 } 2466 2467 static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C) 2468 { 2469 Mat_Product *product = C->product; 2470 Mat A = product->A, B = product->B; 2471 2472 PetscFunctionBegin; 2473 PetscCheck(A->rmap->rstart == B->rmap->rstart && A->rmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", 2474 A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend); 2475 C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense; 2476 C->ops->productsymbolic = MatProductSymbolic_AtB; 2477 PetscFunctionReturn(PETSC_SUCCESS); 2478 } 2479 2480 static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C) 2481 { 2482 Mat_Product *product = C->product; 2483 const char *algTypes[2] = {"allgatherv", "cyclic"}; 2484 PetscInt alg, nalg = 2; 2485 PetscBool flg = PETSC_FALSE; 2486 2487 PetscFunctionBegin; 2488 /* Set default algorithm */ 2489 alg = 0; /* default is allgatherv */ 2490 PetscCall(PetscStrcmp(product->alg, "default", &flg)); 2491 if (flg) 
PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg])); 2492 2493 /* Get runtime option */ 2494 if (product->api_user) { 2495 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatTransposeMult", "Mat"); 2496 PetscCall(PetscOptionsEList("-matmattransmult_mpidense_mpidense_via", "Algorithmic approach", "MatMatTransposeMult", algTypes, nalg, algTypes[alg], &alg, &flg)); 2497 PetscOptionsEnd(); 2498 } else { 2499 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_ABt", "Mat"); 2500 PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_ABt", algTypes, nalg, algTypes[alg], &alg, &flg)); 2501 PetscOptionsEnd(); 2502 } 2503 if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg])); 2504 2505 C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense; 2506 C->ops->productsymbolic = MatProductSymbolic_ABt; 2507 PetscFunctionReturn(PETSC_SUCCESS); 2508 } 2509 2510 static PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C) 2511 { 2512 Mat_Product *product = C->product; 2513 2514 PetscFunctionBegin; 2515 switch (product->type) { 2516 case MATPRODUCT_AB: 2517 PetscCall(MatProductSetFromOptions_MPIDense_AB(C)); 2518 break; 2519 case MATPRODUCT_AtB: 2520 PetscCall(MatProductSetFromOptions_MPIDense_AtB(C)); 2521 break; 2522 case MATPRODUCT_ABt: 2523 PetscCall(MatProductSetFromOptions_MPIDense_ABt(C)); 2524 break; 2525 default: 2526 break; 2527 } 2528 PetscFunctionReturn(PETSC_SUCCESS); 2529 } 2530