/*
   Basic functions for basic parallel dense matrices.
   Portions of this code are under:
   Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
*/

#include <../src/mat/impls/dense/mpi/mpidense.h> /*I "petscmat.h" I*/
#include <../src/mat/impls/aij/mpi/mpiaij.h>
#include <petscblaslapack.h>

/*@
  MatDenseGetLocalMatrix - For a `MATMPIDENSE` or `MATSEQDENSE` matrix returns the sequential
  matrix that represents the operator. For sequential matrices it returns itself.

  Input Parameter:
. A - the sequential or MPI `MATDENSE` matrix

  Output Parameter:
. B - the inner matrix

  Level: intermediate

.seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATMPIDENSE`, `MATSEQDENSE`
@*/
PetscErrorCode MatDenseGetLocalMatrix(Mat A, Mat *B)
{
  Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
  PetscBool     flg;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscAssertPointer(B, 2);
  /* MPI case: hand back the per-process sequential block; sequential case: the matrix itself */
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &flg));
  if (flg) *B = mat->A;
  else {
    PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQDENSE, &flg));
    PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)A)->type_name);
    *B = A;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Copy the local dense block of A into the local dense block of B */
static PetscErrorCode MatCopy_MPIDense(Mat A, Mat B, MatStructure s)
{
  Mat_MPIDense *Amat = (Mat_MPIDense *)A->data;
  Mat_MPIDense *Bmat = (Mat_MPIDense *)B->data;

  PetscFunctionBegin;
  PetscCall(MatCopy(Amat->A, Bmat->A, s));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Add alpha to the locally owned portion of the diagonal of A */
PetscErrorCode MatShift_MPIDense(Mat A, PetscScalar alpha)
{
  Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
  PetscInt      j, lda, rstart = A->rmap->rstart, rend = A->rmap->rend, rend2;
  PetscScalar  *v;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArray(mat->A, &v));
  PetscCall(MatDenseGetLDA(mat->A, &lda));
  /* for rectangular matrices the diagonal ends at global column N */
  rend2 = PetscMin(rend, A->cmap->N);
  if (rend2 > rstart) {
    /* local row j - rstart, global column j; the local block stores all N columns */
    for (j = rstart; j < rend2; j++) v[j - rstart + j * lda] += alpha;
    PetscCall(PetscLogFlops(rend2 - rstart));
  }
  PetscCall(MatDenseRestoreArray(mat->A, &v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Access a single row; only locally owned rows are supported */
static PetscErrorCode MatGetRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
  PetscInt      lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;

  PetscFunctionBegin;
  PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows");
  lrow = row - rstart;
  PetscCall(MatGetRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Companion to MatGetRow_MPIDense(); releases the row obtained above */
static PetscErrorCode MatRestoreRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
  PetscInt      lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;

  PetscFunctionBegin;
  PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows");
  lrow = row - rstart;
  PetscCall(MatRestoreRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Return (and cache on A as "DiagonalBlock") the square diagonal block owned by this process */
static PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A, Mat *a)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)A->data;
  PetscInt      m = A->rmap->n, rstart = A->rmap->rstart;
  PetscScalar  *array;
  MPI_Comm      comm;
  PetscBool     flg;
  Mat           B;

  PetscFunctionBegin;
  PetscCall(MatHasCongruentLayouts(A, &flg));
  PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only square matrices supported.");
  PetscCall(PetscObjectQuery((PetscObject)A, "DiagonalBlock", (PetscObject *)&B));
  if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */
#if PetscDefined(HAVE_CUDA)
    PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSECUDA, &flg));
    PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSECUDA);
#elif PetscDefined(HAVE_HIP)
    PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSEHIP, &flg));
    PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSEHIP);
#endif
    PetscCall(PetscObjectGetComm((PetscObject)mdn->A, &comm));
    PetscCall(MatCreate(comm, &B));
    PetscCall(MatSetSizes(B, m, m, m, m));
    PetscCall(MatSetType(B, ((PetscObject)mdn->A)->type_name));
    /* the block aliases the local dense array, offset to the first diagonal-block column */
    PetscCall(MatDenseGetArrayRead(mdn->A, (const PetscScalar **)&array));
    PetscCall(MatSeqDenseSetPreallocation(B, array + m * rstart));
    PetscCall(MatDenseRestoreArrayRead(mdn->A, (const PetscScalar **)&array));
    /* cache on A; the composition holds a reference, so the local MatDestroy below is safe */
    PetscCall(PetscObjectCompose((PetscObject)A, "DiagonalBlock", (PetscObject)B));
    *a = B;
    PetscCall(MatDestroy(&B));
  } else *a = B;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Insert/add values; entries for rows owned by other processes are stashed until assembly */
static PetscErrorCode MatSetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIDense *A = (Mat_MPIDense *)mat->data;
  PetscInt      i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;
  PetscBool     roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative rows are ignored by convention */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        /* v is stored by rows: row i of the input occupies v[i*n .. i*n+n-1] */
        PetscCall(MatSetValues(A->A, 1, &row, n, idxn, v ? v + i * n : NULL, addv));
      } else {
        /* v is stored by columns: entry (i,j) of the input is v[i + j*m] */
        for (j = 0; j < n; j++) {
          if (idxn[j] < 0) continue;
          PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large");
          PetscCall(MatSetValues(A->A, 1, &row, 1, &idxn[j], v ? v + i + j * m : NULL, addv));
        }
      }
    } else if (!A->donotstash) {
      /* off-process row: stash it for communication in MatAssemblyBegin/End */
      mat->assembled = PETSC_FALSE;
      if (roworiented) {
        PetscCall(MatStashValuesRow_Private(&mat->stash, idxm[i], n, idxn, v ? v + i * n : NULL, PETSC_FALSE));
      } else {
        PetscCall(MatStashValuesCol_Private(&mat->stash, idxm[i], n, idxn, v ? v + i : NULL, m, PETSC_FALSE));
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Retrieve values into row-oriented v; only locally owned rows may be requested */
static PetscErrorCode MatGetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
  PetscInt      i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j = 0; j < n; j++) {
        if (idxn[j] < 0) continue; /* negative column */
        PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large");
        PetscCall(MatGetValues(mdn->A, 1, &row, 1, &idxn[j], v + i * n + j));
      }
    } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Leading dimension of the local dense storage (delegates to the inner sequential matrix) */
static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A, PetscInt *lda)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseGetLDA(a->A, lda));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Set the leading dimension of the local dense storage, creating the inner matrix if needed */
static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A, PetscInt lda)
{
  Mat_MPIDense *a     = (Mat_MPIDense *)A->data;
  MatType       mtype = MATSEQDENSE;

  PetscFunctionBegin;
  if (!a->A) {
    /* inner sequential matrix not created yet: build it so the LDA can be applied */
    PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
    PetscCall(PetscLayoutSetUp(A->rmap));
    PetscCall(PetscLayoutSetUp(A->cmap));
    PetscCall(MatCreate(PETSC_COMM_SELF, &a->A));
    /* the local block holds all N columns for the locally owned rows */
    PetscCall(MatSetSizes(a->A, A->rmap->n, A->cmap->N, A->rmap->n, A->cmap->N));
#if PetscDefined(HAVE_CUDA)
    PetscBool iscuda;
    PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSECUDA, &iscuda));
    if (iscuda) mtype = MATSEQDENSECUDA;
#elif PetscDefined(HAVE_HIP)
    PetscBool iship;
    PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSEHIP, &iship));
    if (iship) mtype = MATSEQDENSEHIP;
#endif
    PetscCall(MatSetType(a->A, mtype));
  }
  PetscCall(MatDenseSetLDA(a->A, lda));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Read-write access to the local dense array */
static PetscErrorCode MatDenseGetArray_MPIDense(Mat A, PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseGetArray(a->A, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Read-only access to the local dense array */
static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A, PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseGetArrayRead(a->A, (const PetscScalar **)array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Write-only access to the local dense array */
static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A, PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseGetArrayWrite(a->A, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Temporarily substitute a user-provided array for the local storage */
static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A, const PetscScalar *array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDensePlaceArray(a->A, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Undo MatDensePlaceArray: restore the matrix's own storage */
static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseResetArray(a->A));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Permanently replace the local storage with a user-provided array */
static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A, const PetscScalar *array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseReplaceArray(a->A, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Extract a submatrix; supports only submatrices with the same row partitioning as A */
static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A, IS isrow, IS iscol, MatReuse scall, Mat *B)
{
  Mat_MPIDense   *mat = (Mat_MPIDense *)A->data, *newmatd;
  PetscInt        lda, i, j, rstart, rend, nrows, ncols, Ncols, nlrows, nlcols;
  const PetscInt *irow, *icol;
  const
  PetscScalar *v;
  PetscScalar *bv;
  Mat          newmat;
  IS           iscol_local;
  MPI_Comm     comm_is, comm_mat;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm_mat));
  PetscCall(PetscObjectGetComm((PetscObject)iscol, &comm_is));
  PetscCheck(comm_mat == comm_is, PETSC_COMM_SELF, PETSC_ERR_ARG_NOTSAMECOMM, "IS communicator must match matrix communicator");

  /* every process needs the full (global) set of requested columns */
  PetscCall(ISAllGather(iscol, &iscol_local));
  PetscCall(ISGetIndices(isrow, &irow));
  PetscCall(ISGetIndices(iscol_local, &icol));
  PetscCall(ISGetLocalSize(isrow, &nrows));
  PetscCall(ISGetLocalSize(iscol, &ncols));
  PetscCall(ISGetSize(iscol, &Ncols)); /* global number of columns, size of iscol_local */

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK. ... Currently supports only submatrix same partitioning as
     original matrix! */

  PetscCall(MatGetLocalSize(A, &nlrows, &nlcols));
  PetscCall(MatGetOwnershipRange(A, &rstart, &rend));

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &newmat));
    PetscCall(MatSetSizes(newmat, nrows, ncols, PETSC_DECIDE, Ncols));
    PetscCall(MatSetType(newmat, ((PetscObject)A)->type_name));
    PetscCall(MatMPIDenseSetPreallocation(newmat, NULL));
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense *)newmat->data;
  PetscCall(MatDenseGetArray(newmatd->A, &bv));
  PetscCall(MatDenseGetArrayRead(mat->A, &v));
  PetscCall(MatDenseGetLDA(mat->A, &lda));
  for (i = 0; i < Ncols; i++) {
    /* av points at the start of source column icol[i]; bv advances through the
       destination assuming its columns are stored contiguously (nrows entries each) */
    const PetscScalar *av = v + lda * icol[i];
    for (j = 0; j < nrows; j++) *bv++ = av[irow[j] - rstart];
  }
  PetscCall(MatDenseRestoreArrayRead(mat->A, &v));
  PetscCall(MatDenseRestoreArray(newmatd->A, &bv));

  /* Assemble the matrices so that the correct flags are set */
  PetscCall(MatAssemblyBegin(newmat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(newmat, MAT_FINAL_ASSEMBLY));

  /* Free work space */
  PetscCall(ISRestoreIndices(isrow, &irow));
  PetscCall(ISRestoreIndices(iscol_local, &icol));
  PetscCall(ISDestroy(&iscol_local));
  *B = newmat;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Release a read-write array obtained with MatDenseGetArray_MPIDense */
static PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A, PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseRestoreArray(a->A, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Release a read-only array obtained with MatDenseGetArrayRead_MPIDense */
static PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A, PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseRestoreArrayRead(a->A, (const PetscScalar **)array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Release a write-only array obtained with MatDenseGetArrayWrite_MPIDense */
static PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A, PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseRestoreArrayWrite(a->A, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Begin communicating stashed off-process entries (no-op if stashing is disabled) */
static PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat, MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
  PetscInt      nstash, reallocs;

  PetscFunctionBegin;
  if (mdn->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
  PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
  PetscCall(PetscInfo(mdn->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Receive stashed entries, insert them locally, then assemble the inner matrix */
static PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat, MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
  PetscInt      i, *row, *col, flg, j, rstart, ncols;
  PetscMPIInt   n;
  PetscScalar  *val;

  PetscFunctionBegin;
  if (!mdn->donotstash && !mat->nooffprocentries) {
    /* wait on receives */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIDense(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
  }

  PetscCall(MatAssemblyBegin(mdn->A, mode));
  PetscCall(MatAssemblyEnd(mdn->A, mode));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Zero every entry of the local dense block */
static PetscErrorCode MatZeroEntries_MPIDense(Mat A)
{
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatZeroEntries(l->A));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Zero the given rows, optionally setting the diagonal to diag and fixing b = diag*x */
static PetscErrorCode MatZeroRows_MPIDense(Mat A, PetscInt n, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;
  PetscInt      i, len, *lrows;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(PetscLayoutMapLocal(A->rmap, n, rows, &len, &lrows, NULL));
  /* fix right-hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArrayWrite(b, &bb));
    for (i = 0; i < len; ++i) bb[lrows[i]] = diag * xx[lrows[i]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArrayWrite(b, &bb));
  }
  /* zero the rows first, then set the diagonal collectively via MatDiagonalSet */
  PetscCall(MatZeroRows(l->A, len, lrows, 0.0, NULL, NULL));
  if (diag != 0.0) {
    Vec d;

    PetscCall(MatCreateVecs(A, NULL, &d));
    PetscCall(VecSet(d, diag));
    PetscCall(MatDiagonalSet(A, d, INSERT_VALUES));
    PetscCall(VecDestroy(&d));
  }
  PetscCall(PetscFree(lrows));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat, Vec, Vec);
PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat, Vec, Vec, Vec);
PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat, Vec, Vec);
PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat, Vec, Vec, Vec);

/* y = A*x: broadcast the full x into lvec via the SF, then apply the local block */
static PetscErrorCode MatMult_MPIDense(Mat mat, Vec xx, Vec yy)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
  PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
  PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(mdn->Mvctx,
                            MPIU_SCALAR, ax, ay, MPI_REPLACE));
  PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
  PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
  PetscCall((*mdn->A->ops->mult)(mdn->A, mdn->lvec, yy));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* z = y + A(:, c_start:c_end)*x restricted to a column range of the local block */
static PetscErrorCode MatMultAddColumnRange_MPIDense(Mat mat, Vec xx, Vec yy, Vec zz, PetscInt c_start, PetscInt c_end)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
  PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
  PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
  PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
  PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
  PetscUseMethod(mdn->A, "MatMultAddColumnRange_C", (Mat, Vec, Vec, Vec, PetscInt, PetscInt), (mdn->A, mdn->lvec, yy, zz, c_start, c_end));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* z = y + A*x: broadcast x into lvec, then apply the local block's multadd */
static PetscErrorCode MatMultAdd_MPIDense(Mat mat, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
  PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
  PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
  PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
  PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
  PetscCall((*mdn->A->ops->multadd)(mdn->A, mdn->lvec, yy, zz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* y = A(:, c_start:c_end)^H * x: apply local block, then reduce (sum) into y via the SF */
static PetscErrorCode MatMultHermitianTransposeColumnRange_MPIDense(Mat A, Vec xx, Vec yy, PetscInt c_start, PetscInt c_end)
{
  Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
  PetscCall(VecSet(yy, 0.0));
  PetscUseMethod(a->A, "MatMultHermitianTransposeColumnRange_C", (Mat, Vec, Vec, PetscInt, PetscInt), (a->A, xx, a->lvec, c_start, c_end));
  PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(yy, &ay, &aymtype));
  PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
  PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
  PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
  PetscCall(VecRestoreArrayAndMemType(yy, &ay));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Shared kernel for y = A^T*x / A^H*x: apply local (Hermitian) transpose, then reduce into y */
static PetscErrorCode MatMultTransposeKernel_MPIDense(Mat A, Vec xx, Vec yy, PetscBool herm)
{
  Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
  PetscCall(VecSet(yy, 0.0));
  if (herm) PetscCall((*a->A->ops->multhermitiantranspose)(a->A, xx, a->lvec));
  else PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec));
  PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(yy, &ay, &aymtype));
  PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
  PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
  PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
PetscCall(VecRestoreArrayAndMemType(yy, &ay)); 566 PetscFunctionReturn(PETSC_SUCCESS); 567 } 568 569 static PetscErrorCode MatMultHermitianTransposeAddColumnRange_MPIDense(Mat A, Vec xx, Vec yy, Vec zz, PetscInt c_start, PetscInt c_end) 570 { 571 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 572 const PetscScalar *ax; 573 PetscScalar *ay; 574 PetscMemType axmtype, aymtype; 575 576 PetscFunctionBegin; 577 if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); 578 PetscCall(VecCopy(yy, zz)); 579 PetscMPIInt rank; 580 PetscCallMPI(MPI_Comm_rank(MPI_COMM_WORLD, &rank)); 581 PetscUseMethod(a->A, "MatMultHermitianTransposeColumnRange_C", (Mat, Vec, Vec, PetscInt, PetscInt), (a->A, xx, a->lvec, c_start, c_end)); 582 PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype)); 583 PetscCall(VecGetArrayAndMemType(zz, &ay, &aymtype)); 584 PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM)); 585 PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM)); 586 PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax)); 587 PetscCall(VecRestoreArrayAndMemType(zz, &ay)); 588 PetscFunctionReturn(PETSC_SUCCESS); 589 } 590 591 static PetscErrorCode MatMultTransposeAddKernel_MPIDense(Mat A, Vec xx, Vec yy, Vec zz, PetscBool herm) 592 { 593 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 594 const PetscScalar *ax; 595 PetscScalar *ay; 596 PetscMemType axmtype, aymtype; 597 598 PetscFunctionBegin; 599 if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); 600 PetscCall(VecCopy(yy, zz)); 601 if (herm) PetscCall((*a->A->ops->multhermitiantranspose)(a->A, xx, a->lvec)); 602 else PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec)); 603 PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype)); 604 PetscCall(VecGetArrayAndMemType(zz, &ay, &aymtype)); 605 PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM)); 606 PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM)); 607 
  PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
  PetscCall(VecRestoreArrayAndMemType(zz, &ay));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* y = A^T * x */
static PetscErrorCode MatMultTranspose_MPIDense(Mat A, Vec xx, Vec yy)
{
  PetscFunctionBegin;
  PetscCall(MatMultTransposeKernel_MPIDense(A, xx, yy, PETSC_FALSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* z = y + A^T * x */
static PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A, Vec xx, Vec yy, Vec zz)
{
  PetscFunctionBegin;
  PetscCall(MatMultTransposeAddKernel_MPIDense(A, xx, yy, zz, PETSC_FALSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* y = A^H * x */
static PetscErrorCode MatMultHermitianTranspose_MPIDense(Mat A, Vec xx, Vec yy)
{
  PetscFunctionBegin;
  PetscCall(MatMultTransposeKernel_MPIDense(A, xx, yy, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* z = y + A^H * x */
static PetscErrorCode MatMultHermitianTransposeAdd_MPIDense(Mat A, Vec xx, Vec yy, Vec zz)
{
  PetscFunctionBegin;
  PetscCall(MatMultTransposeAddKernel_MPIDense(A, xx, yy, zz, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Extract the diagonal into v; trailing local entries beyond the diagonal are zeroed */
PetscErrorCode MatGetDiagonal_MPIDense(Mat A, Vec v)
{
  Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
  PetscInt           lda, len, i, nl, ng, m = A->rmap->n, radd;
  PetscScalar       *x;
  const PetscScalar *av;

  PetscFunctionBegin;
  PetscCall(VecGetArray(v, &x));
  PetscCall(VecGetSize(v, &ng));
  PetscCheck(ng == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming mat and vec");
  PetscCall(VecGetLocalSize(v, &nl));
  len  = PetscMin(a->A->rmap->n, a->A->cmap->n);
  /* offset of the first diagonal entry in the local column-major array;
     NOTE(review): computed with m while the per-column stride below is lda —
     this appears to assume lda == m; confirm for padded-LDA layouts */
  radd = A->rmap->rstart * m;
  PetscCall(MatDenseGetArrayRead(a->A, &av));
  PetscCall(MatDenseGetLDA(a->A, &lda));
  for (i = 0; i < len; i++) x[i] = av[radd + i * lda + i];
  PetscCall(MatDenseRestoreArrayRead(a->A, &av));
  if (nl - i > 0) PetscCall(PetscArrayzero(x + i, nl - i));
  PetscCall(VecRestoreArray(v, &x));
  PetscFunctionReturn(PETSC_SUCCESS);
}
662 663 static PetscErrorCode MatDestroy_MPIDense(Mat mat) 664 { 665 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 666 667 PetscFunctionBegin; 668 PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N)); 669 PetscCall(MatStashDestroy_Private(&mat->stash)); 670 PetscCheck(!mdn->vecinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 671 PetscCheck(!mdn->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 672 PetscCall(MatDestroy(&mdn->A)); 673 PetscCall(VecDestroy(&mdn->lvec)); 674 PetscCall(PetscSFDestroy(&mdn->Mvctx)); 675 PetscCall(VecDestroy(&mdn->cvec)); 676 PetscCall(MatDestroy(&mdn->cmat)); 677 678 PetscCall(PetscFree(mat->data)); 679 PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL)); 680 681 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", NULL)); 682 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", NULL)); 683 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", NULL)); 684 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", NULL)); 685 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", NULL)); 686 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", NULL)); 687 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", NULL)); 688 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", NULL)); 689 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", NULL)); 690 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", NULL)); 691 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", NULL)); 692 PetscCall(PetscObjectComposeFunction((PetscObject)mat, 
"MatConvert_mpiaij_mpidense_C", NULL)); 693 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", NULL)); 694 #if defined(PETSC_HAVE_ELEMENTAL) 695 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", NULL)); 696 #endif 697 #if defined(PETSC_HAVE_SCALAPACK) 698 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", NULL)); 699 #endif 700 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", NULL)); 701 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", NULL)); 702 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", NULL)); 703 #if defined(PETSC_HAVE_CUDA) 704 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", NULL)); 705 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", NULL)); 706 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", NULL)); 707 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensecuda_mpidense_C", NULL)); 708 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", NULL)); 709 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", NULL)); 710 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", NULL)); 711 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", NULL)); 712 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArray_C", NULL)); 713 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayRead_C", NULL)); 714 PetscCall(PetscObjectComposeFunction((PetscObject)mat, 
"MatDenseCUDAGetArrayWrite_C", NULL)); 715 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArray_C", NULL)); 716 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayRead_C", NULL)); 717 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayWrite_C", NULL)); 718 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAPlaceArray_C", NULL)); 719 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAResetArray_C", NULL)); 720 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAReplaceArray_C", NULL)); 721 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDASetPreallocation_C", NULL)); 722 #endif 723 #if defined(PETSC_HAVE_HIP) 724 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", NULL)); 725 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", NULL)); 726 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", NULL)); 727 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensehip_mpidense_C", NULL)); 728 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensehip_C", NULL)); 729 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidensehip_C", NULL)); 730 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaij_C", NULL)); 731 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaijhipsparse_C", NULL)); 732 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArray_C", NULL)); 733 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayRead_C", NULL)); 734 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayWrite_C", NULL)); 735 
PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArray_C", NULL)); 736 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayRead_C", NULL)); 737 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayWrite_C", NULL)); 738 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPPlaceArray_C", NULL)); 739 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPResetArray_C", NULL)); 740 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPReplaceArray_C", NULL)); 741 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPSetPreallocation_C", NULL)); 742 #endif 743 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", NULL)); 744 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", NULL)); 745 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", NULL)); 746 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", NULL)); 747 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", NULL)); 748 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", NULL)); 749 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", NULL)); 750 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", NULL)); 751 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", NULL)); 752 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", NULL)); 753 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultAddColumnRange_C", NULL)); 754 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeColumnRange_C", NULL)); 755 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeAddColumnRange_C", NULL)); 756 757 
  /* Drop the cached "DiagonalBlock" inner matrix created by MatGetDiagonalBlock_MPIDense() */
  PetscCall(PetscObjectCompose((PetscObject)mat, "DiagonalBlock", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

#include <petscdraw.h>
/*
  MatView_MPIDense_ASCIIorDraworSocket - Viewer backend for ASCII, draw and socket viewers.

  For ASCII INFO/INFO_DETAIL formats only summary data is printed (per-rank sizes and,
  if present, the communication PetscSF).  For all other formats the whole matrix is
  gathered onto rank 0 into a temporary MATMPIDENSE and viewed there through a
  sub-viewer.  Collective on the matrix communicator.
*/
static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
{
  Mat_MPIDense     *mdn = (Mat_MPIDense *)mat->data;
  PetscMPIInt       rank;
  PetscViewerType   vtype;
  PetscBool         iascii, isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  if (iascii) {
    PetscCall(PetscViewerGetType(viewer, &vtype));
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* one synchronized line per rank with local sizes and memory use */
      MatInfo info;
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %" PetscInt_FMT " \n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                   (PetscInt)info.memory));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      if (mdn->Mvctx) PetscCall(PetscSFView(mdn->Mvctx, viewer));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      /* nothing extra to print for plain INFO format */
      PetscFunctionReturn(PETSC_SUCCESS);
    }
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;

    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawIsNull(draw, &isnull));
    if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
  }

  {
    /* assemble the entire matrix onto first processor.
     */
    Mat          A;
    PetscInt     M = mat->rmap->N, N = mat->cmap->N, m, row, i, nz;
    PetscInt    *cols;
    PetscScalar *vals;

    PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
    if (rank == 0) {
      /* rank 0 owns every row/column of the gathered copy */
      PetscCall(MatSetSizes(A, M, N, M, N));
    } else {
      PetscCall(MatSetSizes(A, 0, 0, M, N));
    }
    /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
    PetscCall(MatSetType(A, MATMPIDENSE));
    PetscCall(MatMPIDenseSetPreallocation(A, NULL));

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    A->insertmode = INSERT_VALUES;

    row = mat->rmap->rstart;
    m   = mdn->A->rmap->n;
    for (i = 0; i < m; i++) {
      PetscCall(MatGetRow_MPIDense(mat, row, &nz, &cols, &vals));
      PetscCall(MatSetValues_MPIDense(A, 1, &row, nz, cols, vals, INSERT_VALUES));
      PetscCall(MatRestoreRow_MPIDense(mat, row, &nz, &cols, &vals));
      row++;
    }

    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    if (rank == 0) {
      /* view the rank-0 local block of the gathered copy under the original matrix name */
      PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIDense *)A->data)->A, ((PetscObject)mat)->name));
      PetscCall(MatView_SeqDense(((Mat_MPIDense *)A->data)->A, sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatView_MPIDense - MatView() implementation for MATMPIDENSE; dispatches by viewer type.
*/
static PetscErrorCode MatView_MPIDense(Mat mat, PetscViewer viewer)
{
  PetscBool iascii, isbinary, isdraw, issocket;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));

  if (iascii || issocket || isdraw) {
    PetscCall(MatView_MPIDense_ASCIIorDraworSocket(mat, viewer));
  } else if (isbinary) PetscCall(MatView_Dense_Binary(mat, viewer));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatGetInfo_MPIDense - Gathers matrix statistics from the local (sequential) block and
  combines them across ranks according to flag: MAT_LOCAL returns the local numbers,
  MAT_GLOBAL_MAX / MAT_GLOBAL_SUM reduce with MPI_MAX / MPI_SUM respectively.
  Factorization-related fields are zeroed (no parallel LU/ILU/Cholesky for dense).
*/
static PetscErrorCode MatGetInfo_MPIDense(Mat A, MatInfoType flag, MatInfo *info)
{
  Mat_MPIDense  *mat = (Mat_MPIDense *)A->data;
  Mat            mdn = mat->A;
  PetscLogDouble isend[5], irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0; /* dense storage has no blocking */

  PetscCall(MatGetInfo(mdn, MAT_LOCAL, info));

  /* stage the five reducible fields for the collective */
  isend[0] = info->nz_used;
  isend[1] = info->nz_allocated;
  isend[2] = info->nz_unneeded;
  isend[3] = info->memory;
  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)A)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)A)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatSetOption_MPIDense - Handles options for the parallel dense type.  Most structural
  options are either forwarded to the local sequential block, recorded locally
  (row orientation, stash bypass), or logged as ignored; unknown options error out.
*/
static PetscErrorCode MatSetOption_MPIDense(Mat A, MatOption op, PetscBool flg)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
    /* forwarded to the local block */
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
    break;
  case MAT_ROW_ORIENTED:
    /* recorded here and forwarded so MatSetValues() interprets input consistently */
    MatCheckPreallocated(A, 1);
    a->roworiented = flg;
    PetscCall(MatSetOption(a->A, op, flg));
    break;
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_USE_HASH_TABLE:
  case MAT_SORTED_FULL:
    PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    /* skip stashing of off-process entries during assembly */
    a->donotstash = flg;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
  case MAT_SPD:
  case MAT_IGNORE_LOWER_TRIANGULAR:
  case MAT_IGNORE_ZERO_ENTRIES:
  case MAT_SPD_ETERNAL:
    /* if the diagonal matrix is square it inherits some of the properties above */
    PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
    break;
  default:
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %s", MatOptions[op]);
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatDiagonalScale_MPIDense - Computes A = diag(ll) * A * diag(rr) in place.
  Left scaling is purely local (ll matches the local rows); right scaling first
  broadcasts rr into the local work vector lvec via the PetscSF, since every rank
  stores all N columns of its row block.
*/
static PetscErrorCode MatDiagonalScale_MPIDense(Mat A, Vec ll, Vec rr)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)A->data;
  const PetscScalar *l;
  PetscScalar        x, *v, *vv, *r;
  PetscInt           i, j, s2a, s3a, s2, s3, m = mdn->A->rmap->n, n = mdn->A->cmap->n, lda;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArray(mdn->A, &vv));
  PetscCall(MatDenseGetLDA(mdn->A, &lda));
  PetscCall(MatGetLocalSize(A, &s2, &s3));
  if (ll) {
    PetscCall(VecGetLocalSize(ll, &s2a));
    PetscCheck(s2a == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left scaling vector non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT, s2a, s2);
    PetscCall(VecGetArrayRead(ll, &l));
    for (i = 0; i < m; i++) {
      x = l[i];
      v = vv + i; /* walk row i; columns are lda apart in column-major storage */
      for (j = 0; j < n; j++) {
        (*v) *= x;
        v += lda;
      }
    }
    PetscCall(VecRestoreArrayRead(ll, &l));
    PetscCall(PetscLogFlops(1.0 * n * m));
  }
  if (rr) {
    const PetscScalar *ar;

    PetscCall(VecGetLocalSize(rr, &s3a));
    PetscCheck(s3a == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Right scaling vec non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT ".", s3a, s3);
    PetscCall(VecGetArrayRead(rr, &ar));
    /* lazily build the SF used to replicate rr across all ranks */
    if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
    PetscCall(VecGetArray(mdn->lvec, &r));
    PetscCall(PetscSFBcastBegin(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE));
    PetscCall(VecRestoreArrayRead(rr, &ar));
    for (i = 0; i < n; i++) {
      x = r[i];
      v = vv + i * lda; /* column i is contiguous */
      for (j = 0; j < m; j++) (*v++) *= x;
    }
    PetscCall(VecRestoreArray(mdn->lvec, &r));
    PetscCall(PetscLogFlops(1.0 * n * m));
  }
  PetscCall(MatDenseRestoreArray(mdn->A, &vv));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatNorm_MPIDense - Computes a matrix norm.  Supported: NORM_FROBENIUS (sum of
  squares reduced with MPI_SUM), NORM_1 (per-column absolute sums reduced with
  MPI_SUM, then max), NORM_INFINITY (local row norms reduced with MPI_MAX).
  The two norm is not supported.  On a single rank everything is delegated to the
  sequential block.

  NOTE(review): the NORM_FROBENIUS/NORM_1 loops assume the local array is
  contiguous (lda == local rows) — confirm against MatDenseGetLDA usage elsewhere.
*/
static PetscErrorCode MatNorm_MPIDense(Mat A, NormType type, PetscReal *nrm)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)A->data;
  PetscInt           i, j;
  PetscMPIInt        size;
  PetscReal          sum = 0.0;
  const PetscScalar *av, *v;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArrayRead(mdn->A, &av));
  v = av;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    PetscCall(MatNorm(mdn->A, type, nrm));
  } else {
    if (type == NORM_FROBENIUS) {
      for (i = 0; i < mdn->A->cmap->n * mdn->A->rmap->n; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
      *nrm = PetscSqrtReal(*nrm);
      PetscCall(PetscLogFlops(2.0 * mdn->A->cmap->n * mdn->A->rmap->n));
    } else if (type == NORM_1) {
      /* tmp[j] = local absolute column sums; reduced into tmp2, then take the max */
      PetscReal *tmp, *tmp2;
      PetscCall(PetscCalloc2(A->cmap->N, &tmp, A->cmap->N, &tmp2));
      *nrm = 0.0;
      v    = av;
      for (j = 0; j < mdn->A->cmap->n; j++) {
        for (i = 0; i < mdn->A->rmap->n; i++) {
          tmp[j] += PetscAbsScalar(*v);
          v++;
        }
      }
      PetscCall(MPIU_Allreduce(tmp, tmp2, A->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
      for (j = 0; j < A->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      PetscCall(PetscFree2(tmp, tmp2));
      PetscCall(PetscLogFlops(A->cmap->n * A->rmap->n));
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp;
      PetscCall(MatNorm(mdn->A, type, &ntemp));
      PetscCall(MPIU_Allreduce(&ntemp, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
    } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for two norm");
  }
  PetscCall(MatDenseRestoreArrayRead(mdn->A, &av));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatTranspose_MPIDense - Builds the transpose by inserting each local column of A
  as a (global) row of B via MatSetValues(); assembly handles redistribution.
  MAT_INPLACE_MATRIX merges the new matrix back into A's header.
*/
static PetscErrorCode MatTranspose_MPIDense(Mat A, MatReuse reuse, Mat *matout)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat           B;
  PetscInt      M = A->rmap->N, N = A->cmap->N, m, n, *rwork, rstart = A->rmap->rstart;
  PetscInt      j, i, lda;
  PetscScalar  *v;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
    PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
    PetscCall(MatMPIDenseSetPreallocation(B, NULL));
  } else B = *matout;

  m = a->A->rmap->n;
  n = a->A->cmap->n;
  PetscCall(MatDenseGetArrayRead(a->A, (const PetscScalar **)&v));
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(PetscMalloc1(m, &rwork));
  /* global row indices of this rank's rows become column indices in B */
  for (i = 0; i < m; i++) rwork[i] = rstart + i;
  for (j = 0; j < n; j++) {
    PetscCall(MatSetValues(B, 1, &j,
                           m, rwork, v, INSERT_VALUES));
    v = PetscSafePointerPlusOffset(v, lda); /* advance to the next local column */
  }
  PetscCall(MatDenseRestoreArrayRead(a->A, (const PetscScalar **)&v));
  PetscCall(PetscFree(rwork));
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    /* MAT_INPLACE_MATRIX: replace A's contents with the transpose */
    PetscCall(MatHeaderMerge(A, &B));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatDuplicate_MPIDense(Mat, MatDuplicateOption, Mat *);
PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat, PetscScalar);

/* MatSetUp_MPIDense - Finalizes layouts and preallocates the local block if needed. */
static PetscErrorCode MatSetUp_MPIDense(Mat A)
{
  PetscFunctionBegin;
  PetscCall(PetscLayoutSetUp(A->rmap));
  PetscCall(PetscLayoutSetUp(A->cmap));
  if (!A->preallocated) PetscCall(MatMPIDenseSetPreallocation(A, NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatAXPY_MPIDense - Y += alpha*X, delegated to the local sequential blocks. */
static PetscErrorCode MatAXPY_MPIDense(Mat Y, PetscScalar alpha, Mat X, MatStructure str)
{
  Mat_MPIDense *A = (Mat_MPIDense *)Y->data, *B = (Mat_MPIDense *)X->data;

  PetscFunctionBegin;
  PetscCall(MatAXPY(A->A, alpha, B->A, str));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatConjugate_MPIDense - Complex-conjugates the local block in place. */
static PetscErrorCode MatConjugate_MPIDense(Mat mat)
{
  Mat_MPIDense *a = (Mat_MPIDense *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatConjugate(a->A));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatRealPart_MPIDense - Keeps only the real part of the local block. */
static PetscErrorCode MatRealPart_MPIDense(Mat A)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatRealPart(a->A));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatImaginaryPart_MPIDense - Keeps only the imaginary part of the local block. */
static PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatImaginaryPart(a->A));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatGetColumnVector_MPIDense - Extracts global column col into v by calling the
  local block's getcolumnvector method directly (each rank fills its row range).
*/
static PetscErrorCode MatGetColumnVector_MPIDense(Mat A, Vec v, PetscInt col)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(a->A, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing local matrix");
  PetscCheck(a->A->ops->getcolumnvector, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing get column operation");
  PetscCall((*a->A->ops->getcolumnvector)(a->A, v, col));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat, PetscInt, PetscReal *);

/*
  MatGetColumnReductions_MPIDense - Per-column reductions (norms, sums, means) of the
  parallel matrix.  Local per-column results are computed on the sequential block
  (means are computed as sums and divided by the global row count afterwards),
  combined with MPI_MAX for NORM_INFINITY and MPI_SUM otherwise, and post-processed
  (square root for NORM_2, which is squared before the reduction).
*/
static PetscErrorCode MatGetColumnReductions_MPIDense(Mat A, PetscInt type, PetscReal *reductions)
{
  PetscInt      i, m, n;
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscReal    *work;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A, &m, &n));
  PetscCall(PetscMalloc1(n, &work));
  if (type == REDUCTION_MEAN_REALPART) {
    PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_REALPART, work));
  } else if (type == REDUCTION_MEAN_IMAGINARYPART) {
    PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_IMAGINARYPART, work));
  } else {
    PetscCall(MatGetColumnReductions_SeqDense(a->A, type, work));
  }
  if (type == NORM_2) {
    /* square so the sums of squares add across ranks */
    for (i = 0; i < n; i++) work[i] *= work[i];
  }
  if (type == NORM_INFINITY) {
    PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, A->hdr.comm));
  } else {
    PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, A->hdr.comm));
  }
  PetscCall(PetscFree(work));
  if (type == NORM_2) {
    for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
  } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < n; i++) reductions[i] /= m; /* divide the global sum by global rows */
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatSetRandom_MPIDense - Fills the local block with random values from rctx. */
static PetscErrorCode MatSetRandom_MPIDense(Mat x, PetscRandom rctx)
{
  Mat_MPIDense *d = (Mat_MPIDense *)x->data;

  PetscFunctionBegin;
  PetscCall(MatSetRandom(d->A, rctx));
#if defined(PETSC_HAVE_DEVICE)
  /* keep the parallel wrapper's offload state in sync with the local block */
  x->offloadmask = d->A->offloadmask;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatMissingDiagonal_MPIDense - A dense matrix stores every entry, so no diagonal
   entry is ever "missing"; always reports PETSC_FALSE. */
static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A, PetscBool *missing, PetscInt *d)
{
  PetscFunctionBegin;
  *missing = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat);
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat);
static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat);
static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat);
static PetscErrorCode MatEqual_MPIDense(Mat, Mat, PetscBool *);
static PetscErrorCode MatLoad_MPIDense(Mat, PetscViewer);
static PetscErrorCode MatProductSetFromOptions_MPIDense(Mat);

/* Function table for MATMPIDENSE; entries are positional (indices in comments),
   NULL marks an unimplemented operation. Do not reorder. */
static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
                                       MatGetRow_MPIDense,
                                       MatRestoreRow_MPIDense,
                                       MatMult_MPIDense,
                                       /* 4*/ MatMultAdd_MPIDense,
                                       MatMultTranspose_MPIDense,
                                       MatMultTransposeAdd_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 10*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatTranspose_MPIDense,
                                       /* 15*/ MatGetInfo_MPIDense,
                                       MatEqual_MPIDense,
                                       MatGetDiagonal_MPIDense,
                                       MatDiagonalScale_MPIDense,
                                       MatNorm_MPIDense,
                                       /* 20*/ MatAssemblyBegin_MPIDense,
                                       MatAssemblyEnd_MPIDense,
                                       MatSetOption_MPIDense,
                                       MatZeroEntries_MPIDense,
                                       /* 24*/ MatZeroRows_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 29*/ MatSetUp_MPIDense,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPIDense,
                                       NULL,
                                       /* 34*/ MatDuplicate_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 39*/ MatAXPY_MPIDense,
                                       MatCreateSubMatrices_MPIDense,
                                       NULL,
                                       MatGetValues_MPIDense,
                                       MatCopy_MPIDense,
                                       /* 44*/ NULL,
                                       MatScale_MPIDense,
                                       MatShift_MPIDense,
                                       NULL,
                                       NULL,
                                       /* 49*/ MatSetRandom_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 54*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 59*/ MatCreateSubMatrix_MPIDense,
                                       MatDestroy_MPIDense,
                                       MatView_MPIDense,
                                       NULL,
                                       NULL,
                                       /* 64*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 69*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 74*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 79*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 83*/ MatLoad_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 89*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 94*/ NULL,
                                       NULL,
                                       MatMatTransposeMultSymbolic_MPIDense_MPIDense,
                                       MatMatTransposeMultNumeric_MPIDense_MPIDense,
                                       NULL,
                                       /* 99*/ MatProductSetFromOptions_MPIDense,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPIDense,
                                       NULL,
                                       /*104*/ NULL,
                                       MatRealPart_MPIDense,
                                       MatImaginaryPart_MPIDense,
                                       NULL,
                                       NULL,
                                       /*109*/ NULL,
                                       NULL,
                                       NULL,
                                       MatGetColumnVector_MPIDense,
                                       MatMissingDiagonal_MPIDense,
                                       /*114*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*119*/ NULL,
                                       NULL,
                                       MatMultHermitianTranspose_MPIDense,
                                       MatMultHermitianTransposeAdd_MPIDense,
                                       NULL,
                                       /*124*/ NULL,
                                       MatGetColumnReductions_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*129*/ NULL,
                                       NULL,
                                       MatTransposeMatMultSymbolic_MPIDense_MPIDense,
                                       MatTransposeMatMultNumeric_MPIDense_MPIDense,
                                       NULL,
                                       /*134*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*139*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatCreateMPIMatConcatenateSeqMat_MPIDense,
                                       /*145*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*150*/ NULL,
                                       NULL,
                                       NULL};

/*
  MatMPIDenseSetPreallocation_MPIDense - Allocates the local m x N sequential block
  (each rank stores all global columns of its row slice).  data, if non-NULL, is used
  as the local array.  Picks the CUDA/HIP sequential type when the parallel matrix is
  the corresponding device type.
*/
static PetscErrorCode
MatMPIDenseSetPreallocation_MPIDense(Mat mat, PetscScalar *data)
{
  Mat_MPIDense *a     = (Mat_MPIDense *)mat->data;
  MatType       mtype = MATSEQDENSE;

  PetscFunctionBegin;
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));
  if (!a->A) {
    PetscCall(MatCreate(PETSC_COMM_SELF, &a->A));
    /* local block holds the full column space: mat->rmap->n x mat->cmap->N */
    PetscCall(MatSetSizes(a->A, mat->rmap->n, mat->cmap->N, mat->rmap->n, mat->cmap->N));
  }
#if defined(PETSC_HAVE_CUDA)
  PetscBool iscuda;
  PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSECUDA, &iscuda));
  if (iscuda) mtype = MATSEQDENSECUDA;
#endif
#if defined(PETSC_HAVE_HIP)
  PetscBool iship;
  PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSEHIP, &iship));
  if (iship) mtype = MATSEQDENSEHIP;
#endif
  PetscCall(MatSetType(a->A, mtype));
  PetscCall(MatSeqDenseSetPreallocation(a->A, data));
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
  mat->offloadmask = a->A->offloadmask;
#endif
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatConvert_MPIAIJ_MPIDense - Converts a parallel AIJ matrix to parallel dense by
  densifying the merged local matrix and concatenating the per-rank pieces.
*/
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIDense(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat B, C;

  PetscFunctionBegin;
  PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &C));
  PetscCall(MatConvert_SeqAIJ_SeqDense(C, MATSEQDENSE, MAT_INITIAL_MATRIX, &B));
  PetscCall(MatDestroy(&C));
  if (reuse == MAT_REUSE_MATRIX) {
    C = *newmat;
  } else C = NULL;
  PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C));
  PetscCall(MatDestroy(&B));
  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A, &C));
  } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatConvert_MPIDense_MPIAIJ - Converts a parallel dense matrix to parallel AIJ by
  sparsifying the local block and concatenating the per-rank pieces.
*/
static PetscErrorCode MatConvert_MPIDense_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat B, C;

  PetscFunctionBegin;
  PetscCall(MatDenseGetLocalMatrix(A, &C));
  PetscCall(MatConvert_SeqDense_SeqAIJ(C, MATSEQAIJ, MAT_INITIAL_MATRIX, &B));
  if (reuse == MAT_REUSE_MATRIX) {
    C = *newmat;
  } else C = NULL;
  PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C));
  PetscCall(MatDestroy(&B));
  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A, &C));
  } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
  PetscFunctionReturn(PETSC_SUCCESS);
}

#if defined(PETSC_HAVE_ELEMENTAL)
/*
  MatConvert_MPIDense_Elemental - Converts a parallel dense matrix to MATELEMENTAL by
  inserting each rank's local rows with MatSetValues (column-oriented, ADD_VALUES).
*/
PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat           mat_elemental;
  PetscScalar  *v;
  PetscInt      m = A->rmap->n, N = A->cmap->N, rstart = A->rmap->rstart, i, *rows, *cols, lda;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    mat_elemental = *newmat;
    PetscCall(MatZeroEntries(*newmat)); /* required: values are accumulated with ADD_VALUES below */
  } else {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental));
    PetscCall(MatSetSizes(mat_elemental, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N));
    PetscCall(MatSetType(mat_elemental, MATELEMENTAL));
    PetscCall(MatSetUp(mat_elemental));
    PetscCall(MatSetOption(mat_elemental, MAT_ROW_ORIENTED, PETSC_FALSE));
  }

  PetscCall(PetscMalloc2(m, &rows, N, &cols));
  for (i = 0; i < N; i++) cols[i] = i;
  for (i = 0; i < m; i++) rows[i] =
rstart + i;

  /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
  PetscCall(MatDenseGetArray(A, &v));
  PetscCall(MatDenseGetLDA(a->A, &lda));
  if (lda == m) PetscCall(MatSetValues(mat_elemental, m, rows, N, cols, v, ADD_VALUES));
  else {
    /* non-contiguous storage: insert one column at a time at stride lda */
    for (i = 0; i < N; i++) PetscCall(MatSetValues(mat_elemental, m, rows, 1, &i, v + lda * i, ADD_VALUES));
  }
  PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY));
  PetscCall(MatDenseRestoreArray(A, &v));
  PetscCall(PetscFree2(rows, cols));

  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A, &mat_elemental));
  } else {
    *newmat = mat_elemental;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif

/* MatDenseGetColumn_MPIDense - Direct pointer access to a column of the local block. */
static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A, PetscInt col, PetscScalar **vals)
{
  Mat_MPIDense *mat = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseGetColumn(mat->A, col, vals));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatDenseRestoreColumn_MPIDense - Releases access obtained with MatDenseGetColumn_MPIDense(). */
static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A, PetscScalar **vals)
{
  Mat_MPIDense *mat = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseRestoreColumn(mat->A, vals));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatCreateMPIMatConcatenateSeqMat_MPIDense - Stacks per-rank sequential dense matrices
  (all with the same global column space) into one parallel dense matrix.  In the
  symbolic phase (MAT_INITIAL_MATRIX) the column ownership n is resolved and verified
  to sum to N; the numeric phase just copies the local data into the local block.
*/
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
{
  Mat_MPIDense *mat;
  PetscInt      m, nloc, N;

  PetscFunctionBegin;
  PetscCall(MatGetSize(inmat, &m, &N));
  PetscCall(MatGetLocalSize(inmat, NULL, &nloc));
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    PetscInt sum;

    if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N));
    /* Check sum(n) = N */
    PetscCall(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm));
    PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N);

    PetscCall(MatCreateDense(comm, m, n, PETSC_DETERMINE, N, NULL, outmat));
    PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  }

  /* numeric phase */
  mat = (Mat_MPIDense *)(*outmat)->data;
  PetscCall(MatCopy(inmat, mat->A, SAME_NONZERO_PATTERN));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatDenseGetColumnVec_MPIDense - Exposes global column col as a Vec (read-write) by
  placing the column's storage into the cached column vector a->cvec.  Only one column
  vector and one submatrix may be outstanding at a time (tracked via vecinuse/matinuse);
  pair with MatDenseRestoreColumnVec().
*/
PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscInt      lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
  a->vecinuse = col + 1; /* 0 means "not in use", hence the +1 */
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(MatDenseGetArray(a->A, (PetscScalar **)&a->ptrinuse));
  PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)lda)));
  *v = a->cvec;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatDenseRestoreColumnVec_MPIDense - Releases the column vector obtained with
   MatDenseGetColumnVec_MPIDense() and restores the underlying array. */
PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector");
  a->vecinuse = 0;
  PetscCall(MatDenseRestoreArray(a->A, (PetscScalar **)&a->ptrinuse));
  PetscCall(VecResetArray(a->cvec));
  if (v) *v = NULL;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Read-only variant of MatDenseGetColumnVec_MPIDense(); additionally locks the
   vector against writes. */
PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A,
                                                 PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscInt      lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
  a->vecinuse = col + 1; /* 0 means "not in use", hence the +1 */
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(MatDenseGetArrayRead(a->A, &a->ptrinuse));
  PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)lda)));
  PetscCall(VecLockReadPush(a->cvec)); /* forbid writes through the read-only view */
  *v = a->cvec;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatDenseRestoreColumnVecRead_MPIDense - Releases the read-only column vector,
   popping the read lock before resetting the placed array. */
PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
  a->vecinuse = 0;
  PetscCall(MatDenseRestoreArrayRead(a->A, &a->ptrinuse));
  PetscCall(VecLockReadPop(a->cvec));
  PetscCall(VecResetArray(a->cvec));
  if (v) *v = NULL;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Write-only variant of MatDenseGetColumnVec_MPIDense(); uses GetArrayWrite so the
   previous contents need not be valid. */
PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscInt      lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
  a->vecinuse = col + 1; /* 0 means "not in use", hence the +1 */
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(MatDenseGetArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
  PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)lda)));
  *v = a->cvec;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatDenseRestoreColumnVecWrite_MPIDense - Releases the write-only column vector. */
PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
  a->vecinuse = 0;
  PetscCall(MatDenseRestoreArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
  PetscCall(VecResetArray(a->cvec));
  if (v) *v = NULL;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatDenseGetSubMatrix_MPIDense - Returns a parallel dense view of global rows
  [rbegin,rend) and columns [cbegin,cend) backed by the original storage (no copy).
  The cached view matrix a->cmat is reused across calls; its layouts are rebuilt only
  when the requested sizes change.  pbegin/pend clip the global row range to this
  rank's ownership range.  Pair with MatDenseRestoreSubMatrix().
*/
static PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A, PetscInt rbegin, PetscInt rend, PetscInt cbegin, PetscInt cend, Mat *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat_MPIDense *c;
  MPI_Comm      comm;
  PetscInt      pbegin, pend;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCheck(!a->vecinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  /* intersect [rbegin,rend) with this rank's row ownership, in local indices */
  pbegin = PetscMax(0, PetscMin(A->rmap->rend, rbegin) - A->rmap->rstart);
  pend   = PetscMin(A->rmap->n, PetscMax(0, rend - A->rmap->rstart));
  if (!a->cmat) {
    PetscCall(MatCreate(comm, &a->cmat));
    PetscCall(MatSetType(a->cmat, ((PetscObject)A)->type_name));
    if (rend - rbegin == A->rmap->N) PetscCall(PetscLayoutReference(A->rmap, &a->cmat->rmap));
    else {
      PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin));
PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin)); 1635 PetscCall(PetscLayoutSetUp(a->cmat->rmap)); 1636 } 1637 PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin)); 1638 PetscCall(PetscLayoutSetUp(a->cmat->cmap)); 1639 } else { 1640 PetscBool same = (PetscBool)(rend - rbegin == a->cmat->rmap->N); 1641 if (same && a->cmat->rmap->N != A->rmap->N) { 1642 same = (PetscBool)(pend - pbegin == a->cmat->rmap->n); 1643 PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &same, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A))); 1644 } 1645 if (!same) { 1646 PetscCall(PetscLayoutDestroy(&a->cmat->rmap)); 1647 PetscCall(PetscLayoutCreate(comm, &a->cmat->rmap)); 1648 PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin)); 1649 PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin)); 1650 PetscCall(PetscLayoutSetUp(a->cmat->rmap)); 1651 } 1652 if (cend - cbegin != a->cmat->cmap->N) { 1653 PetscCall(PetscLayoutDestroy(&a->cmat->cmap)); 1654 PetscCall(PetscLayoutCreate(comm, &a->cmat->cmap)); 1655 PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin)); 1656 PetscCall(PetscLayoutSetUp(a->cmat->cmap)); 1657 } 1658 } 1659 c = (Mat_MPIDense *)a->cmat->data; 1660 PetscCheck(!c->A, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1661 PetscCall(MatDenseGetSubMatrix(a->A, pbegin, pend, cbegin, cend, &c->A)); 1662 1663 a->cmat->preallocated = PETSC_TRUE; 1664 a->cmat->assembled = PETSC_TRUE; 1665 #if defined(PETSC_HAVE_DEVICE) 1666 a->cmat->offloadmask = c->A->offloadmask; 1667 #endif 1668 a->matinuse = cbegin + 1; 1669 *v = a->cmat; 1670 PetscFunctionReturn(PETSC_SUCCESS); 1671 } 1672 1673 static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A, Mat *v) 1674 { 1675 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1676 Mat_MPIDense *c; 1677 1678 PetscFunctionBegin; 1679 PetscCheck(a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetSubMatrix() first"); 1680 PetscCheck(a->cmat, 
PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal matrix"); 1681 PetscCheck(*v == a->cmat, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Not the matrix obtained from MatDenseGetSubMatrix()"); 1682 a->matinuse = 0; 1683 c = (Mat_MPIDense *)a->cmat->data; 1684 PetscCall(MatDenseRestoreSubMatrix(a->A, &c->A)); 1685 if (v) *v = NULL; 1686 #if defined(PETSC_HAVE_DEVICE) 1687 A->offloadmask = a->A->offloadmask; 1688 #endif 1689 PetscFunctionReturn(PETSC_SUCCESS); 1690 } 1691 1692 /*MC 1693 MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices. 1694 1695 Options Database Key: 1696 . -mat_type mpidense - sets the matrix type to `MATMPIDENSE` during a call to `MatSetFromOptions()` 1697 1698 Level: beginner 1699 1700 .seealso: [](ch_matrices), `Mat`, `MatCreateDense()`, `MATSEQDENSE`, `MATDENSE` 1701 M*/ 1702 PetscErrorCode MatCreate_MPIDense(Mat mat) 1703 { 1704 Mat_MPIDense *a; 1705 1706 PetscFunctionBegin; 1707 PetscCall(PetscNew(&a)); 1708 mat->data = (void *)a; 1709 mat->ops[0] = MatOps_Values; 1710 1711 mat->insertmode = NOT_SET_VALUES; 1712 1713 /* build cache for off array entries formed */ 1714 a->donotstash = PETSC_FALSE; 1715 1716 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)mat), 1, &mat->stash)); 1717 1718 /* stuff used for matrix vector multiply */ 1719 a->lvec = NULL; 1720 a->Mvctx = NULL; 1721 a->roworiented = PETSC_TRUE; 1722 1723 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", MatDenseGetLDA_MPIDense)); 1724 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", MatDenseSetLDA_MPIDense)); 1725 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", MatDenseGetArray_MPIDense)); 1726 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", MatDenseRestoreArray_MPIDense)); 1727 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", MatDenseGetArrayRead_MPIDense)); 1728 
PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", MatDenseRestoreArrayRead_MPIDense)); 1729 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", MatDenseGetArrayWrite_MPIDense)); 1730 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", MatDenseRestoreArrayWrite_MPIDense)); 1731 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", MatDensePlaceArray_MPIDense)); 1732 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", MatDenseResetArray_MPIDense)); 1733 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", MatDenseReplaceArray_MPIDense)); 1734 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDense)); 1735 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDense)); 1736 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDense)); 1737 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDense)); 1738 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDense)); 1739 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDense)); 1740 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", MatDenseGetSubMatrix_MPIDense)); 1741 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", MatDenseRestoreSubMatrix_MPIDense)); 1742 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense)); 1743 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", MatConvert_MPIDense_MPIAIJ)); 1744 #if 
defined(PETSC_HAVE_ELEMENTAL) 1745 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", MatConvert_MPIDense_Elemental)); 1746 #endif 1747 #if defined(PETSC_HAVE_SCALAPACK) 1748 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", MatConvert_Dense_ScaLAPACK)); 1749 #endif 1750 #if defined(PETSC_HAVE_CUDA) 1751 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", MatConvert_MPIDense_MPIDenseCUDA)); 1752 #endif 1753 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", MatMPIDenseSetPreallocation_MPIDense)); 1754 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense)); 1755 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ)); 1756 #if defined(PETSC_HAVE_CUDA) 1757 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense)); 1758 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ)); 1759 #endif 1760 #if defined(PETSC_HAVE_HIP) 1761 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", MatConvert_MPIDense_MPIDenseHIP)); 1762 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense)); 1763 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ)); 1764 #endif 1765 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", MatDenseGetColumn_MPIDense)); 1766 PetscCall(PetscObjectComposeFunction((PetscObject)mat, 
"MatDenseRestoreColumn_C", MatDenseRestoreColumn_MPIDense)); 1767 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultAddColumnRange_C", MatMultAddColumnRange_MPIDense)); 1768 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeColumnRange_C", MatMultHermitianTransposeColumnRange_MPIDense)); 1769 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeAddColumnRange_C", MatMultHermitianTransposeAddColumnRange_MPIDense)); 1770 PetscCall(PetscObjectChangeTypeName((PetscObject)mat, MATMPIDENSE)); 1771 PetscFunctionReturn(PETSC_SUCCESS); 1772 } 1773 1774 /*MC 1775 MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices. 1776 1777 This matrix type is identical to `MATSEQDENSE` when constructed with a single process communicator, 1778 and `MATMPIDENSE` otherwise. 1779 1780 Options Database Key: 1781 . -mat_type dense - sets the matrix type to `MATDENSE` during a call to `MatSetFromOptions()` 1782 1783 Level: beginner 1784 1785 .seealso: [](ch_matrices), `Mat`, `MATSEQDENSE`, `MATMPIDENSE`, `MATDENSECUDA`, `MATDENSEHIP` 1786 M*/ 1787 1788 /*@C 1789 MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries 1790 1791 Collective 1792 1793 Input Parameters: 1794 + B - the matrix 1795 - data - optional location of matrix data. Set to `NULL` for PETSc 1796 to control all matrix memory allocation. 1797 1798 Level: intermediate 1799 1800 Notes: 1801 The dense format is fully compatible with standard Fortran 1802 storage by columns. 1803 1804 The data input variable is intended primarily for Fortran programmers 1805 who wish to allocate their own matrix memory space. Most users should 1806 set `data` to `NULL`. 
1807 1808 .seealso: [](ch_matrices), `Mat`, `MATMPIDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()` 1809 @*/ 1810 PetscErrorCode MatMPIDenseSetPreallocation(Mat B, PetscScalar *data) 1811 { 1812 PetscFunctionBegin; 1813 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 1814 PetscTryMethod(B, "MatMPIDenseSetPreallocation_C", (Mat, PetscScalar *), (B, data)); 1815 PetscFunctionReturn(PETSC_SUCCESS); 1816 } 1817 1818 /*@ 1819 MatDensePlaceArray - Allows one to replace the array in a `MATDENSE` matrix with an 1820 array provided by the user. This is useful to avoid copying an array 1821 into a matrix 1822 1823 Not Collective 1824 1825 Input Parameters: 1826 + mat - the matrix 1827 - array - the array in column major order 1828 1829 Level: developer 1830 1831 Note: 1832 You can return to the original array with a call to `MatDenseResetArray()`. The user is responsible for freeing this array; it will not be 1833 freed when the matrix is destroyed. 1834 1835 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDenseResetArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`, 1836 `MatDenseReplaceArray()` 1837 @*/ 1838 PetscErrorCode MatDensePlaceArray(Mat mat, const PetscScalar *array) 1839 { 1840 PetscFunctionBegin; 1841 PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 1842 PetscUseMethod(mat, "MatDensePlaceArray_C", (Mat, const PetscScalar *), (mat, array)); 1843 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 1844 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 1845 mat->offloadmask = PETSC_OFFLOAD_CPU; 1846 #endif 1847 PetscFunctionReturn(PETSC_SUCCESS); 1848 } 1849 1850 /*@ 1851 MatDenseResetArray - Resets the matrix array to that it previously had before the call to `MatDensePlaceArray()` 1852 1853 Not Collective 1854 1855 Input Parameter: 1856 . 
mat - the matrix 1857 1858 Level: developer 1859 1860 Note: 1861 You can only call this after a call to `MatDensePlaceArray()` 1862 1863 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDensePlaceArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()` 1864 @*/ 1865 PetscErrorCode MatDenseResetArray(Mat mat) 1866 { 1867 PetscFunctionBegin; 1868 PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 1869 PetscUseMethod(mat, "MatDenseResetArray_C", (Mat), (mat)); 1870 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 1871 PetscFunctionReturn(PETSC_SUCCESS); 1872 } 1873 1874 /*@ 1875 MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an 1876 array provided by the user. This is useful to avoid copying an array 1877 into a matrix 1878 1879 Not Collective 1880 1881 Input Parameters: 1882 + mat - the matrix 1883 - array - the array in column major order 1884 1885 Level: developer 1886 1887 Note: 1888 The memory passed in MUST be obtained with `PetscMalloc()` and CANNOT be 1889 freed by the user. It will be freed when the matrix is destroyed. 1890 1891 .seealso: [](ch_matrices), `Mat`, `MatDensePlaceArray()`, `MatDenseGetArray()`, `VecReplaceArray()` 1892 @*/ 1893 PetscErrorCode MatDenseReplaceArray(Mat mat, const PetscScalar *array) 1894 { 1895 PetscFunctionBegin; 1896 PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 1897 PetscUseMethod(mat, "MatDenseReplaceArray_C", (Mat, const PetscScalar *), (mat, array)); 1898 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 1899 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 1900 mat->offloadmask = PETSC_OFFLOAD_CPU; 1901 #endif 1902 PetscFunctionReturn(PETSC_SUCCESS); 1903 } 1904 1905 /*@C 1906 MatCreateDense - Creates a matrix in `MATDENSE` format. 1907 1908 Collective 1909 1910 Input Parameters: 1911 + comm - MPI communicator 1912 . m - number of local rows (or `PETSC_DECIDE` to have calculated if `M` is given) 1913 . 
n - number of local columns (or `PETSC_DECIDE` to have calculated if `N` is given) 1914 . M - number of global rows (or `PETSC_DECIDE` to have calculated if `m` is given) 1915 . N - number of global columns (or `PETSC_DECIDE` to have calculated if `n` is given) 1916 - data - optional location of matrix data. Set data to `NULL` (`PETSC_NULL_SCALAR` for Fortran users) for PETSc 1917 to control all matrix memory allocation. 1918 1919 Output Parameter: 1920 . A - the matrix 1921 1922 Level: intermediate 1923 1924 Notes: 1925 The dense format is fully compatible with standard Fortran 1926 storage by columns. 1927 1928 Although local portions of the matrix are stored in column-major 1929 order, the matrix is partitioned across MPI ranks by row. 1930 1931 The data input variable is intended primarily for Fortran programmers 1932 who wish to allocate their own matrix memory space. Most users should 1933 set `data` to `NULL` (`PETSC_NULL_SCALAR` for Fortran users). 1934 1935 The user MUST specify either the local or global matrix dimensions 1936 (possibly both). 
1937 1938 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()` 1939 @*/ 1940 PetscErrorCode MatCreateDense(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar *data, Mat *A) 1941 { 1942 PetscFunctionBegin; 1943 PetscCall(MatCreate(comm, A)); 1944 PetscCall(MatSetSizes(*A, m, n, M, N)); 1945 PetscCall(MatSetType(*A, MATDENSE)); 1946 PetscCall(MatSeqDenseSetPreallocation(*A, data)); 1947 PetscCall(MatMPIDenseSetPreallocation(*A, data)); 1948 PetscFunctionReturn(PETSC_SUCCESS); 1949 } 1950 1951 static PetscErrorCode MatDuplicate_MPIDense(Mat A, MatDuplicateOption cpvalues, Mat *newmat) 1952 { 1953 Mat mat; 1954 Mat_MPIDense *a, *oldmat = (Mat_MPIDense *)A->data; 1955 1956 PetscFunctionBegin; 1957 *newmat = NULL; 1958 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat)); 1959 PetscCall(MatSetSizes(mat, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 1960 PetscCall(MatSetType(mat, ((PetscObject)A)->type_name)); 1961 a = (Mat_MPIDense *)mat->data; 1962 1963 mat->factortype = A->factortype; 1964 mat->assembled = PETSC_TRUE; 1965 mat->preallocated = PETSC_TRUE; 1966 1967 mat->insertmode = NOT_SET_VALUES; 1968 a->donotstash = oldmat->donotstash; 1969 1970 PetscCall(PetscLayoutReference(A->rmap, &mat->rmap)); 1971 PetscCall(PetscLayoutReference(A->cmap, &mat->cmap)); 1972 1973 PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A)); 1974 1975 *newmat = mat; 1976 PetscFunctionReturn(PETSC_SUCCESS); 1977 } 1978 1979 static PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer) 1980 { 1981 PetscBool isbinary; 1982 #if defined(PETSC_HAVE_HDF5) 1983 PetscBool ishdf5; 1984 #endif 1985 1986 PetscFunctionBegin; 1987 PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1); 1988 PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2); 1989 /* force binary viewer to load .info file if it has not yet done so */ 1990 PetscCall(PetscViewerSetUp(viewer)); 1991 
PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1992 #if defined(PETSC_HAVE_HDF5) 1993 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5)); 1994 #endif 1995 if (isbinary) { 1996 PetscCall(MatLoad_Dense_Binary(newMat, viewer)); 1997 #if defined(PETSC_HAVE_HDF5) 1998 } else if (ishdf5) { 1999 PetscCall(MatLoad_Dense_HDF5(newMat, viewer)); 2000 #endif 2001 } else SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name); 2002 PetscFunctionReturn(PETSC_SUCCESS); 2003 } 2004 2005 static PetscErrorCode MatEqual_MPIDense(Mat A, Mat B, PetscBool *flag) 2006 { 2007 Mat_MPIDense *matB = (Mat_MPIDense *)B->data, *matA = (Mat_MPIDense *)A->data; 2008 Mat a, b; 2009 2010 PetscFunctionBegin; 2011 a = matA->A; 2012 b = matB->A; 2013 PetscCall(MatEqual(a, b, flag)); 2014 PetscCall(MPIU_Allreduce(MPI_IN_PLACE, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A))); 2015 PetscFunctionReturn(PETSC_SUCCESS); 2016 } 2017 2018 static PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data) 2019 { 2020 Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data; 2021 2022 PetscFunctionBegin; 2023 PetscCall(PetscFree2(atb->sendbuf, atb->recvcounts)); 2024 PetscCall(MatDestroy(&atb->atb)); 2025 PetscCall(PetscFree(atb)); 2026 PetscFunctionReturn(PETSC_SUCCESS); 2027 } 2028 2029 static PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data) 2030 { 2031 Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data; 2032 2033 PetscFunctionBegin; 2034 PetscCall(PetscFree2(abt->buf[0], abt->buf[1])); 2035 PetscCall(PetscFree2(abt->recvcounts, abt->recvdispls)); 2036 PetscCall(PetscFree(abt)); 2037 PetscFunctionReturn(PETSC_SUCCESS); 2038 } 2039 2040 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) 2041 { 2042 Mat_MPIDense *a = 
(Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data; 2043 Mat_TransMatMultDense *atb; 2044 MPI_Comm comm; 2045 PetscMPIInt size, *recvcounts; 2046 PetscScalar *carray, *sendbuf; 2047 const PetscScalar *atbarray; 2048 PetscInt i, cN = C->cmap->N, proc, k, j, lda; 2049 const PetscInt *ranges; 2050 2051 PetscFunctionBegin; 2052 MatCheckProduct(C, 3); 2053 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 2054 atb = (Mat_TransMatMultDense *)C->product->data; 2055 recvcounts = atb->recvcounts; 2056 sendbuf = atb->sendbuf; 2057 2058 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 2059 PetscCallMPI(MPI_Comm_size(comm, &size)); 2060 2061 /* compute atbarray = aseq^T * bseq */ 2062 PetscCall(MatTransposeMatMult(a->A, b->A, atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX, PETSC_DEFAULT, &atb->atb)); 2063 2064 PetscCall(MatGetOwnershipRanges(C, &ranges)); 2065 2066 /* arrange atbarray into sendbuf */ 2067 PetscCall(MatDenseGetArrayRead(atb->atb, &atbarray)); 2068 PetscCall(MatDenseGetLDA(atb->atb, &lda)); 2069 for (proc = 0, k = 0; proc < size; proc++) { 2070 for (j = 0; j < cN; j++) { 2071 for (i = ranges[proc]; i < ranges[proc + 1]; i++) sendbuf[k++] = atbarray[i + j * lda]; 2072 } 2073 } 2074 PetscCall(MatDenseRestoreArrayRead(atb->atb, &atbarray)); 2075 2076 /* sum all atbarray to local values of C */ 2077 PetscCall(MatDenseGetArrayWrite(c->A, &carray)); 2078 PetscCallMPI(MPI_Reduce_scatter(sendbuf, carray, recvcounts, MPIU_SCALAR, MPIU_SUM, comm)); 2079 PetscCall(MatDenseRestoreArrayWrite(c->A, &carray)); 2080 PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 2081 PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 2082 PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 2083 PetscFunctionReturn(PETSC_SUCCESS); 2084 } 2085 2086 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C) 2087 { 2088 MPI_Comm comm; 2089 
PetscMPIInt size; 2090 PetscInt cm = A->cmap->n, cM, cN = B->cmap->N; 2091 Mat_TransMatMultDense *atb; 2092 PetscBool cisdense = PETSC_FALSE; 2093 PetscInt i; 2094 const PetscInt *ranges; 2095 2096 PetscFunctionBegin; 2097 MatCheckProduct(C, 4); 2098 PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 2099 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 2100 if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) { 2101 SETERRQ(comm, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend); 2102 } 2103 2104 /* create matrix product C */ 2105 PetscCall(MatSetSizes(C, cm, B->cmap->n, A->cmap->N, B->cmap->N)); 2106 #if defined(PETSC_HAVE_CUDA) 2107 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, "")); 2108 #endif 2109 #if defined(PETSC_HAVE_HIP) 2110 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSEHIP, "")); 2111 #endif 2112 if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 2113 PetscCall(MatSetUp(C)); 2114 2115 /* create data structure for reuse C */ 2116 PetscCallMPI(MPI_Comm_size(comm, &size)); 2117 PetscCall(PetscNew(&atb)); 2118 cM = C->rmap->N; 2119 PetscCall(PetscMalloc2(cM * cN, &atb->sendbuf, size, &atb->recvcounts)); 2120 PetscCall(MatGetOwnershipRanges(C, &ranges)); 2121 for (i = 0; i < size; i++) atb->recvcounts[i] = (ranges[i + 1] - ranges[i]) * cN; 2122 2123 C->product->data = atb; 2124 C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense; 2125 PetscFunctionReturn(PETSC_SUCCESS); 2126 } 2127 2128 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C) 2129 { 2130 MPI_Comm comm; 2131 PetscMPIInt i, size; 2132 PetscInt maxRows, bufsiz; 2133 PetscMPIInt tag; 2134 
PetscInt alg; 2135 Mat_MatTransMultDense *abt; 2136 Mat_Product *product = C->product; 2137 PetscBool flg; 2138 2139 PetscFunctionBegin; 2140 MatCheckProduct(C, 4); 2141 PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 2142 /* check local size of A and B */ 2143 PetscCheck(A->cmap->n == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local column dimensions are incompatible, A (%" PetscInt_FMT ") != B (%" PetscInt_FMT ")", A->cmap->n, B->cmap->n); 2144 2145 PetscCall(PetscStrcmp(product->alg, "allgatherv", &flg)); 2146 alg = flg ? 0 : 1; 2147 2148 /* setup matrix product C */ 2149 PetscCall(MatSetSizes(C, A->rmap->n, B->rmap->n, A->rmap->N, B->rmap->N)); 2150 PetscCall(MatSetType(C, MATMPIDENSE)); 2151 PetscCall(MatSetUp(C)); 2152 PetscCall(PetscObjectGetNewTag((PetscObject)C, &tag)); 2153 2154 /* create data structure for reuse C */ 2155 PetscCall(PetscObjectGetComm((PetscObject)C, &comm)); 2156 PetscCallMPI(MPI_Comm_size(comm, &size)); 2157 PetscCall(PetscNew(&abt)); 2158 abt->tag = tag; 2159 abt->alg = alg; 2160 switch (alg) { 2161 case 1: /* alg: "cyclic" */ 2162 for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i])); 2163 bufsiz = A->cmap->N * maxRows; 2164 PetscCall(PetscMalloc2(bufsiz, &abt->buf[0], bufsiz, &abt->buf[1])); 2165 break; 2166 default: /* alg: "allgatherv" */ 2167 PetscCall(PetscMalloc2(B->rmap->n * B->cmap->N, &abt->buf[0], B->rmap->N * B->cmap->N, &abt->buf[1])); 2168 PetscCall(PetscMalloc2(size, &abt->recvcounts, size + 1, &abt->recvdispls)); 2169 for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N; 2170 for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i]; 2171 break; 2172 } 2173 2174 C->product->data = abt; 2175 C->product->destroy = MatDestroy_MatMatTransMult_MPIDense_MPIDense; 2176 C->ops->mattransposemultnumeric = 
MatMatTransposeMultNumeric_MPIDense_MPIDense; 2177 PetscFunctionReturn(PETSC_SUCCESS); 2178 } 2179 2180 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C) 2181 { 2182 Mat_MPIDense *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data; 2183 Mat_MatTransMultDense *abt; 2184 MPI_Comm comm; 2185 PetscMPIInt rank, size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom; 2186 PetscScalar *sendbuf, *recvbuf = NULL, *cv; 2187 PetscInt i, cK = A->cmap->N, k, j, bn; 2188 PetscScalar _DOne = 1.0, _DZero = 0.0; 2189 const PetscScalar *av, *bv; 2190 PetscBLASInt cm, cn, ck, alda, blda = 0, clda; 2191 MPI_Request reqs[2]; 2192 const PetscInt *ranges; 2193 2194 PetscFunctionBegin; 2195 MatCheckProduct(C, 3); 2196 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 2197 abt = (Mat_MatTransMultDense *)C->product->data; 2198 PetscCall(PetscObjectGetComm((PetscObject)C, &comm)); 2199 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 2200 PetscCallMPI(MPI_Comm_size(comm, &size)); 2201 PetscCall(MatDenseGetArrayRead(a->A, &av)); 2202 PetscCall(MatDenseGetArrayRead(b->A, &bv)); 2203 PetscCall(MatDenseGetArrayWrite(c->A, &cv)); 2204 PetscCall(MatDenseGetLDA(a->A, &i)); 2205 PetscCall(PetscBLASIntCast(i, &alda)); 2206 PetscCall(MatDenseGetLDA(b->A, &i)); 2207 PetscCall(PetscBLASIntCast(i, &blda)); 2208 PetscCall(MatDenseGetLDA(c->A, &i)); 2209 PetscCall(PetscBLASIntCast(i, &clda)); 2210 PetscCall(MatGetOwnershipRanges(B, &ranges)); 2211 bn = B->rmap->n; 2212 if (blda == bn) { 2213 sendbuf = (PetscScalar *)bv; 2214 } else { 2215 sendbuf = abt->buf[0]; 2216 for (k = 0, i = 0; i < cK; i++) { 2217 for (j = 0; j < bn; j++, k++) sendbuf[k] = bv[i * blda + j]; 2218 } 2219 } 2220 if (size > 1) { 2221 sendto = (rank + size - 1) % size; 2222 recvfrom = (rank + size + 1) % size; 2223 } else { 2224 sendto = recvfrom = 0; 2225 } 2226 PetscCall(PetscBLASIntCast(cK, &ck)); 2227 
PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm)); 2228 recvisfrom = rank; 2229 for (i = 0; i < size; i++) { 2230 /* we have finished receiving in sending, bufs can be read/modified */ 2231 PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */ 2232 PetscInt nextbn = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom]; 2233 2234 if (nextrecvisfrom != rank) { 2235 /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */ 2236 sendsiz = cK * bn; 2237 recvsiz = cK * nextbn; 2238 recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1]; 2239 PetscCallMPI(MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0])); 2240 PetscCallMPI(MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1])); 2241 } 2242 2243 /* local aseq * sendbuf^T */ 2244 PetscCall(PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn)); 2245 if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &cm, &cn, &ck, &_DOne, av, &alda, sendbuf, &cn, &_DZero, cv + clda * ranges[recvisfrom], &clda)); 2246 2247 if (nextrecvisfrom != rank) { 2248 /* wait for the sends and receives to complete, swap sendbuf and recvbuf */ 2249 PetscCallMPI(MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE)); 2250 } 2251 bn = nextbn; 2252 recvisfrom = nextrecvisfrom; 2253 sendbuf = recvbuf; 2254 } 2255 PetscCall(MatDenseRestoreArrayRead(a->A, &av)); 2256 PetscCall(MatDenseRestoreArrayRead(b->A, &bv)); 2257 PetscCall(MatDenseRestoreArrayWrite(c->A, &cv)); 2258 PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 2259 PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 2260 PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 2261 PetscFunctionReturn(PETSC_SUCCESS); 2262 } 2263 2264 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C) 2265 { 2266 Mat_MPIDense *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data; 2267 
Mat_MatTransMultDense *abt; 2268 MPI_Comm comm; 2269 PetscMPIInt size; 2270 PetscScalar *cv, *sendbuf, *recvbuf; 2271 const PetscScalar *av, *bv; 2272 PetscInt blda, i, cK = A->cmap->N, k, j, bn; 2273 PetscScalar _DOne = 1.0, _DZero = 0.0; 2274 PetscBLASInt cm, cn, ck, alda, clda; 2275 2276 PetscFunctionBegin; 2277 MatCheckProduct(C, 3); 2278 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 2279 abt = (Mat_MatTransMultDense *)C->product->data; 2280 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 2281 PetscCallMPI(MPI_Comm_size(comm, &size)); 2282 PetscCall(MatDenseGetArrayRead(a->A, &av)); 2283 PetscCall(MatDenseGetArrayRead(b->A, &bv)); 2284 PetscCall(MatDenseGetArrayWrite(c->A, &cv)); 2285 PetscCall(MatDenseGetLDA(a->A, &i)); 2286 PetscCall(PetscBLASIntCast(i, &alda)); 2287 PetscCall(MatDenseGetLDA(b->A, &blda)); 2288 PetscCall(MatDenseGetLDA(c->A, &i)); 2289 PetscCall(PetscBLASIntCast(i, &clda)); 2290 /* copy transpose of B into buf[0] */ 2291 bn = B->rmap->n; 2292 sendbuf = abt->buf[0]; 2293 recvbuf = abt->buf[1]; 2294 for (k = 0, j = 0; j < bn; j++) { 2295 for (i = 0; i < cK; i++, k++) sendbuf[k] = bv[i * blda + j]; 2296 } 2297 PetscCall(MatDenseRestoreArrayRead(b->A, &bv)); 2298 PetscCallMPI(MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm)); 2299 PetscCall(PetscBLASIntCast(cK, &ck)); 2300 PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm)); 2301 PetscCall(PetscBLASIntCast(c->A->cmap->n, &cn)); 2302 if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &cm, &cn, &ck, &_DOne, av, &alda, recvbuf, &ck, &_DZero, cv, &clda)); 2303 PetscCall(MatDenseRestoreArrayRead(a->A, &av)); 2304 PetscCall(MatDenseRestoreArrayRead(b->A, &bv)); 2305 PetscCall(MatDenseRestoreArrayWrite(c->A, &cv)); 2306 PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 2307 PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 2308 PetscCall(MatAssemblyEnd(C, 
MAT_FINAL_ASSEMBLY)); 2309 PetscFunctionReturn(PETSC_SUCCESS); 2310 } 2311 2312 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) 2313 { 2314 Mat_MatTransMultDense *abt; 2315 2316 PetscFunctionBegin; 2317 MatCheckProduct(C, 3); 2318 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 2319 abt = (Mat_MatTransMultDense *)C->product->data; 2320 switch (abt->alg) { 2321 case 1: 2322 PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C)); 2323 break; 2324 default: 2325 PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C)); 2326 break; 2327 } 2328 PetscFunctionReturn(PETSC_SUCCESS); 2329 } 2330 2331 static PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data) 2332 { 2333 Mat_MatMultDense *ab = (Mat_MatMultDense *)data; 2334 2335 PetscFunctionBegin; 2336 PetscCall(MatDestroy(&ab->Ce)); 2337 PetscCall(MatDestroy(&ab->Ae)); 2338 PetscCall(MatDestroy(&ab->Be)); 2339 PetscCall(PetscFree(ab)); 2340 PetscFunctionReturn(PETSC_SUCCESS); 2341 } 2342 2343 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) 2344 { 2345 Mat_MatMultDense *ab; 2346 Mat_MPIDense *mdn = (Mat_MPIDense *)A->data; 2347 2348 PetscFunctionBegin; 2349 MatCheckProduct(C, 3); 2350 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Missing product data"); 2351 ab = (Mat_MatMultDense *)C->product->data; 2352 if (ab->Ae && ab->Ce) { 2353 #if PetscDefined(HAVE_ELEMENTAL) 2354 PetscCall(MatConvert_MPIDense_Elemental(A, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Ae)); 2355 PetscCall(MatConvert_MPIDense_Elemental(B, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Be)); 2356 PetscCall(MatMatMultNumeric_Elemental(ab->Ae, ab->Be, ab->Ce)); 2357 PetscCall(MatConvert(ab->Ce, MATMPIDENSE, MAT_REUSE_MATRIX, &C)); 2358 #else 2359 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "PETSC_HAVE_ELEMENTAL not defined"); 2360 #endif 2361 } else { 2362 
const PetscScalar *read; 2363 PetscScalar *write; 2364 PetscInt lda; 2365 2366 PetscCall(MatDenseGetLDA(B, &lda)); 2367 PetscCall(MatDenseGetArrayRead(B, &read)); 2368 PetscCall(MatDenseGetArrayWrite(ab->Be, &write)); 2369 if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); /* cannot be done during the symbolic phase because of possible calls to MatProductReplaceMats() */ 2370 for (PetscInt i = 0; i < C->cmap->N; ++i) { 2371 PetscCall(PetscSFBcastBegin(mdn->Mvctx, MPIU_SCALAR, read + i * lda, write + i * ab->Be->rmap->n, MPI_REPLACE)); 2372 PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, read + i * lda, write + i * ab->Be->rmap->n, MPI_REPLACE)); 2373 } 2374 PetscCall(MatDenseRestoreArrayWrite(ab->Be, &write)); 2375 PetscCall(MatDenseRestoreArrayRead(B, &read)); 2376 PetscCall(MatMatMultNumeric_SeqDense_SeqDense(((Mat_MPIDense *)A->data)->A, ab->Be, ((Mat_MPIDense *)C->data)->A)); 2377 } 2378 PetscFunctionReturn(PETSC_SUCCESS); 2379 } 2380 2381 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C) 2382 { 2383 Mat_Product *product = C->product; 2384 PetscInt alg; 2385 Mat_MatMultDense *ab; 2386 PetscBool flg; 2387 2388 PetscFunctionBegin; 2389 MatCheckProduct(C, 4); 2390 PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 2391 /* check local size of A and B */ 2392 PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ", %" PetscInt_FMT ")", 2393 A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend); 2394 2395 PetscCall(PetscStrcmp(product->alg, "petsc", &flg)); 2396 alg = flg ? 
0 : 1; 2397 2398 /* setup C */ 2399 PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N)); 2400 PetscCall(MatSetType(C, MATMPIDENSE)); 2401 PetscCall(MatSetUp(C)); 2402 2403 /* create data structure for reuse Cdense */ 2404 PetscCall(PetscNew(&ab)); 2405 2406 switch (alg) { 2407 case 1: /* alg: "elemental" */ 2408 #if PetscDefined(HAVE_ELEMENTAL) 2409 /* create elemental matrices Ae and Be */ 2410 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &ab->Ae)); 2411 PetscCall(MatSetSizes(ab->Ae, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N)); 2412 PetscCall(MatSetType(ab->Ae, MATELEMENTAL)); 2413 PetscCall(MatSetUp(ab->Ae)); 2414 PetscCall(MatSetOption(ab->Ae, MAT_ROW_ORIENTED, PETSC_FALSE)); 2415 2416 PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &ab->Be)); 2417 PetscCall(MatSetSizes(ab->Be, PETSC_DECIDE, PETSC_DECIDE, B->rmap->N, B->cmap->N)); 2418 PetscCall(MatSetType(ab->Be, MATELEMENTAL)); 2419 PetscCall(MatSetUp(ab->Be)); 2420 PetscCall(MatSetOption(ab->Be, MAT_ROW_ORIENTED, PETSC_FALSE)); 2421 2422 /* compute symbolic Ce = Ae*Be */ 2423 PetscCall(MatCreate(PetscObjectComm((PetscObject)C), &ab->Ce)); 2424 PetscCall(MatMatMultSymbolic_Elemental(ab->Ae, ab->Be, fill, ab->Ce)); 2425 #else 2426 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "PETSC_HAVE_ELEMENTAL not defined"); 2427 #endif 2428 break; 2429 default: /* alg: "petsc" */ 2430 ab->Ae = NULL; 2431 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, A->cmap->N, B->cmap->N, NULL, &ab->Be)); 2432 ab->Ce = NULL; 2433 break; 2434 } 2435 2436 C->product->data = ab; 2437 C->product->destroy = MatDestroy_MatMatMult_MPIDense_MPIDense; 2438 C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense; 2439 PetscFunctionReturn(PETSC_SUCCESS); 2440 } 2441 2442 static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C) 2443 { 2444 Mat_Product *product = C->product; 2445 const char *algTypes[2] = {"petsc", "elemental"}; 2446 PetscInt alg, nalg = PetscDefined(HAVE_ELEMENTAL) ? 
2 : 1; 2447 PetscBool flg = PETSC_FALSE; 2448 2449 PetscFunctionBegin; 2450 /* Set default algorithm */ 2451 alg = 0; /* default is petsc */ 2452 PetscCall(PetscStrcmp(product->alg, "default", &flg)); 2453 if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg])); 2454 2455 /* Get runtime option */ 2456 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat"); 2457 PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_AB", algTypes, nalg, algTypes[alg], &alg, &flg)); 2458 PetscOptionsEnd(); 2459 if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg])); 2460 2461 C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense; 2462 C->ops->productsymbolic = MatProductSymbolic_AB; 2463 PetscFunctionReturn(PETSC_SUCCESS); 2464 } 2465 2466 static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C) 2467 { 2468 Mat_Product *product = C->product; 2469 Mat A = product->A, B = product->B; 2470 2471 PetscFunctionBegin; 2472 PetscCheck(A->rmap->rstart == B->rmap->rstart && A->rmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", 2473 A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend); 2474 C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense; 2475 C->ops->productsymbolic = MatProductSymbolic_AtB; 2476 PetscFunctionReturn(PETSC_SUCCESS); 2477 } 2478 2479 static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C) 2480 { 2481 Mat_Product *product = C->product; 2482 const char *algTypes[2] = {"allgatherv", "cyclic"}; 2483 PetscInt alg, nalg = 2; 2484 PetscBool flg = PETSC_FALSE; 2485 2486 PetscFunctionBegin; 2487 /* Set default algorithm */ 2488 alg = 0; /* default is allgatherv */ 2489 PetscCall(PetscStrcmp(product->alg, "default", &flg)); 2490 if (flg) 
PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg])); 2491 2492 /* Get runtime option */ 2493 if (product->api_user) { 2494 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatTransposeMult", "Mat"); 2495 PetscCall(PetscOptionsEList("-matmattransmult_mpidense_mpidense_via", "Algorithmic approach", "MatMatTransposeMult", algTypes, nalg, algTypes[alg], &alg, &flg)); 2496 PetscOptionsEnd(); 2497 } else { 2498 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_ABt", "Mat"); 2499 PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_ABt", algTypes, nalg, algTypes[alg], &alg, &flg)); 2500 PetscOptionsEnd(); 2501 } 2502 if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg])); 2503 2504 C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense; 2505 C->ops->productsymbolic = MatProductSymbolic_ABt; 2506 PetscFunctionReturn(PETSC_SUCCESS); 2507 } 2508 2509 static PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C) 2510 { 2511 Mat_Product *product = C->product; 2512 2513 PetscFunctionBegin; 2514 switch (product->type) { 2515 case MATPRODUCT_AB: 2516 PetscCall(MatProductSetFromOptions_MPIDense_AB(C)); 2517 break; 2518 case MATPRODUCT_AtB: 2519 PetscCall(MatProductSetFromOptions_MPIDense_AtB(C)); 2520 break; 2521 case MATPRODUCT_ABt: 2522 PetscCall(MatProductSetFromOptions_MPIDense_ABt(C)); 2523 break; 2524 default: 2525 break; 2526 } 2527 PetscFunctionReturn(PETSC_SUCCESS); 2528 } 2529