1 /* 2 Basic functions for basic parallel dense matrices. 3 Portions of this code are under: 4 Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. 5 */ 6 7 #include <../src/mat/impls/dense/mpi/mpidense.h> /*I "petscmat.h" I*/ 8 #include <../src/mat/impls/aij/mpi/mpiaij.h> 9 #include <petscblaslapack.h> 10 #include <petsc/private/vecimpl.h> 11 12 /*@ 13 MatDenseGetLocalMatrix - For a `MATMPIDENSE` or `MATSEQDENSE` matrix returns the sequential 14 matrix that represents the operator. For sequential matrices it returns itself. 15 16 Input Parameter: 17 . A - the sequential or MPI `MATDENSE` matrix 18 19 Output Parameter: 20 . B - the inner matrix 21 22 Level: intermediate 23 24 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATMPIDENSE`, `MATSEQDENSE` 25 @*/ 26 PetscErrorCode MatDenseGetLocalMatrix(Mat A, Mat *B) 27 { 28 Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 29 PetscBool flg; 30 31 PetscFunctionBegin; 32 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 33 PetscAssertPointer(B, 2); 34 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &flg)); 35 if (flg) *B = mat->A; 36 else { 37 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQDENSE, &flg)); 38 PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)A)->type_name); 39 *B = A; 40 } 41 PetscFunctionReturn(PETSC_SUCCESS); 42 } 43 44 static PetscErrorCode MatCopy_MPIDense(Mat A, Mat B, MatStructure s) 45 { 46 Mat_MPIDense *Amat = (Mat_MPIDense *)A->data; 47 Mat_MPIDense *Bmat = (Mat_MPIDense *)B->data; 48 49 PetscFunctionBegin; 50 PetscCall(MatCopy(Amat->A, Bmat->A, s)); 51 PetscFunctionReturn(PETSC_SUCCESS); 52 } 53 54 PetscErrorCode MatShift_MPIDense(Mat A, PetscScalar alpha) 55 { 56 Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 57 PetscInt j, lda, rstart = A->rmap->rstart, rend = A->rmap->rend, rend2; 58 PetscScalar *v; 59 60 PetscFunctionBegin; 61 PetscCall(MatDenseGetArray(mat->A, &v)); 62 PetscCall(MatDenseGetLDA(mat->A, 
&lda)); 63 rend2 = PetscMin(rend, A->cmap->N); 64 if (rend2 > rstart) { 65 for (j = rstart; j < rend2; j++) v[j - rstart + j * lda] += alpha; 66 PetscCall(PetscLogFlops(rend2 - rstart)); 67 } 68 PetscCall(MatDenseRestoreArray(mat->A, &v)); 69 PetscFunctionReturn(PETSC_SUCCESS); 70 } 71 72 static PetscErrorCode MatGetRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 73 { 74 Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 75 PetscInt lrow, rstart = A->rmap->rstart, rend = A->rmap->rend; 76 77 PetscFunctionBegin; 78 PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows"); 79 lrow = row - rstart; 80 PetscCall(MatGetRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v)); 81 PetscFunctionReturn(PETSC_SUCCESS); 82 } 83 84 static PetscErrorCode MatRestoreRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 85 { 86 Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 87 PetscInt lrow, rstart = A->rmap->rstart, rend = A->rmap->rend; 88 89 PetscFunctionBegin; 90 PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows"); 91 lrow = row - rstart; 92 PetscCall(MatRestoreRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v)); 93 PetscFunctionReturn(PETSC_SUCCESS); 94 } 95 96 static PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A, Mat *a) 97 { 98 Mat_MPIDense *mdn = (Mat_MPIDense *)A->data; 99 PetscInt m = A->rmap->n, rstart = A->rmap->rstart; 100 PetscScalar *array; 101 MPI_Comm comm; 102 PetscBool flg; 103 Mat B; 104 105 PetscFunctionBegin; 106 PetscCall(MatHasCongruentLayouts(A, &flg)); 107 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only square matrices supported."); 108 PetscCall(PetscObjectQuery((PetscObject)A, "DiagonalBlock", (PetscObject *)&B)); 109 if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */ 110 #if PetscDefined(HAVE_CUDA) 111 
PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSECUDA, &flg)); 112 PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSECUDA); 113 #elif PetscDefined(HAVE_HIP) 114 PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSEHIP, &flg)); 115 PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSEHIP); 116 #endif 117 PetscCall(PetscObjectGetComm((PetscObject)mdn->A, &comm)); 118 PetscCall(MatCreate(comm, &B)); 119 PetscCall(MatSetSizes(B, m, m, m, m)); 120 PetscCall(MatSetType(B, ((PetscObject)mdn->A)->type_name)); 121 PetscCall(MatDenseGetArrayRead(mdn->A, (const PetscScalar **)&array)); 122 PetscCall(MatSeqDenseSetPreallocation(B, array + m * rstart)); 123 PetscCall(MatDenseRestoreArrayRead(mdn->A, (const PetscScalar **)&array)); 124 PetscCall(PetscObjectCompose((PetscObject)A, "DiagonalBlock", (PetscObject)B)); 125 *a = B; 126 PetscCall(MatDestroy(&B)); 127 } else *a = B; 128 PetscFunctionReturn(PETSC_SUCCESS); 129 } 130 131 static PetscErrorCode MatSetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], const PetscScalar v[], InsertMode addv) 132 { 133 Mat_MPIDense *A = (Mat_MPIDense *)mat->data; 134 PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row; 135 PetscBool roworiented = A->roworiented; 136 137 PetscFunctionBegin; 138 for (i = 0; i < m; i++) { 139 if (idxm[i] < 0) continue; 140 PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large"); 141 if (idxm[i] >= rstart && idxm[i] < rend) { 142 row = idxm[i] - rstart; 143 if (roworiented) { 144 PetscCall(MatSetValues(A->A, 1, &row, n, idxn, PetscSafePointerPlusOffset(v, i * n), addv)); 145 } else { 146 for (j = 0; j < n; j++) { 147 if (idxn[j] < 0) continue; 148 PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, 
PETSC_ERR_ARG_OUTOFRANGE, "Column too large"); 149 PetscCall(MatSetValues(A->A, 1, &row, 1, &idxn[j], PetscSafePointerPlusOffset(v, i + j * m), addv)); 150 } 151 } 152 } else if (!A->donotstash) { 153 mat->assembled = PETSC_FALSE; 154 if (roworiented) { 155 PetscCall(MatStashValuesRow_Private(&mat->stash, idxm[i], n, idxn, PetscSafePointerPlusOffset(v, i * n), PETSC_FALSE)); 156 } else { 157 PetscCall(MatStashValuesCol_Private(&mat->stash, idxm[i], n, idxn, PetscSafePointerPlusOffset(v, i), m, PETSC_FALSE)); 158 } 159 } 160 } 161 PetscFunctionReturn(PETSC_SUCCESS); 162 } 163 164 static PetscErrorCode MatGetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[]) 165 { 166 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 167 PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row; 168 169 PetscFunctionBegin; 170 for (i = 0; i < m; i++) { 171 if (idxm[i] < 0) continue; /* negative row */ 172 PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large"); 173 if (idxm[i] >= rstart && idxm[i] < rend) { 174 row = idxm[i] - rstart; 175 for (j = 0; j < n; j++) { 176 if (idxn[j] < 0) continue; /* negative column */ 177 PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large"); 178 PetscCall(MatGetValues(mdn->A, 1, &row, 1, &idxn[j], v + i * n + j)); 179 } 180 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported"); 181 } 182 PetscFunctionReturn(PETSC_SUCCESS); 183 } 184 185 static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A, PetscInt *lda) 186 { 187 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 188 189 PetscFunctionBegin; 190 PetscCall(MatDenseGetLDA(a->A, lda)); 191 PetscFunctionReturn(PETSC_SUCCESS); 192 } 193 194 static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A, PetscInt lda) 195 { 196 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 197 MatType mtype = MATSEQDENSE; 198 199 PetscFunctionBegin; 200 if 
(!a->A) { 201 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 202 PetscCall(PetscLayoutSetUp(A->rmap)); 203 PetscCall(PetscLayoutSetUp(A->cmap)); 204 PetscCall(MatCreate(PETSC_COMM_SELF, &a->A)); 205 PetscCall(MatSetSizes(a->A, A->rmap->n, A->cmap->N, A->rmap->n, A->cmap->N)); 206 #if PetscDefined(HAVE_CUDA) 207 PetscBool iscuda; 208 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSECUDA, &iscuda)); 209 if (iscuda) mtype = MATSEQDENSECUDA; 210 #elif PetscDefined(HAVE_HIP) 211 PetscBool iship; 212 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSEHIP, &iship)); 213 if (iship) mtype = MATSEQDENSEHIP; 214 #endif 215 PetscCall(MatSetType(a->A, mtype)); 216 } 217 PetscCall(MatDenseSetLDA(a->A, lda)); 218 PetscFunctionReturn(PETSC_SUCCESS); 219 } 220 221 static PetscErrorCode MatDenseGetArray_MPIDense(Mat A, PetscScalar **array) 222 { 223 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 224 225 PetscFunctionBegin; 226 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 227 PetscCall(MatDenseGetArray(a->A, array)); 228 PetscFunctionReturn(PETSC_SUCCESS); 229 } 230 231 static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A, PetscScalar **array) 232 { 233 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 234 235 PetscFunctionBegin; 236 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 237 PetscCall(MatDenseGetArrayRead(a->A, (const PetscScalar **)array)); 238 PetscFunctionReturn(PETSC_SUCCESS); 239 } 240 241 static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A, PetscScalar **array) 242 { 243 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 244 245 PetscFunctionBegin; 246 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 247 PetscCall(MatDenseGetArrayWrite(a->A, array)); 
248 PetscFunctionReturn(PETSC_SUCCESS); 249 } 250 251 static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A, const PetscScalar *array) 252 { 253 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 254 255 PetscFunctionBegin; 256 PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 257 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 258 PetscCall(MatDensePlaceArray(a->A, array)); 259 PetscFunctionReturn(PETSC_SUCCESS); 260 } 261 262 static PetscErrorCode MatDenseResetArray_MPIDense(Mat A) 263 { 264 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 265 266 PetscFunctionBegin; 267 PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 268 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 269 PetscCall(MatDenseResetArray(a->A)); 270 PetscFunctionReturn(PETSC_SUCCESS); 271 } 272 273 static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A, const PetscScalar *array) 274 { 275 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 276 277 PetscFunctionBegin; 278 PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 279 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 280 PetscCall(MatDenseReplaceArray(a->A, array)); 281 PetscFunctionReturn(PETSC_SUCCESS); 282 } 283 284 static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A, IS isrow, IS iscol, MatReuse scall, Mat *B) 285 { 286 Mat_MPIDense *mat = (Mat_MPIDense *)A->data, *newmatd; 287 PetscInt lda, i, j, rstart, rend, nrows, ncols, Ncols, nlrows, nlcols; 288 const PetscInt *irow, *icol; 289 const PetscScalar *v; 290 PetscScalar *bv; 291 Mat newmat; 292 IS iscol_local; 293 MPI_Comm comm_is, comm_mat; 294 295 
PetscFunctionBegin; 296 PetscCall(PetscObjectGetComm((PetscObject)A, &comm_mat)); 297 PetscCall(PetscObjectGetComm((PetscObject)iscol, &comm_is)); 298 PetscCheck(comm_mat == comm_is, PETSC_COMM_SELF, PETSC_ERR_ARG_NOTSAMECOMM, "IS communicator must match matrix communicator"); 299 300 PetscCall(ISAllGather(iscol, &iscol_local)); 301 PetscCall(ISGetIndices(isrow, &irow)); 302 PetscCall(ISGetIndices(iscol_local, &icol)); 303 PetscCall(ISGetLocalSize(isrow, &nrows)); 304 PetscCall(ISGetLocalSize(iscol, &ncols)); 305 PetscCall(ISGetSize(iscol, &Ncols)); /* global number of columns, size of iscol_local */ 306 307 /* No parallel redistribution currently supported! Should really check each index set 308 to confirm that it is OK. ... Currently supports only submatrix same partitioning as 309 original matrix! */ 310 311 PetscCall(MatGetLocalSize(A, &nlrows, &nlcols)); 312 PetscCall(MatGetOwnershipRange(A, &rstart, &rend)); 313 314 /* Check submatrix call */ 315 if (scall == MAT_REUSE_MATRIX) { 316 /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */ 317 /* Really need to test rows and column sizes! 
*/ 318 newmat = *B; 319 } else { 320 /* Create and fill new matrix */ 321 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &newmat)); 322 PetscCall(MatSetSizes(newmat, nrows, ncols, PETSC_DECIDE, Ncols)); 323 PetscCall(MatSetType(newmat, ((PetscObject)A)->type_name)); 324 PetscCall(MatMPIDenseSetPreallocation(newmat, NULL)); 325 } 326 327 /* Now extract the data pointers and do the copy, column at a time */ 328 newmatd = (Mat_MPIDense *)newmat->data; 329 PetscCall(MatDenseGetArray(newmatd->A, &bv)); 330 PetscCall(MatDenseGetArrayRead(mat->A, &v)); 331 PetscCall(MatDenseGetLDA(mat->A, &lda)); 332 for (i = 0; i < Ncols; i++) { 333 const PetscScalar *av = v + lda * icol[i]; 334 for (j = 0; j < nrows; j++) *bv++ = av[irow[j] - rstart]; 335 } 336 PetscCall(MatDenseRestoreArrayRead(mat->A, &v)); 337 PetscCall(MatDenseRestoreArray(newmatd->A, &bv)); 338 339 /* Assemble the matrices so that the correct flags are set */ 340 PetscCall(MatAssemblyBegin(newmat, MAT_FINAL_ASSEMBLY)); 341 PetscCall(MatAssemblyEnd(newmat, MAT_FINAL_ASSEMBLY)); 342 343 /* Free work space */ 344 PetscCall(ISRestoreIndices(isrow, &irow)); 345 PetscCall(ISRestoreIndices(iscol_local, &icol)); 346 PetscCall(ISDestroy(&iscol_local)); 347 *B = newmat; 348 PetscFunctionReturn(PETSC_SUCCESS); 349 } 350 351 static PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A, PetscScalar **array) 352 { 353 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 354 355 PetscFunctionBegin; 356 PetscCall(MatDenseRestoreArray(a->A, array)); 357 PetscFunctionReturn(PETSC_SUCCESS); 358 } 359 360 static PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A, PetscScalar **array) 361 { 362 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 363 364 PetscFunctionBegin; 365 PetscCall(MatDenseRestoreArrayRead(a->A, (const PetscScalar **)array)); 366 PetscFunctionReturn(PETSC_SUCCESS); 367 } 368 369 static PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A, PetscScalar **array) 370 { 371 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 372 373 
PetscFunctionBegin; 374 PetscCall(MatDenseRestoreArrayWrite(a->A, array)); 375 PetscFunctionReturn(PETSC_SUCCESS); 376 } 377 378 static PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat, MatAssemblyType mode) 379 { 380 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 381 PetscInt nstash, reallocs; 382 383 PetscFunctionBegin; 384 if (mdn->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS); 385 386 PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range)); 387 PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs)); 388 PetscCall(PetscInfo(mdn->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs)); 389 PetscFunctionReturn(PETSC_SUCCESS); 390 } 391 392 static PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat, MatAssemblyType mode) 393 { 394 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 395 PetscInt i, *row, *col, flg, j, rstart, ncols; 396 PetscMPIInt n; 397 PetscScalar *val; 398 399 PetscFunctionBegin; 400 if (!mdn->donotstash && !mat->nooffprocentries) { 401 /* wait on receives */ 402 while (1) { 403 PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg)); 404 if (!flg) break; 405 406 for (i = 0; i < n;) { 407 /* Now identify the consecutive vals belonging to the same row */ 408 for (j = i, rstart = row[j]; j < n; j++) { 409 if (row[j] != rstart) break; 410 } 411 if (j < n) ncols = j - i; 412 else ncols = n - i; 413 /* Now assemble all these values with a single function call */ 414 PetscCall(MatSetValues_MPIDense(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode)); 415 i = j; 416 } 417 } 418 PetscCall(MatStashScatterEnd_Private(&mat->stash)); 419 } 420 421 PetscCall(MatAssemblyBegin(mdn->A, mode)); 422 PetscCall(MatAssemblyEnd(mdn->A, mode)); 423 PetscFunctionReturn(PETSC_SUCCESS); 424 } 425 426 static PetscErrorCode MatZeroEntries_MPIDense(Mat A) 427 { 428 Mat_MPIDense *l = (Mat_MPIDense *)A->data; 429 430 PetscFunctionBegin; 431 
PetscCall(MatZeroEntries(l->A)); 432 PetscFunctionReturn(PETSC_SUCCESS); 433 } 434 435 static PetscErrorCode MatZeroRows_MPIDense(Mat A, PetscInt n, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) 436 { 437 Mat_MPIDense *l = (Mat_MPIDense *)A->data; 438 PetscInt i, len, *lrows; 439 440 PetscFunctionBegin; 441 /* get locally owned rows */ 442 PetscCall(PetscLayoutMapLocal(A->rmap, n, rows, &len, &lrows, NULL)); 443 /* fix right-hand side if needed */ 444 if (x && b) { 445 const PetscScalar *xx; 446 PetscScalar *bb; 447 448 PetscCall(VecGetArrayRead(x, &xx)); 449 PetscCall(VecGetArrayWrite(b, &bb)); 450 for (i = 0; i < len; ++i) bb[lrows[i]] = diag * xx[lrows[i]]; 451 PetscCall(VecRestoreArrayRead(x, &xx)); 452 PetscCall(VecRestoreArrayWrite(b, &bb)); 453 } 454 PetscCall(MatZeroRows(l->A, len, lrows, 0.0, NULL, NULL)); 455 if (diag != 0.0) { 456 Vec d; 457 458 PetscCall(MatCreateVecs(A, NULL, &d)); 459 PetscCall(VecSet(d, diag)); 460 PetscCall(MatDiagonalSet(A, d, INSERT_VALUES)); 461 PetscCall(VecDestroy(&d)); 462 } 463 PetscCall(PetscFree(lrows)); 464 PetscFunctionReturn(PETSC_SUCCESS); 465 } 466 467 PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat, Vec, Vec); 468 PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat, Vec, Vec, Vec); 469 PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat, Vec, Vec); 470 PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat, Vec, Vec, Vec); 471 472 static PetscErrorCode MatMult_MPIDense(Mat mat, Vec xx, Vec yy) 473 { 474 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 475 const PetscScalar *ax; 476 PetscScalar *ay; 477 PetscMemType axmtype, aymtype; 478 479 PetscFunctionBegin; 480 if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat)); 481 PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype)); 482 PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype)); 483 PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE)); 484 PetscCall(PetscSFBcastEnd(mdn->Mvctx, 
MPIU_SCALAR, ax, ay, MPI_REPLACE)); 485 PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay)); 486 PetscCall(VecRestoreArrayReadAndMemType(xx, &ax)); 487 PetscCall((*mdn->A->ops->mult)(mdn->A, mdn->lvec, yy)); 488 PetscFunctionReturn(PETSC_SUCCESS); 489 } 490 491 static PetscErrorCode MatMultAddColumnRange_MPIDense(Mat mat, Vec xx, Vec yy, Vec zz, PetscInt c_start, PetscInt c_end) 492 { 493 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 494 const PetscScalar *ax; 495 PetscScalar *ay; 496 PetscMemType axmtype, aymtype; 497 498 PetscFunctionBegin; 499 if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat)); 500 PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype)); 501 PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype)); 502 PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE)); 503 PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE)); 504 PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay)); 505 PetscCall(VecRestoreArrayReadAndMemType(xx, &ax)); 506 PetscUseMethod(mdn->A, "MatMultAddColumnRange_C", (Mat, Vec, Vec, Vec, PetscInt, PetscInt), (mdn->A, mdn->lvec, yy, zz, c_start, c_end)); 507 PetscFunctionReturn(PETSC_SUCCESS); 508 } 509 510 static PetscErrorCode MatMultAdd_MPIDense(Mat mat, Vec xx, Vec yy, Vec zz) 511 { 512 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 513 const PetscScalar *ax; 514 PetscScalar *ay; 515 PetscMemType axmtype, aymtype; 516 517 PetscFunctionBegin; 518 if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat)); 519 PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype)); 520 PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype)); 521 PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE)); 522 PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE)); 523 PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay)); 524 PetscCall(VecRestoreArrayReadAndMemType(xx, &ax)); 525 
PetscCall((*mdn->A->ops->multadd)(mdn->A, mdn->lvec, yy, zz)); 526 PetscFunctionReturn(PETSC_SUCCESS); 527 } 528 529 static PetscErrorCode MatMultHermitianTransposeColumnRange_MPIDense(Mat A, Vec xx, Vec yy, PetscInt c_start, PetscInt c_end) 530 { 531 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 532 const PetscScalar *ax; 533 PetscScalar *ay; 534 PetscMemType axmtype, aymtype; 535 536 PetscFunctionBegin; 537 if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); 538 PetscCall(VecSet(yy, 0.0)); 539 PetscUseMethod(a->A, "MatMultHermitianTransposeColumnRange_C", (Mat, Vec, Vec, PetscInt, PetscInt), (a->A, xx, a->lvec, c_start, c_end)); 540 PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype)); 541 PetscCall(VecGetArrayAndMemType(yy, &ay, &aymtype)); 542 PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM)); 543 PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM)); 544 PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax)); 545 PetscCall(VecRestoreArrayAndMemType(yy, &ay)); 546 PetscFunctionReturn(PETSC_SUCCESS); 547 } 548 549 static PetscErrorCode MatMultTransposeKernel_MPIDense(Mat A, Vec xx, Vec yy, PetscBool herm) 550 { 551 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 552 const PetscScalar *ax; 553 PetscScalar *ay; 554 PetscMemType axmtype, aymtype; 555 556 PetscFunctionBegin; 557 if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); 558 PetscCall(VecSet(yy, 0.0)); 559 if (herm) PetscCall((*a->A->ops->multhermitiantranspose)(a->A, xx, a->lvec)); 560 else PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec)); 561 PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype)); 562 PetscCall(VecGetArrayAndMemType(yy, &ay, &aymtype)); 563 PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM)); 564 PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM)); 565 PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax)); 566 
PetscCall(VecRestoreArrayAndMemType(yy, &ay)); 567 PetscFunctionReturn(PETSC_SUCCESS); 568 } 569 570 static PetscErrorCode MatMultHermitianTransposeAddColumnRange_MPIDense(Mat A, Vec xx, Vec yy, Vec zz, PetscInt c_start, PetscInt c_end) 571 { 572 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 573 const PetscScalar *ax; 574 PetscScalar *ay; 575 PetscMemType axmtype, aymtype; 576 577 PetscFunctionBegin; 578 if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); 579 PetscCall(VecCopy(yy, zz)); 580 PetscMPIInt rank; 581 PetscCallMPI(MPI_Comm_rank(MPI_COMM_WORLD, &rank)); 582 PetscUseMethod(a->A, "MatMultHermitianTransposeColumnRange_C", (Mat, Vec, Vec, PetscInt, PetscInt), (a->A, xx, a->lvec, c_start, c_end)); 583 PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype)); 584 PetscCall(VecGetArrayAndMemType(zz, &ay, &aymtype)); 585 PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM)); 586 PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM)); 587 PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax)); 588 PetscCall(VecRestoreArrayAndMemType(zz, &ay)); 589 PetscFunctionReturn(PETSC_SUCCESS); 590 } 591 592 static PetscErrorCode MatMultTransposeAddKernel_MPIDense(Mat A, Vec xx, Vec yy, Vec zz, PetscBool herm) 593 { 594 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 595 const PetscScalar *ax; 596 PetscScalar *ay; 597 PetscMemType axmtype, aymtype; 598 599 PetscFunctionBegin; 600 if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); 601 PetscCall(VecCopy(yy, zz)); 602 if (herm) PetscCall((*a->A->ops->multhermitiantranspose)(a->A, xx, a->lvec)); 603 else PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec)); 604 PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype)); 605 PetscCall(VecGetArrayAndMemType(zz, &ay, &aymtype)); 606 PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM)); 607 PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM)); 608 
PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax)); 609 PetscCall(VecRestoreArrayAndMemType(zz, &ay)); 610 PetscFunctionReturn(PETSC_SUCCESS); 611 } 612 613 static PetscErrorCode MatMultTranspose_MPIDense(Mat A, Vec xx, Vec yy) 614 { 615 PetscFunctionBegin; 616 PetscCall(MatMultTransposeKernel_MPIDense(A, xx, yy, PETSC_FALSE)); 617 PetscFunctionReturn(PETSC_SUCCESS); 618 } 619 620 static PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A, Vec xx, Vec yy, Vec zz) 621 { 622 PetscFunctionBegin; 623 PetscCall(MatMultTransposeAddKernel_MPIDense(A, xx, yy, zz, PETSC_FALSE)); 624 PetscFunctionReturn(PETSC_SUCCESS); 625 } 626 627 static PetscErrorCode MatMultHermitianTranspose_MPIDense(Mat A, Vec xx, Vec yy) 628 { 629 PetscFunctionBegin; 630 PetscCall(MatMultTransposeKernel_MPIDense(A, xx, yy, PETSC_TRUE)); 631 PetscFunctionReturn(PETSC_SUCCESS); 632 } 633 634 static PetscErrorCode MatMultHermitianTransposeAdd_MPIDense(Mat A, Vec xx, Vec yy, Vec zz) 635 { 636 PetscFunctionBegin; 637 PetscCall(MatMultTransposeAddKernel_MPIDense(A, xx, yy, zz, PETSC_TRUE)); 638 PetscFunctionReturn(PETSC_SUCCESS); 639 } 640 641 PetscErrorCode MatGetDiagonal_MPIDense(Mat A, Vec v) 642 { 643 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 644 PetscInt lda, len, i, nl, ng, m = A->rmap->n, radd; 645 PetscScalar *x; 646 const PetscScalar *av; 647 648 PetscFunctionBegin; 649 PetscCall(VecGetArray(v, &x)); 650 PetscCall(VecGetSize(v, &ng)); 651 PetscCheck(ng == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming mat and vec"); 652 PetscCall(VecGetLocalSize(v, &nl)); 653 len = PetscMin(a->A->rmap->n, a->A->cmap->n); 654 radd = A->rmap->rstart * m; 655 PetscCall(MatDenseGetArrayRead(a->A, &av)); 656 PetscCall(MatDenseGetLDA(a->A, &lda)); 657 for (i = 0; i < len; i++) x[i] = av[radd + i * lda + i]; 658 PetscCall(MatDenseRestoreArrayRead(a->A, &av)); 659 if (nl - i > 0) PetscCall(PetscArrayzero(x + i, nl - i)); 660 PetscCall(VecRestoreArray(v, &x)); 661 PetscFunctionReturn(PETSC_SUCCESS); 662 } 
663 664 static PetscErrorCode MatDestroy_MPIDense(Mat mat) 665 { 666 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 667 668 PetscFunctionBegin; 669 PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N)); 670 PetscCall(MatStashDestroy_Private(&mat->stash)); 671 PetscCheck(!mdn->vecinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 672 PetscCheck(!mdn->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 673 PetscCall(MatDestroy(&mdn->A)); 674 PetscCall(VecDestroy(&mdn->lvec)); 675 PetscCall(PetscSFDestroy(&mdn->Mvctx)); 676 PetscCall(VecDestroy(&mdn->cvec)); 677 PetscCall(MatDestroy(&mdn->cmat)); 678 679 PetscCall(PetscFree(mat->data)); 680 PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL)); 681 682 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", NULL)); 683 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", NULL)); 684 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", NULL)); 685 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", NULL)); 686 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", NULL)); 687 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", NULL)); 688 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", NULL)); 689 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", NULL)); 690 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", NULL)); 691 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", NULL)); 692 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", NULL)); 693 PetscCall(PetscObjectComposeFunction((PetscObject)mat, 
"MatConvert_mpiaij_mpidense_C", NULL)); 694 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", NULL)); 695 #if defined(PETSC_HAVE_ELEMENTAL) 696 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", NULL)); 697 #endif 698 #if defined(PETSC_HAVE_SCALAPACK) 699 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", NULL)); 700 #endif 701 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", NULL)); 702 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", NULL)); 703 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", NULL)); 704 #if defined(PETSC_HAVE_CUDA) 705 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", NULL)); 706 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", NULL)); 707 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", NULL)); 708 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensecuda_mpidense_C", NULL)); 709 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", NULL)); 710 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", NULL)); 711 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", NULL)); 712 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", NULL)); 713 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArray_C", NULL)); 714 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayRead_C", NULL)); 715 PetscCall(PetscObjectComposeFunction((PetscObject)mat, 
"MatDenseCUDAGetArrayWrite_C", NULL)); 716 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArray_C", NULL)); 717 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayRead_C", NULL)); 718 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayWrite_C", NULL)); 719 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAPlaceArray_C", NULL)); 720 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAResetArray_C", NULL)); 721 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAReplaceArray_C", NULL)); 722 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDASetPreallocation_C", NULL)); 723 #endif 724 #if defined(PETSC_HAVE_HIP) 725 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", NULL)); 726 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", NULL)); 727 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", NULL)); 728 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensehip_mpidense_C", NULL)); 729 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensehip_C", NULL)); 730 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidensehip_C", NULL)); 731 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaij_C", NULL)); 732 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaijhipsparse_C", NULL)); 733 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArray_C", NULL)); 734 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayRead_C", NULL)); 735 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayWrite_C", NULL)); 736 
PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArray_C", NULL)); 737 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayRead_C", NULL)); 738 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayWrite_C", NULL)); 739 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPPlaceArray_C", NULL)); 740 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPResetArray_C", NULL)); 741 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPReplaceArray_C", NULL)); 742 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPSetPreallocation_C", NULL)); 743 #endif 744 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", NULL)); 745 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", NULL)); 746 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", NULL)); 747 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", NULL)); 748 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", NULL)); 749 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", NULL)); 750 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", NULL)); 751 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", NULL)); 752 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", NULL)); 753 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", NULL)); 754 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultAddColumnRange_C", NULL)); 755 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeColumnRange_C", NULL)); 756 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeAddColumnRange_C", NULL)); 757 758 
  /* drop the cached diagonal-block so it is destroyed with the matrix */
  PetscCall(PetscObjectCompose((PetscObject)mat, "DiagonalBlock", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

#include <petscdraw.h>
/*
  Views a parallel dense matrix with an ASCII, draw, or socket viewer.
  For the ASCII info formats only per-rank statistics are printed; for every
  other format the WHOLE matrix is assembled onto rank 0 and viewed there
  sequentially (O(M*N) memory on rank 0 -- acceptable for viewing only).
*/
static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
{
  Mat_MPIDense     *mdn = (Mat_MPIDense *)mat->data;
  PetscMPIInt       rank;
  PetscViewerType   vtype;
  PetscBool         iascii, isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  if (iascii) {
    PetscCall(PetscViewerGetType(viewer, &vtype));
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* per-rank storage statistics only; no entries are printed */
      MatInfo info;
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "  [%d] local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %" PetscInt_FMT " \n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                   (PetscInt)info.memory));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      if (mdn->Mvctx) PetscCall(PetscSFView(mdn->Mvctx, viewer));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(PETSC_SUCCESS);
    }
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;

    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawIsNull(draw, &isnull));
    if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat          A;
    PetscInt     M = mat->rmap->N, N = mat->cmap->N, m, row, i, nz;
    PetscInt    *cols;
    PetscScalar *vals;

    /* rank 0 owns every row of the gathered copy; all other ranks own none */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
    if (rank == 0) {
      PetscCall(MatSetSizes(A, M, N, M, N));
    } else {
      PetscCall(MatSetSizes(A, 0, 0, M, N));
    }
    /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
    PetscCall(MatSetType(A, MATMPIDENSE));
    PetscCall(MatMPIDenseSetPreallocation(A, NULL));

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    A->insertmode = INSERT_VALUES;

    row = mat->rmap->rstart;
    m   = mdn->A->rmap->n;
    for (i = 0; i < m; i++) {
      PetscCall(MatGetRow_MPIDense(mat, row, &nz, &cols, &vals));
      PetscCall(MatSetValues_MPIDense(A, 1, &row, nz, cols, vals, INSERT_VALUES));
      PetscCall(MatRestoreRow_MPIDense(mat, row, &nz, &cols, &vals));
      row++;
    }

    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
    /* only rank 0 holds data; view its sequential inner matrix */
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    if (rank == 0) {
      PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIDense *)A->data)->A, ((PetscObject)mat)->name));
      PetscCall(MatView_SeqDense(((Mat_MPIDense *)A->data)->A, sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Dispatches MatView() for MATMPIDENSE to the viewer-specific implementations. */
static PetscErrorCode MatView_MPIDense(Mat mat, PetscViewer viewer)
{
  PetscBool iascii, isbinary, isdraw, issocket;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));

  if (iascii || issocket || isdraw) {
    PetscCall(MatView_MPIDense_ASCIIorDraworSocket(mat, viewer));
  } else if (isbinary) PetscCall(MatView_Dense_Binary(mat, viewer));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Gathers matrix statistics: local stats come from the inner sequential matrix,
  global stats are obtained with an allreduce (MAX or SUM over all ranks).
*/
static PetscErrorCode MatGetInfo_MPIDense(Mat A, MatInfoType flag, MatInfo *info)
{
  Mat_MPIDense  *mat = (Mat_MPIDense *)A->data;
  Mat            mdn = mat->A;
  PetscLogDouble isend[5], irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;

  PetscCall(MatGetInfo(mdn, MAT_LOCAL, info));

  /* pack the five reducible fields so one allreduce suffices */
  isend[0] = info->nz_used;
  isend[1] = info->nz_allocated;
  isend[2] = info->nz_unneeded;
  isend[3] = info->memory;
  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)A)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)A)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Sets options on the parallel matrix; most options are simply forwarded to the
  inner sequential matrix, stashing-related flags are recorded locally.
*/
static PetscErrorCode MatSetOption_MPIDense(Mat A, MatOption op, PetscBool flg)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A, 1);
    a->roworiented = flg;
    PetscCall(MatSetOption(a->A, op, flg));
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
  case MAT_SPD:
  case MAT_SPD_ETERNAL:
    /* if the diagonal matrix is square it inherits some of the properties above */
    if (a->A && A->rmap->n == A->cmap->n) PetscCall(MatSetOption(a->A, op, flg));
    break;
  default:
    break;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Computes A = diag(ll) * A * diag(rr).  Left scaling uses only locally owned
  rows; right scaling first broadcasts the (distributed) column-scaling vector
  into the local work vector lvec via the Mvctx star forest.
*/
static PetscErrorCode MatDiagonalScale_MPIDense(Mat A, Vec ll, Vec rr)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)A->data;
  const PetscScalar *l;
  PetscScalar        x, *v, *vv, *r;
  PetscInt           i, j, s2a, s3a, s2, s3, m = mdn->A->rmap->n, n = mdn->A->cmap->n, lda;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArray(mdn->A, &vv));
  PetscCall(MatDenseGetLDA(mdn->A, &lda));
  PetscCall(MatGetLocalSize(A, &s2, &s3));
  if (ll) {
    PetscCall(VecGetLocalSize(ll, &s2a));
    PetscCheck(s2a == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left scaling vector non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT, s2a, s2);
    PetscCall(VecGetArrayRead(ll, &l));
    for (i = 0; i < m; i++) {
      x = l[i];
      v = vv + i; /* walk row i: stride lda in column-major storage */
      for (j = 0; j < n; j++) {
        (*v) *= x;
        v += lda;
      }
    }
    PetscCall(VecRestoreArrayRead(ll, &l));
    PetscCall(PetscLogFlops(1.0 * n * m));
  }
  if (rr) {
    const PetscScalar *ar;

    PetscCall(VecGetLocalSize(rr, &s3a));
    PetscCheck(s3a == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Right scaling vec non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT ".", s3a, s3);
    PetscCall(VecGetArrayRead(rr, &ar));
    /* bring the full column-scaling vector to this rank (collective) */
    if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
    PetscCall(VecGetArray(mdn->lvec, &r));
    PetscCall(PetscSFBcastBegin(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE));
    PetscCall(VecRestoreArrayRead(rr, &ar));
    for (i = 0; i < n; i++) {
      x = r[i];
      v = vv + i * lda; /* column i is contiguous */
      for (j = 0; j < m; j++) (*v++) *= x;
    }
    PetscCall(VecRestoreArray(mdn->lvec, &r));
    PetscCall(PetscLogFlops(1.0 * n * m));
  }
  PetscCall(MatDenseRestoreArray(mdn->A, &vv));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Computes a matrix norm.  Frobenius: sum of |a_ij|^2 reduced over ranks then
  sqrt.  1-norm: per-column absolute sums reduced with SUM, then max.
  Infinity-norm: local row sums reduced with MAX.  NORM_2 is unsupported.
*/
static PetscErrorCode MatNorm_MPIDense(Mat A, NormType type, PetscReal *nrm)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)A->data;
  PetscInt           i, j;
  PetscMPIInt        size;
  PetscReal          sum = 0.0;
  const PetscScalar *av, *v;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArrayRead(mdn->A, &av));
  v = av;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    PetscCall(MatNorm(mdn->A, type, nrm));
  } else {
    if (type == NORM_FROBENIUS) {
      for (i = 0; i < mdn->A->cmap->n * mdn->A->rmap->n; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      PetscCallMPI(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
      *nrm = PetscSqrtReal(*nrm);
      PetscCall(PetscLogFlops(2.0 * mdn->A->cmap->n * mdn->A->rmap->n));
    } else if (type == NORM_1) {
      PetscReal *tmp;

      /* one absolute column sum per GLOBAL column */
      PetscCall(PetscCalloc1(A->cmap->N, &tmp));
      *nrm = 0.0;
      v    = av;
      for (j = 0; j < mdn->A->cmap->n; j++) {
        for (i = 0; i < mdn->A->rmap->n; i++) {
          tmp[j] += PetscAbsScalar(*v);
          v++;
        }
      }
      PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, tmp, A->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
      for (j = 0; j < A->cmap->N; j++) {
        if (tmp[j] > *nrm) *nrm = tmp[j];
      }
      PetscCall(PetscFree(tmp));
      PetscCall(PetscLogFlops(A->cmap->n * A->rmap->n));
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscCall(MatNorm(mdn->A, type, nrm));
      PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
    } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for two norm");
  }
  PetscCall(MatDenseRestoreArrayRead(mdn->A, &av));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Explicitly transposes the parallel dense matrix by inserting each local
  column of A as a row of B (communication happens during assembly).
*/
static PetscErrorCode MatTranspose_MPIDense(Mat A, MatReuse reuse, Mat *matout)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat           B;
  PetscInt      M = A->rmap->N, N = A->cmap->N, m, n, *rwork, rstart = A->rmap->rstart;
  PetscInt      j, i, lda;
  PetscScalar  *v;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
    PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
    PetscCall(MatMPIDenseSetPreallocation(B, NULL));
  } else B = *matout;

  m = a->A->rmap->n;
  n = a->A->cmap->n;
  PetscCall(MatDenseGetArrayRead(a->A, (const PetscScalar **)&v));
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(PetscMalloc1(m, &rwork));
  for (i = 0; i < m; i++) rwork[i] = rstart + i; /* global row indices of this rank */
  for (j = 0; j < n; j++) {
    /* local column j of A becomes (part of) global row j of B */
    PetscCall(MatSetValues(B, 1, &j, m, rwork, v, INSERT_VALUES));
    v = PetscSafePointerPlusOffset(v, lda);
  }
  PetscCall(MatDenseRestoreArrayRead(a->A, (const PetscScalar **)&v));
  PetscCall(PetscFree(rwork));
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    /* MAT_INPLACE_MATRIX: replace A's contents with B's, destroying B */
    PetscCall(MatHeaderMerge(A, &B));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatDuplicate_MPIDense(Mat, MatDuplicateOption, Mat *);
PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat, PetscScalar);

/* Sets up the layouts and, if not done yet, allocates the dense storage. */
static PetscErrorCode MatSetUp_MPIDense(Mat A)
{
  PetscFunctionBegin;
  PetscCall(PetscLayoutSetUp(A->rmap));
  PetscCall(PetscLayoutSetUp(A->cmap));
  if (!A->preallocated) PetscCall(MatMPIDenseSetPreallocation(A, NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Y = Y + alpha*X, delegated entirely to the inner sequential matrices. */
static PetscErrorCode MatAXPY_MPIDense(Mat Y, PetscScalar alpha, Mat X, MatStructure str)
{
  Mat_MPIDense *A = (Mat_MPIDense *)Y->data, *B = (Mat_MPIDense *)X->data;

  PetscFunctionBegin;
  PetscCall(MatAXPY(A->A, alpha, B->A, str));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Complex-conjugates all entries, delegated to the inner sequential matrix. */
static PetscErrorCode MatConjugate_MPIDense(Mat mat)
{
  Mat_MPIDense *a = (Mat_MPIDense *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatConjugate(a->A));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Keeps only the real part of each entry (delegated). */
static PetscErrorCode MatRealPart_MPIDense(Mat A)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatRealPart(a->A));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Keeps only the imaginary part of each entry (delegated). */
static PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatImaginaryPart(a->A));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Extracts the locally owned part of global column col into v (delegated). */
static PetscErrorCode MatGetColumnVector_MPIDense(Mat A, Vec v, PetscInt col)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(a->A, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing local matrix");
  PetscCheck(a->A->ops->getcolumnvector, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing get column operation");
  PetscCall((*a->A->ops->getcolumnvector)(a->A, v, col));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat, PetscInt, PetscReal *);

/*
  Per-column reductions (norms, sums, means).  Local reductions are computed on
  the sequential block, then combined across ranks: MAX for the infinity norm,
  SUM otherwise.  NORM_2 is reduced as squares and square-rooted afterwards;
  means are computed as sums divided by the global row count m.
*/
static PetscErrorCode MatGetColumnReductions_MPIDense(Mat A, PetscInt type, PetscReal *reductions)
{
  PetscInt      i, m, n;
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A, &m, &n));
  if (type == REDUCTION_MEAN_REALPART) {
    PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_REALPART, reductions));
  } else if (type == REDUCTION_MEAN_IMAGINARYPART) {
    PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_IMAGINARYPART, reductions));
  } else {
    PetscCall(MatGetColumnReductions_SeqDense(a->A, type, reductions));
  }
  if (type == NORM_2) {
    /* squares are additive across ranks; the seq routine returned norms */
    for (i = 0; i < n; i++) reductions[i] *= reductions[i];
  }
  if (type == NORM_INFINITY) {
    PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, reductions, n, MPIU_REAL, MPIU_MAX, A->hdr.comm));
  } else {
    PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, reductions, n, MPIU_REAL, MPIU_SUM, A->hdr.comm));
  }
  if (type == NORM_2) {
    for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
  } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < n; i++) reductions[i] /= m;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Fills the matrix with random entries (delegated); mirrors the offload mask. */
static PetscErrorCode MatSetRandom_MPIDense(Mat x, PetscRandom rctx)
{
  Mat_MPIDense *d = (Mat_MPIDense *)x->data;

  PetscFunctionBegin;
  PetscCall(MatSetRandom(d->A, rctx));
#if defined(PETSC_HAVE_DEVICE)
  x->offloadmask = d->A->offloadmask;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Dense storage always contains the diagonal, so it is never "missing"; d is left untouched. */
static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A, PetscBool *missing, PetscInt *d)
{
  PetscFunctionBegin;
  *missing = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat);
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat);
static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat);
static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat);
static PetscErrorCode MatEqual_MPIDense(Mat, Mat, PetscBool *);
static PetscErrorCode MatLoad_MPIDense(Mat, PetscViewer);
static PetscErrorCode MatProductSetFromOptions_MPIDense(Mat);

/* Function table for MATMPIDENSE; slot positions follow struct _MatOps and must not be reordered. */
static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
                                       MatGetRow_MPIDense,
                                       MatRestoreRow_MPIDense,
                                       MatMult_MPIDense,
                                       /* 4*/ MatMultAdd_MPIDense,
                                       MatMultTranspose_MPIDense,
                                       MatMultTransposeAdd_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 10*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatTranspose_MPIDense,
                                       /* 15*/ MatGetInfo_MPIDense,
                                       MatEqual_MPIDense,
                                       MatGetDiagonal_MPIDense,
                                       MatDiagonalScale_MPIDense,
                                       MatNorm_MPIDense,
                                       /* 20*/ MatAssemblyBegin_MPIDense,
                                       MatAssemblyEnd_MPIDense,
                                       MatSetOption_MPIDense,
                                       MatZeroEntries_MPIDense,
                                       /* 24*/ MatZeroRows_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 29*/ MatSetUp_MPIDense,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPIDense,
                                       NULL,
                                       /* 34*/ MatDuplicate_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 39*/ MatAXPY_MPIDense,
                                       MatCreateSubMatrices_MPIDense,
                                       NULL,
                                       MatGetValues_MPIDense,
                                       MatCopy_MPIDense,
                                       /* 44*/ NULL,
                                       MatScale_MPIDense,
                                       MatShift_MPIDense,
                                       NULL,
                                       NULL,
                                       /* 49*/ MatSetRandom_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 54*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 59*/ MatCreateSubMatrix_MPIDense,
                                       MatDestroy_MPIDense,
                                       MatView_MPIDense,
                                       NULL,
                                       NULL,
                                       /* 64*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 69*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 74*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 79*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 83*/ MatLoad_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 89*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 94*/ NULL,
                                       NULL,
                                       MatMatTransposeMultSymbolic_MPIDense_MPIDense,
                                       MatMatTransposeMultNumeric_MPIDense_MPIDense,
                                       NULL,
                                       /* 99*/ MatProductSetFromOptions_MPIDense,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPIDense,
                                       NULL,
                                       /*104*/ NULL,
                                       MatRealPart_MPIDense,
                                       MatImaginaryPart_MPIDense,
                                       NULL,
                                       NULL,
                                       /*109*/ NULL,
                                       NULL,
                                       NULL,
                                       MatGetColumnVector_MPIDense,
                                       MatMissingDiagonal_MPIDense,
                                       /*114*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*119*/ NULL,
                                       NULL,
                                       MatMultHermitianTranspose_MPIDense,
                                       MatMultHermitianTransposeAdd_MPIDense,
                                       NULL,
                                       /*124*/ NULL,
                                       MatGetColumnReductions_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*129*/ NULL,
                                       NULL,
                                       MatTransposeMatMultSymbolic_MPIDense_MPIDense,
                                       MatTransposeMatMultNumeric_MPIDense_MPIDense,
                                       NULL,
                                       /*134*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*139*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatCreateMPIMatConcatenateSeqMat_MPIDense,
                                       /*145*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*150*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*155*/ NULL,
                                       NULL};

/*
  Allocates (or re-types) the inner sequential matrix that stores the locally
  owned rows against ALL global columns.  data, if non-NULL, is used as the
  storage instead of allocating.  The sequential type matches the parallel one
  (CUDA/HIP variants pick the matching device type).
*/
static PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat, PetscScalar *data)
{
  Mat_MPIDense *a     = (Mat_MPIDense *)mat->data;
  MatType       mtype = MATSEQDENSE;

  PetscFunctionBegin;
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));
  if (!a->A) {
    PetscCall(MatCreate(PETSC_COMM_SELF, &a->A));
    /* local block is rmap->n x cmap->N: every rank stores full rows */
    PetscCall(MatSetSizes(a->A, mat->rmap->n, mat->cmap->N, mat->rmap->n, mat->cmap->N));
  }
#if defined(PETSC_HAVE_CUDA)
  PetscBool iscuda;
  PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSECUDA, &iscuda));
  if (iscuda) mtype = MATSEQDENSECUDA;
#endif
#if defined(PETSC_HAVE_HIP)
  PetscBool iship;
  PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSEHIP, &iship));
  if (iship) mtype = MATSEQDENSEHIP;
#endif
  PetscCall(MatSetType(a->A, mtype));
  PetscCall(MatSeqDenseSetPreallocation(a->A, data));
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
  mat->offloadmask = a->A->offloadmask;
#endif
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Converts MPIAIJ -> MPIDense: merge the AIJ rows into a sequential dense
  matrix, then concatenate the per-rank pieces into a parallel dense matrix.
*/
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIDense(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat B, C;

  PetscFunctionBegin;
  PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &C));
  PetscCall(MatConvert_SeqAIJ_SeqDense(C, MATSEQDENSE, MAT_INITIAL_MATRIX, &B));
  PetscCall(MatDestroy(&C));
  if (reuse == MAT_REUSE_MATRIX) {
    C = *newmat;
  } else C = NULL;
  PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C));
  PetscCall(MatDestroy(&B));
  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A, &C));
  } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Converts MPIDense -> MPIAIJ via the sequential dense block (which is only
  borrowed here, hence not destroyed) and rank-wise concatenation.
*/
static PetscErrorCode MatConvert_MPIDense_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat B, C;

  PetscFunctionBegin;
  PetscCall(MatDenseGetLocalMatrix(A, &C));
  PetscCall(MatConvert_SeqDense_SeqAIJ(C, MATSEQAIJ, MAT_INITIAL_MATRIX, &B));
  if (reuse == MAT_REUSE_MATRIX) {
    C = *newmat;
  } else C = NULL;
  PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C));
  PetscCall(MatDestroy(&B));
  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A, &C));
  } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
  PetscFunctionReturn(PETSC_SUCCESS);
}

#if defined(PETSC_HAVE_ELEMENTAL)
/*
  Converts MPIDense -> Elemental by inserting all locally owned entries with
  ADD_VALUES (required by the PETSc-Elemental interface, see note below).
*/
PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat           mat_elemental;
  PetscScalar  *v;
  PetscInt      m = A->rmap->n, N = A->cmap->N, rstart = A->rmap->rstart, i, *rows, *cols, lda;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    mat_elemental = *newmat;
    PetscCall(MatZeroEntries(*newmat));
  } else {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental));
    PetscCall(MatSetSizes(mat_elemental, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N));
    PetscCall(MatSetType(mat_elemental, MATELEMENTAL));
    PetscCall(MatSetUp(mat_elemental));
    PetscCall(MatSetOption(mat_elemental, MAT_ROW_ORIENTED, PETSC_FALSE));
  }

  PetscCall(PetscMalloc2(m, &rows, N, &cols));
  for (i = 0; i < N; i++) cols[i] = i;
  for (i = 0; i < m; i++) rows[i] = rstart + i;

  /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
  PetscCall(MatDenseGetArray(A, &v));
  PetscCall(MatDenseGetLDA(a->A, &lda));
  if (lda == m) PetscCall(MatSetValues(mat_elemental, m, rows, N, cols, v, ADD_VALUES));
  else {
    /* storage has a gap between columns: insert column by column */
    for (i = 0; i < N; i++) PetscCall(MatSetValues(mat_elemental, m, rows, 1, &i, v + lda * i, ADD_VALUES));
  }
  PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY));
  PetscCall(MatDenseRestoreArray(A, &v));
  PetscCall(PetscFree2(rows, cols));

  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A, &mat_elemental));
  } else {
    *newmat = mat_elemental;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif

/* Direct pointer access to a column of the local block (delegated). */
static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A, PetscInt col, PetscScalar **vals)
{
  Mat_MPIDense *mat = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseGetColumn(mat->A, col, vals));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Releases the pointer obtained with MatDenseGetColumn_MPIDense(). */
static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A, PetscScalar **vals)
{
  Mat_MPIDense *mat = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseRestoreColumn(mat->A, vals));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Stacks the per-rank sequential matrices inmat (row-wise) into one parallel
  dense matrix.  MAT_INITIAL_MATRIX creates the result (symbolic phase, with a
  consistency check that the local column counts sum to N); the numeric phase
  just copies the local data.
*/
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
{
  Mat_MPIDense *mat;
  PetscInt      m, nloc, N;

  PetscFunctionBegin;
  PetscCall(MatGetSize(inmat, &m, &N));
  PetscCall(MatGetLocalSize(inmat, NULL, &nloc));
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    PetscInt sum;

    if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N));
    /* Check sum(n) = N */
    PetscCallMPI(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm));
    PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N);

    PetscCall(MatCreateDense(comm, m, n, PETSC_DETERMINE, N, NULL, outmat));
    PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  }

  /* numeric phase */
  mat = (Mat_MPIDense *)(*outmat)->data;
  PetscCall(MatCopy(inmat, mat->A, SAME_NONZERO_PATTERN));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Exposes column col as a Vec without copying: the shared work vector cvec is
  pointed at the column's storage.  vecinuse/matinuse guard against nested or
  mismatched get/restore calls.
*/
PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscInt      lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
  a->vecinuse = col + 1; /* +1 so column 0 still reads as "in use" */
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(MatDenseGetArray(a->A, (PetscScalar **)&a->ptrinuse));
  PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)lda)));
  *v = a->cvec;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Restores the column Vec obtained with MatDenseGetColumnVec_MPIDense(). */
PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector");
  VecCheckAssembled(a->cvec);
  a->vecinuse = 0;
  PetscCall(MatDenseRestoreArray(a->A, (PetscScalar **)&a->ptrinuse));
  PetscCall(VecResetArray(a->cvec));
  if (v) *v = NULL;
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode
/* Read-only variant of MatDenseGetColumnVec_MPIDense(); the Vec is read-locked. */
MatDenseGetColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscInt      lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
  a->vecinuse = col + 1;
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(MatDenseGetArrayRead(a->A, &a->ptrinuse));
  PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)lda)));
  PetscCall(VecLockReadPush(a->cvec));
  *v = a->cvec;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Restores the read-only column Vec and pops the read lock. */
PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
  VecCheckAssembled(a->cvec);
  a->vecinuse = 0;
  PetscCall(MatDenseRestoreArrayRead(a->A, &a->ptrinuse));
  PetscCall(VecLockReadPop(a->cvec));
  PetscCall(VecResetArray(a->cvec));
  if (v) *v = NULL;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Write-only variant of MatDenseGetColumnVec_MPIDense(); prior contents may be undefined. */
PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscInt      lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
  a->vecinuse = col + 1;
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(MatDenseGetArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
  PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)lda)));
  *v = a->cvec;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Restores the write-only column Vec obtained with MatDenseGetColumnVecWrite_MPIDense(). */
PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
  VecCheckAssembled(a->cvec);
  a->vecinuse = 0;
  PetscCall(MatDenseRestoreArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
  PetscCall(VecResetArray(a->cvec));
  if (v) *v = NULL;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Returns a view of the submatrix with global rows [rbegin,rend) and columns
  [cbegin,cend), backed by the cached matrix a->cmat whose layouts are
  (re)built only when the requested shape changes.  The actual data view comes
  from MatDenseGetSubMatrix() on the local block; pbegin/pend clip the global
  row range to the locally owned rows.
*/
static PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A, PetscInt rbegin, PetscInt rend, PetscInt cbegin, PetscInt cend, Mat *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat_MPIDense *c;
  MPI_Comm      comm;
  PetscInt      pbegin, pend;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCheck(!a->vecinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  /* intersection of [rbegin,rend) with this rank's owned rows, in local indices */
  pbegin = PetscMax(0, PetscMin(A->rmap->rend, rbegin) - A->rmap->rstart);
  pend   = PetscMin(A->rmap->n, PetscMax(0, rend - A->rmap->rstart));
  if (!a->cmat) {
    PetscCall(MatCreate(comm, &a->cmat));
    PetscCall(MatSetType(a->cmat, ((PetscObject)A)->type_name));
    if (rend - rbegin == A->rmap->N) PetscCall(PetscLayoutReference(A->rmap, &a->cmat->rmap));
    else {
      PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin));
      PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin));
      PetscCall(PetscLayoutSetUp(a->cmat->rmap));
    }
    PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin));
    PetscCall(PetscLayoutSetUp(a->cmat->cmap));
  } else {
    /* reuse cached layouts when the shape matches everywhere (collective check) */
    PetscBool same = (PetscBool)(rend - rbegin == a->cmat->rmap->N);
    if (same && a->cmat->rmap->N != A->rmap->N) {
      same = (PetscBool)(pend - pbegin == a->cmat->rmap->n);
      PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &same, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
    }
    if (!same) {
      PetscCall(PetscLayoutDestroy(&a->cmat->rmap));
      PetscCall(PetscLayoutCreate(comm, &a->cmat->rmap));
      PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin));
      PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin));
      PetscCall(PetscLayoutSetUp(a->cmat->rmap));
    }
    if (cend - cbegin != a->cmat->cmap->N) {
      PetscCall(PetscLayoutDestroy(&a->cmat->cmap));
      PetscCall(PetscLayoutCreate(comm, &a->cmat->cmap));
      PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin));
      PetscCall(PetscLayoutSetUp(a->cmat->cmap));
    }
  }
  c = (Mat_MPIDense *)a->cmat->data;
  PetscCheck(!c->A, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseGetSubMatrix(a->A, pbegin, pend, cbegin, cend, &c->A));

  a->cmat->preallocated = PETSC_TRUE;
  a->cmat->assembled    = PETSC_TRUE;
#if defined(PETSC_HAVE_DEVICE)
  a->cmat->offloadmask = c->A->offloadmask;
#endif
  a->matinuse = cbegin + 1; /* +1 so column 0 still reads as "in use" */
  *v          = a->cmat;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Returns the submatrix view obtained with MatDenseGetSubMatrix_MPIDense(). */
static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A, Mat *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat_MPIDense *c;

  PetscFunctionBegin;
  PetscCheck(a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetSubMatrix() first");
  PetscCheck(a->cmat, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal matrix");
  PetscCheck(*v == a->cmat, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Not the matrix obtained from MatDenseGetSubMatrix()");
  a->matinuse = 0;
  c           = (Mat_MPIDense *)a->cmat->data;
  PetscCall(MatDenseRestoreSubMatrix(a->A, &c->A));
  if (v) *v = NULL;
#if defined(PETSC_HAVE_DEVICE)
  A->offloadmask = a->A->offloadmask;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*MC
   MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.

   Options Database Key:
. -mat_type mpidense - sets the matrix type to `MATMPIDENSE` during a call to `MatSetFromOptions()`

  Level: beginner

.seealso: [](ch_matrices), `Mat`, `MatCreateDense()`, `MATSEQDENSE`, `MATDENSE`
M*/
/* Constructor for MATMPIDENSE: installs the ops table and composes the type-specific methods. */
PetscErrorCode MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense *a;

  PetscFunctionBegin;
  PetscCall(PetscNew(&a));
  mat->data   = (void *)a;
  mat->ops[0] = MatOps_Values;

  mat->insertmode = NOT_SET_VALUES;

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;

  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)mat), 1, &mat->stash));

  /* stuff used for matrix vector multiply */
  a->lvec        = NULL;
  a->Mvctx       = NULL;
  a->roworiented = PETSC_TRUE;

  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", MatDenseGetLDA_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", MatDenseSetLDA_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", MatDenseGetArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", MatDenseRestoreArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,
"MatDenseGetArrayRead_C", MatDenseGetArrayRead_MPIDense)); 1725 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", MatDenseRestoreArrayRead_MPIDense)); 1726 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", MatDenseGetArrayWrite_MPIDense)); 1727 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", MatDenseRestoreArrayWrite_MPIDense)); 1728 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", MatDensePlaceArray_MPIDense)); 1729 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", MatDenseResetArray_MPIDense)); 1730 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", MatDenseReplaceArray_MPIDense)); 1731 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDense)); 1732 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDense)); 1733 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDense)); 1734 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDense)); 1735 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDense)); 1736 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDense)); 1737 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", MatDenseGetSubMatrix_MPIDense)); 1738 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", MatDenseRestoreSubMatrix_MPIDense)); 1739 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense)); 1740 PetscCall(PetscObjectComposeFunction((PetscObject)mat, 
"MatConvert_mpidense_mpiaij_C", MatConvert_MPIDense_MPIAIJ)); 1741 #if defined(PETSC_HAVE_ELEMENTAL) 1742 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", MatConvert_MPIDense_Elemental)); 1743 #endif 1744 #if defined(PETSC_HAVE_SCALAPACK) 1745 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", MatConvert_Dense_ScaLAPACK)); 1746 #endif 1747 #if defined(PETSC_HAVE_CUDA) 1748 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", MatConvert_MPIDense_MPIDenseCUDA)); 1749 #endif 1750 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", MatMPIDenseSetPreallocation_MPIDense)); 1751 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense)); 1752 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ)); 1753 #if defined(PETSC_HAVE_CUDA) 1754 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense)); 1755 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ)); 1756 #endif 1757 #if defined(PETSC_HAVE_HIP) 1758 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", MatConvert_MPIDense_MPIDenseHIP)); 1759 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense)); 1760 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ)); 1761 #endif 1762 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", MatDenseGetColumn_MPIDense)); 
1763 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", MatDenseRestoreColumn_MPIDense)); 1764 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultAddColumnRange_C", MatMultAddColumnRange_MPIDense)); 1765 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeColumnRange_C", MatMultHermitianTransposeColumnRange_MPIDense)); 1766 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMultHermitianTransposeAddColumnRange_C", MatMultHermitianTransposeAddColumnRange_MPIDense)); 1767 PetscCall(PetscObjectChangeTypeName((PetscObject)mat, MATMPIDENSE)); 1768 PetscFunctionReturn(PETSC_SUCCESS); 1769 } 1770 1771 /*MC 1772 MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices. 1773 1774 This matrix type is identical to `MATSEQDENSE` when constructed with a single process communicator, 1775 and `MATMPIDENSE` otherwise. 1776 1777 Options Database Key: 1778 . -mat_type dense - sets the matrix type to `MATDENSE` during a call to `MatSetFromOptions()` 1779 1780 Level: beginner 1781 1782 .seealso: [](ch_matrices), `Mat`, `MATSEQDENSE`, `MATMPIDENSE`, `MATDENSECUDA`, `MATDENSEHIP` 1783 M*/ 1784 1785 /*@ 1786 MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries 1787 1788 Collective 1789 1790 Input Parameters: 1791 + B - the matrix 1792 - data - optional location of matrix data. Set to `NULL` for PETSc 1793 to control all matrix memory allocation. 1794 1795 Level: intermediate 1796 1797 Notes: 1798 The dense format is fully compatible with standard Fortran 1799 storage by columns. 1800 1801 The data input variable is intended primarily for Fortran programmers 1802 who wish to allocate their own matrix memory space. Most users should 1803 set `data` to `NULL`. 

.seealso: [](ch_matrices), `Mat`, `MATMPIDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
@*/
/* Forwards to the type-specific MatMPIDenseSetPreallocation_C implementation if one is
   composed on B; a no-op for other matrix types (PetscTryMethod). */
PetscErrorCode MatMPIDenseSetPreallocation(Mat B, PetscScalar *data)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscTryMethod(B, "MatMPIDenseSetPreallocation_C", (Mat, PetscScalar *), (B, data));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatDensePlaceArray - Allows one to replace the array in a `MATDENSE` matrix with an
  array provided by the user. This is useful to avoid copying an array
  into a matrix

  Not Collective

  Input Parameters:
+ mat   - the matrix
- array - the array in column major order

  Level: developer

  Note:
  Adding `const` to `array` was an oversight, see notes in `VecPlaceArray()`.

  You can return to the original array with a call to `MatDenseResetArray()`. The user is responsible for freeing this array; it will not be
  freed when the matrix is destroyed.

.seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDenseResetArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`,
          `MatDenseReplaceArray()`
@*/
PetscErrorCode MatDensePlaceArray(Mat mat, const PetscScalar *array)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  PetscUseMethod(mat, "MatDensePlaceArray_C", (Mat, const PetscScalar *), (mat, array));
  /* values changed behind PETSc's back: bump the object state so caches invalidate */
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
  mat->offloadmask = PETSC_OFFLOAD_CPU; /* the user-provided array lives on the host */
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatDenseResetArray - Resets the matrix array to that it previously had before the call to `MatDensePlaceArray()`

  Not Collective

  Input Parameter:
. mat - the matrix

  Level: developer

  Note:
  You can only call this after a call to `MatDensePlaceArray()`

.seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDensePlaceArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`
@*/
PetscErrorCode MatDenseResetArray(Mat mat)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  PetscUseMethod(mat, "MatDenseResetArray_C", (Mat), (mat));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an
  array provided by the user. This is useful to avoid copying an array
  into a matrix

  Not Collective

  Input Parameters:
+ mat   - the matrix
- array - the array in column major order

  Level: developer

  Note:
  Adding `const` to `array` was an oversight, see notes in `VecPlaceArray()`.

  The memory passed in MUST be obtained with `PetscMalloc()` and CANNOT be
  freed by the user. It will be freed when the matrix is destroyed.

.seealso: [](ch_matrices), `Mat`, `MatDensePlaceArray()`, `MatDenseGetArray()`, `VecReplaceArray()`
@*/
PetscErrorCode MatDenseReplaceArray(Mat mat, const PetscScalar *array)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  PetscUseMethod(mat, "MatDenseReplaceArray_C", (Mat, const PetscScalar *), (mat, array));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
  mat->offloadmask = PETSC_OFFLOAD_CPU; /* replacement array lives on the host */
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatCreateDense - Creates a matrix in `MATDENSE` format.

  Collective

  Input Parameters:
+ comm - MPI communicator
. m    - number of local rows (or `PETSC_DECIDE` to have calculated if `M` is given)
. n    - number of local columns (or `PETSC_DECIDE` to have calculated if `N` is given)
. M    - number of global rows (or `PETSC_DECIDE` to have calculated if `m` is given)
. N    - number of global columns (or `PETSC_DECIDE` to have calculated if `n` is given)
- data - optional location of matrix data. Set data to `NULL` (`PETSC_NULL_SCALAR` for Fortran users) for PETSc
         to control all matrix memory allocation.

  Output Parameter:
. A - the matrix

  Level: intermediate

  Notes:
  The dense format is fully compatible with standard Fortran
  storage by columns.

  Although local portions of the matrix are stored in column-major
  order, the matrix is partitioned across MPI ranks by row.

  The data input variable is intended primarily for Fortran programmers
  who wish to allocate their own matrix memory space. Most users should
  set `data` to `NULL` (`PETSC_NULL_SCALAR` for Fortran users).

  The user MUST specify either the local or global matrix dimensions
  (possibly both).

.seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
@*/
/* Convenience constructor: MatCreate + MatSetSizes + MatSetType(MATDENSE) followed by both
   preallocation calls; the one matching the actual (seq or mpi) type takes effect, the
   other is a no-op via PetscTryMethod. */
PetscErrorCode MatCreateDense(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar data[], Mat *A)
{
  PetscFunctionBegin;
  PetscCall(MatCreate(comm, A));
  PetscCall(MatSetSizes(*A, m, n, M, N));
  PetscCall(MatSetType(*A, MATDENSE));
  PetscCall(MatSeqDenseSetPreallocation(*A, data));
  PetscCall(MatMPIDenseSetPreallocation(*A, data));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Duplicates an MPI dense matrix: shares A's layouts by reference and duplicates the
   local sequential matrix (copying values iff cpvalues requests it). */
static PetscErrorCode MatDuplicate_MPIDense(Mat A, MatDuplicateOption cpvalues, Mat *newmat)
{
  Mat           mat;
  Mat_MPIDense *a, *oldmat = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  *newmat = NULL;
  PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat));
  PetscCall(MatSetSizes(mat, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
  PetscCall(MatSetType(mat, ((PetscObject)A)->type_name));
  a = (Mat_MPIDense *)mat->data;

  mat->factortype   = A->factortype;
  mat->assembled    = PETSC_TRUE;
  mat->preallocated = PETSC_TRUE;

  mat->insertmode = NOT_SET_VALUES;
  a->donotstash   = oldmat->donotstash;

  PetscCall(PetscLayoutReference(A->rmap, &mat->rmap));
  PetscCall(PetscLayoutReference(A->cmap, &mat->cmap));

  PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));

  *newmat = mat;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Loads an MPI dense matrix from a binary or (when available) HDF5 viewer. */
static PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
{
  PetscBool isbinary;
#if defined(PETSC_HAVE_HDF5)
  PetscBool ishdf5;
#endif

  PetscFunctionBegin;
  PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
  PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
  /* force binary viewer to load .info file if it has not yet done so */
  PetscCall(PetscViewerSetUp(viewer));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
#if defined(PETSC_HAVE_HDF5)
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
#endif
  if (isbinary) {
    PetscCall(MatLoad_Dense_Binary(newMat, viewer));
#if defined(PETSC_HAVE_HDF5)
  } else if (ishdf5) {
    PetscCall(MatLoad_Dense_HDF5(newMat, viewer));
#endif
  } else SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Collective equality test: compares the local blocks, then ANDs the per-rank results. */
static PetscErrorCode MatEqual_MPIDense(Mat A, Mat B, PetscBool *flag)
{
  Mat_MPIDense *matB = (Mat_MPIDense *)B->data, *matA = (Mat_MPIDense *)A->data;
  Mat           a, b;

  PetscFunctionBegin;
  a = matA->A;
  b = matB->A;
  PetscCall(MatEqual(a, b, flag));
  PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Destructor for the product data attached by MatTransposeMatMultSymbolic_MPIDense_MPIDense(). */
static PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data)
{
  Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data;

  PetscFunctionBegin;
  PetscCall(PetscFree2(atb->sendbuf, atb->recvcounts));
  PetscCall(MatDestroy(&atb->atb));
  PetscCall(PetscFree(atb));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Destructor for the product data attached by MatMatTransposeMultSymbolic_MPIDense_MPIDense(). */
static PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data)
{
  Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data;

  PetscFunctionBegin;
  PetscCall(PetscFree2(abt->buf[0], abt->buf[1]));
  PetscCall(PetscFree2(abt->recvcounts, abt->recvdispls));
  PetscCall(PetscFree(abt));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Numeric phase of C = A^T * B for MPI dense matrices: each rank forms the full
   (local A)^T * (local B), packs it by destination-rank row blocks, and a
   Reduce_scatter sums and distributes the owned rows of C. */
static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
{
  Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
  Mat_TransMatMultDense *atb;
  MPI_Comm               comm;
  PetscMPIInt            size, *recvcounts;
  PetscScalar           *carray, *sendbuf;
  const PetscScalar     *atbarray;
  PetscInt               i, cN = C->cmap->N, proc, k, j, lda;
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C, 3);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
  atb        = (Mat_TransMatMultDense *)C->product->data;
  recvcounts = atb->recvcounts;
  sendbuf    = atb->sendbuf;

  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));

  /* compute atbarray = aseq^T * bseq */
  PetscCall(MatTransposeMatMult(a->A, b->A, atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX, PETSC_DETERMINE, &atb->atb));

  PetscCall(MatGetOwnershipRanges(C, &ranges));

  /* arrange atbarray into sendbuf: for each destination rank, its row block in column-major order */
  PetscCall(MatDenseGetArrayRead(atb->atb, &atbarray));
  PetscCall(MatDenseGetLDA(atb->atb, &lda));
  for (proc = 0, k = 0; proc < size; proc++) {
    for (j = 0; j < cN; j++) {
      for (i = ranges[proc]; i < ranges[proc + 1]; i++) sendbuf[k++] = atbarray[i + j * lda];
    }
  }
  PetscCall(MatDenseRestoreArrayRead(atb->atb, &atbarray));

  /* sum all atbarray to local values of C */
  PetscCall(MatDenseGetArrayWrite(c->A, &carray));
  PetscCallMPI(MPI_Reduce_scatter(sendbuf, carray, recvcounts, MPIU_SCALAR, MPIU_SUM, comm));
  PetscCall(MatDenseRestoreArrayWrite(c->A, &carray));
  PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Symbolic phase of C = A^T * B: sets up C's sizes/type and the reusable
   send buffer and Reduce_scatter counts. */
static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
{
  MPI_Comm comm;
PetscMPIInt size; 2091 PetscInt cm = A->cmap->n, cM, cN = B->cmap->N; 2092 Mat_TransMatMultDense *atb; 2093 PetscBool cisdense = PETSC_FALSE; 2094 const PetscInt *ranges; 2095 2096 PetscFunctionBegin; 2097 MatCheckProduct(C, 4); 2098 PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 2099 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 2100 if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) { 2101 SETERRQ(comm, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend); 2102 } 2103 2104 /* create matrix product C */ 2105 PetscCall(MatSetSizes(C, cm, B->cmap->n, A->cmap->N, B->cmap->N)); 2106 #if defined(PETSC_HAVE_CUDA) 2107 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, "")); 2108 #endif 2109 #if defined(PETSC_HAVE_HIP) 2110 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSEHIP, "")); 2111 #endif 2112 if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 2113 PetscCall(MatSetUp(C)); 2114 2115 /* create data structure for reuse C */ 2116 PetscCallMPI(MPI_Comm_size(comm, &size)); 2117 PetscCall(PetscNew(&atb)); 2118 cM = C->rmap->N; 2119 PetscCall(PetscMalloc2(cM * cN, &atb->sendbuf, size, &atb->recvcounts)); 2120 PetscCall(MatGetOwnershipRanges(C, &ranges)); 2121 for (PetscMPIInt i = 0; i < size; i++) PetscCall(PetscMPIIntCast((ranges[i + 1] - ranges[i]) * cN, &atb->recvcounts[i])); 2122 C->product->data = atb; 2123 C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense; 2124 PetscFunctionReturn(PETSC_SUCCESS); 2125 } 2126 2127 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C) 2128 { 2129 MPI_Comm comm; 2130 PetscMPIInt i, size; 2131 PetscInt maxRows, bufsiz; 2132 
PetscMPIInt tag; 2133 PetscInt alg; 2134 Mat_MatTransMultDense *abt; 2135 Mat_Product *product = C->product; 2136 PetscBool flg; 2137 2138 PetscFunctionBegin; 2139 MatCheckProduct(C, 4); 2140 PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 2141 /* check local size of A and B */ 2142 PetscCheck(A->cmap->n == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local column dimensions are incompatible, A (%" PetscInt_FMT ") != B (%" PetscInt_FMT ")", A->cmap->n, B->cmap->n); 2143 2144 PetscCall(PetscStrcmp(product->alg, "allgatherv", &flg)); 2145 alg = flg ? 0 : 1; 2146 2147 /* setup matrix product C */ 2148 PetscCall(MatSetSizes(C, A->rmap->n, B->rmap->n, A->rmap->N, B->rmap->N)); 2149 PetscCall(MatSetType(C, MATMPIDENSE)); 2150 PetscCall(MatSetUp(C)); 2151 PetscCall(PetscObjectGetNewTag((PetscObject)C, &tag)); 2152 2153 /* create data structure for reuse C */ 2154 PetscCall(PetscObjectGetComm((PetscObject)C, &comm)); 2155 PetscCallMPI(MPI_Comm_size(comm, &size)); 2156 PetscCall(PetscNew(&abt)); 2157 abt->tag = tag; 2158 abt->alg = alg; 2159 switch (alg) { 2160 case 1: /* alg: "cyclic" */ 2161 for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, B->rmap->range[i + 1] - B->rmap->range[i]); 2162 bufsiz = A->cmap->N * maxRows; 2163 PetscCall(PetscMalloc2(bufsiz, &abt->buf[0], bufsiz, &abt->buf[1])); 2164 break; 2165 default: /* alg: "allgatherv" */ 2166 PetscCall(PetscMalloc2(B->rmap->n * B->cmap->N, &abt->buf[0], B->rmap->N * B->cmap->N, &abt->buf[1])); 2167 PetscCall(PetscMalloc2(size, &abt->recvcounts, size + 1, &abt->recvdispls)); 2168 for (i = 0; i <= size; i++) PetscCall(PetscMPIIntCast(B->rmap->range[i] * A->cmap->N, &abt->recvdispls[i])); 2169 for (i = 0; i < size; i++) PetscCall(PetscMPIIntCast(abt->recvdispls[i + 1] - abt->recvdispls[i], &abt->recvcounts[i])); 2170 break; 2171 } 2172 2173 C->product->data = abt; 2174 C->product->destroy = MatDestroy_MatMatTransMult_MPIDense_MPIDense; 
2175 C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense; 2176 PetscFunctionReturn(PETSC_SUCCESS); 2177 } 2178 2179 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C) 2180 { 2181 Mat_MPIDense *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data; 2182 Mat_MatTransMultDense *abt; 2183 MPI_Comm comm; 2184 PetscMPIInt rank, size, sendto, recvfrom, recvisfrom; 2185 PetscScalar *sendbuf, *recvbuf = NULL, *cv; 2186 PetscInt i, cK = A->cmap->N, sendsiz, recvsiz, k, j, bn; 2187 PetscScalar _DOne = 1.0, _DZero = 0.0; 2188 const PetscScalar *av, *bv; 2189 PetscBLASInt cm, cn, ck, alda, blda = 0, clda; 2190 MPI_Request reqs[2]; 2191 const PetscInt *ranges; 2192 2193 PetscFunctionBegin; 2194 MatCheckProduct(C, 3); 2195 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 2196 abt = (Mat_MatTransMultDense *)C->product->data; 2197 PetscCall(PetscObjectGetComm((PetscObject)C, &comm)); 2198 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 2199 PetscCallMPI(MPI_Comm_size(comm, &size)); 2200 PetscCall(MatDenseGetArrayRead(a->A, &av)); 2201 PetscCall(MatDenseGetArrayRead(b->A, &bv)); 2202 PetscCall(MatDenseGetArrayWrite(c->A, &cv)); 2203 PetscCall(MatDenseGetLDA(a->A, &i)); 2204 PetscCall(PetscBLASIntCast(i, &alda)); 2205 PetscCall(MatDenseGetLDA(b->A, &i)); 2206 PetscCall(PetscBLASIntCast(i, &blda)); 2207 PetscCall(MatDenseGetLDA(c->A, &i)); 2208 PetscCall(PetscBLASIntCast(i, &clda)); 2209 PetscCall(MatGetOwnershipRanges(B, &ranges)); 2210 bn = B->rmap->n; 2211 if (blda == bn) { 2212 sendbuf = (PetscScalar *)bv; 2213 } else { 2214 sendbuf = abt->buf[0]; 2215 for (k = 0, i = 0; i < cK; i++) { 2216 for (j = 0; j < bn; j++, k++) sendbuf[k] = bv[i * blda + j]; 2217 } 2218 } 2219 if (size > 1) { 2220 sendto = (rank + size - 1) % size; 2221 recvfrom = (rank + size + 1) % size; 2222 } else { 2223 sendto = recvfrom = 0; 2224 } 2225 
PetscCall(PetscBLASIntCast(cK, &ck)); 2226 PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm)); 2227 recvisfrom = rank; 2228 for (i = 0; i < size; i++) { 2229 /* we have finished receiving in sending, bufs can be read/modified */ 2230 PetscMPIInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */ 2231 PetscInt nextbn = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom]; 2232 2233 if (nextrecvisfrom != rank) { 2234 /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */ 2235 sendsiz = cK * bn; 2236 recvsiz = cK * nextbn; 2237 recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1]; 2238 PetscCallMPI(MPIU_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0])); 2239 PetscCallMPI(MPIU_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1])); 2240 } 2241 2242 /* local aseq * sendbuf^T */ 2243 PetscCall(PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn)); 2244 if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &cm, &cn, &ck, &_DOne, av, &alda, sendbuf, &cn, &_DZero, cv + clda * ranges[recvisfrom], &clda)); 2245 2246 if (nextrecvisfrom != rank) { 2247 /* wait for the sends and receives to complete, swap sendbuf and recvbuf */ 2248 PetscCallMPI(MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE)); 2249 } 2250 bn = nextbn; 2251 recvisfrom = nextrecvisfrom; 2252 sendbuf = recvbuf; 2253 } 2254 PetscCall(MatDenseRestoreArrayRead(a->A, &av)); 2255 PetscCall(MatDenseRestoreArrayRead(b->A, &bv)); 2256 PetscCall(MatDenseRestoreArrayWrite(c->A, &cv)); 2257 PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 2258 PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 2259 PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 2260 PetscFunctionReturn(PETSC_SUCCESS); 2261 } 2262 2263 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C) 2264 { 2265 Mat_MPIDense *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, 
*c = (Mat_MPIDense *)C->data; 2266 Mat_MatTransMultDense *abt; 2267 MPI_Comm comm; 2268 PetscMPIInt size, ibn; 2269 PetscScalar *cv, *sendbuf, *recvbuf; 2270 const PetscScalar *av, *bv; 2271 PetscInt blda, i, cK = A->cmap->N, k, j, bn; 2272 PetscScalar _DOne = 1.0, _DZero = 0.0; 2273 PetscBLASInt cm, cn, ck, alda, clda; 2274 2275 PetscFunctionBegin; 2276 MatCheckProduct(C, 3); 2277 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 2278 abt = (Mat_MatTransMultDense *)C->product->data; 2279 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 2280 PetscCallMPI(MPI_Comm_size(comm, &size)); 2281 PetscCall(MatDenseGetArrayRead(a->A, &av)); 2282 PetscCall(MatDenseGetArrayRead(b->A, &bv)); 2283 PetscCall(MatDenseGetArrayWrite(c->A, &cv)); 2284 PetscCall(MatDenseGetLDA(a->A, &i)); 2285 PetscCall(PetscBLASIntCast(i, &alda)); 2286 PetscCall(MatDenseGetLDA(b->A, &blda)); 2287 PetscCall(MatDenseGetLDA(c->A, &i)); 2288 PetscCall(PetscBLASIntCast(i, &clda)); 2289 /* copy transpose of B into buf[0] */ 2290 bn = B->rmap->n; 2291 sendbuf = abt->buf[0]; 2292 recvbuf = abt->buf[1]; 2293 for (k = 0, j = 0; j < bn; j++) { 2294 for (i = 0; i < cK; i++, k++) sendbuf[k] = bv[i * blda + j]; 2295 } 2296 PetscCall(MatDenseRestoreArrayRead(b->A, &bv)); 2297 PetscCall(PetscMPIIntCast(bn * cK, &ibn)); 2298 PetscCallMPI(MPI_Allgatherv(sendbuf, ibn, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm)); 2299 PetscCall(PetscBLASIntCast(cK, &ck)); 2300 PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm)); 2301 PetscCall(PetscBLASIntCast(c->A->cmap->n, &cn)); 2302 if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &cm, &cn, &ck, &_DOne, av, &alda, recvbuf, &ck, &_DZero, cv, &clda)); 2303 PetscCall(MatDenseRestoreArrayRead(a->A, &av)); 2304 PetscCall(MatDenseRestoreArrayRead(b->A, &bv)); 2305 PetscCall(MatDenseRestoreArrayWrite(c->A, &cv)); 2306 PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 2307 
PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 2308 PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 2309 PetscFunctionReturn(PETSC_SUCCESS); 2310 } 2311 2312 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) 2313 { 2314 Mat_MatTransMultDense *abt; 2315 2316 PetscFunctionBegin; 2317 MatCheckProduct(C, 3); 2318 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 2319 abt = (Mat_MatTransMultDense *)C->product->data; 2320 switch (abt->alg) { 2321 case 1: 2322 PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C)); 2323 break; 2324 default: 2325 PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C)); 2326 break; 2327 } 2328 PetscFunctionReturn(PETSC_SUCCESS); 2329 } 2330 2331 static PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data) 2332 { 2333 Mat_MatMultDense *ab = (Mat_MatMultDense *)data; 2334 2335 PetscFunctionBegin; 2336 PetscCall(MatDestroy(&ab->Ce)); 2337 PetscCall(MatDestroy(&ab->Ae)); 2338 PetscCall(MatDestroy(&ab->Be)); 2339 PetscCall(PetscFree(ab)); 2340 PetscFunctionReturn(PETSC_SUCCESS); 2341 } 2342 2343 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) 2344 { 2345 Mat_MatMultDense *ab; 2346 Mat_MPIDense *mdn = (Mat_MPIDense *)A->data; 2347 2348 PetscFunctionBegin; 2349 MatCheckProduct(C, 3); 2350 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Missing product data"); 2351 ab = (Mat_MatMultDense *)C->product->data; 2352 if (ab->Ae && ab->Ce) { 2353 #if PetscDefined(HAVE_ELEMENTAL) 2354 PetscCall(MatConvert_MPIDense_Elemental(A, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Ae)); 2355 PetscCall(MatConvert_MPIDense_Elemental(B, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Be)); 2356 PetscCall(MatMatMultNumeric_Elemental(ab->Ae, ab->Be, ab->Ce)); 2357 PetscCall(MatConvert(ab->Ce, MATMPIDENSE, MAT_REUSE_MATRIX, &C)); 2358 #else 2359 SETERRQ(PetscObjectComm((PetscObject)C), 
PETSC_ERR_PLIB, "PETSC_HAVE_ELEMENTAL not defined"); 2360 #endif 2361 } else { 2362 const PetscScalar *read; 2363 PetscScalar *write; 2364 PetscInt lda; 2365 2366 PetscCall(MatDenseGetLDA(B, &lda)); 2367 PetscCall(MatDenseGetArrayRead(B, &read)); 2368 PetscCall(MatDenseGetArrayWrite(ab->Be, &write)); 2369 if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); /* cannot be done during the symbolic phase because of possible calls to MatProductReplaceMats() */ 2370 for (PetscInt i = 0; i < C->cmap->N; ++i) { 2371 PetscCall(PetscSFBcastBegin(mdn->Mvctx, MPIU_SCALAR, read + i * lda, write + i * ab->Be->rmap->n, MPI_REPLACE)); 2372 PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, read + i * lda, write + i * ab->Be->rmap->n, MPI_REPLACE)); 2373 } 2374 PetscCall(MatDenseRestoreArrayWrite(ab->Be, &write)); 2375 PetscCall(MatDenseRestoreArrayRead(B, &read)); 2376 PetscCall(MatMatMultNumeric_SeqDense_SeqDense(((Mat_MPIDense *)A->data)->A, ab->Be, ((Mat_MPIDense *)C->data)->A)); 2377 } 2378 PetscFunctionReturn(PETSC_SUCCESS); 2379 } 2380 2381 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C) 2382 { 2383 Mat_Product *product = C->product; 2384 PetscInt alg; 2385 Mat_MatMultDense *ab; 2386 PetscBool flg; 2387 2388 PetscFunctionBegin; 2389 MatCheckProduct(C, 4); 2390 PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 2391 /* check local size of A and B */ 2392 PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ", %" PetscInt_FMT ")", 2393 A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend); 2394 2395 PetscCall(PetscStrcmp(product->alg, "petsc", &flg)); 2396 alg = flg ? 
0 : 1; 2397 2398 /* setup C */ 2399 PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N)); 2400 PetscCall(MatSetType(C, MATMPIDENSE)); 2401 PetscCall(MatSetUp(C)); 2402 2403 /* create data structure for reuse Cdense */ 2404 PetscCall(PetscNew(&ab)); 2405 2406 switch (alg) { 2407 case 1: /* alg: "elemental" */ 2408 #if PetscDefined(HAVE_ELEMENTAL) 2409 /* create elemental matrices Ae and Be */ 2410 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &ab->Ae)); 2411 PetscCall(MatSetSizes(ab->Ae, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N)); 2412 PetscCall(MatSetType(ab->Ae, MATELEMENTAL)); 2413 PetscCall(MatSetUp(ab->Ae)); 2414 PetscCall(MatSetOption(ab->Ae, MAT_ROW_ORIENTED, PETSC_FALSE)); 2415 2416 PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &ab->Be)); 2417 PetscCall(MatSetSizes(ab->Be, PETSC_DECIDE, PETSC_DECIDE, B->rmap->N, B->cmap->N)); 2418 PetscCall(MatSetType(ab->Be, MATELEMENTAL)); 2419 PetscCall(MatSetUp(ab->Be)); 2420 PetscCall(MatSetOption(ab->Be, MAT_ROW_ORIENTED, PETSC_FALSE)); 2421 2422 /* compute symbolic Ce = Ae*Be */ 2423 PetscCall(MatCreate(PetscObjectComm((PetscObject)C), &ab->Ce)); 2424 PetscCall(MatMatMultSymbolic_Elemental(ab->Ae, ab->Be, fill, ab->Ce)); 2425 #else 2426 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "PETSC_HAVE_ELEMENTAL not defined"); 2427 #endif 2428 break; 2429 default: /* alg: "petsc" */ 2430 ab->Ae = NULL; 2431 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, A->cmap->N, B->cmap->N, NULL, &ab->Be)); 2432 ab->Ce = NULL; 2433 break; 2434 } 2435 2436 C->product->data = ab; 2437 C->product->destroy = MatDestroy_MatMatMult_MPIDense_MPIDense; 2438 C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense; 2439 PetscFunctionReturn(PETSC_SUCCESS); 2440 } 2441 2442 static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C) 2443 { 2444 Mat_Product *product = C->product; 2445 const char *algTypes[2] = {"petsc", "elemental"}; 2446 PetscInt alg, nalg = PetscDefined(HAVE_ELEMENTAL) ? 
2 : 1; 2447 PetscBool flg = PETSC_FALSE; 2448 2449 PetscFunctionBegin; 2450 /* Set default algorithm */ 2451 alg = 0; /* default is PETSc */ 2452 PetscCall(PetscStrcmp(product->alg, "default", &flg)); 2453 if (flg) PetscCall(MatProductSetAlgorithm(C, algTypes[alg])); 2454 2455 /* Get runtime option */ 2456 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat"); 2457 PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_AB", algTypes, nalg, algTypes[alg], &alg, &flg)); 2458 PetscOptionsEnd(); 2459 if (flg) PetscCall(MatProductSetAlgorithm(C, algTypes[alg])); 2460 2461 C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense; 2462 C->ops->productsymbolic = MatProductSymbolic_AB; 2463 PetscFunctionReturn(PETSC_SUCCESS); 2464 } 2465 2466 static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C) 2467 { 2468 Mat_Product *product = C->product; 2469 Mat A = product->A, B = product->B; 2470 2471 PetscFunctionBegin; 2472 PetscCheck(A->rmap->rstart == B->rmap->rstart && A->rmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", 2473 A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend); 2474 C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense; 2475 C->ops->productsymbolic = MatProductSymbolic_AtB; 2476 PetscFunctionReturn(PETSC_SUCCESS); 2477 } 2478 2479 static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C) 2480 { 2481 Mat_Product *product = C->product; 2482 const char *algTypes[2] = {"allgatherv", "cyclic"}; 2483 PetscInt alg, nalg = 2; 2484 PetscBool flg = PETSC_FALSE; 2485 2486 PetscFunctionBegin; 2487 /* Set default algorithm */ 2488 alg = 0; /* default is allgatherv */ 2489 PetscCall(PetscStrcmp(product->alg, "default", &flg)); 2490 if (flg) PetscCall(MatProductSetAlgorithm(C, algTypes[alg])); 
2491 2492 /* Get runtime option */ 2493 if (product->api_user) { 2494 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatTransposeMult", "Mat"); 2495 PetscCall(PetscOptionsEList("-matmattransmult_mpidense_mpidense_via", "Algorithmic approach", "MatMatTransposeMult", algTypes, nalg, algTypes[alg], &alg, &flg)); 2496 PetscOptionsEnd(); 2497 } else { 2498 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_ABt", "Mat"); 2499 PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_ABt", algTypes, nalg, algTypes[alg], &alg, &flg)); 2500 PetscOptionsEnd(); 2501 } 2502 if (flg) PetscCall(MatProductSetAlgorithm(C, algTypes[alg])); 2503 2504 C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense; 2505 C->ops->productsymbolic = MatProductSymbolic_ABt; 2506 PetscFunctionReturn(PETSC_SUCCESS); 2507 } 2508 2509 static PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C) 2510 { 2511 Mat_Product *product = C->product; 2512 2513 PetscFunctionBegin; 2514 switch (product->type) { 2515 case MATPRODUCT_AB: 2516 PetscCall(MatProductSetFromOptions_MPIDense_AB(C)); 2517 break; 2518 case MATPRODUCT_AtB: 2519 PetscCall(MatProductSetFromOptions_MPIDense_AtB(C)); 2520 break; 2521 case MATPRODUCT_ABt: 2522 PetscCall(MatProductSetFromOptions_MPIDense_ABt(C)); 2523 break; 2524 default: 2525 break; 2526 } 2527 PetscFunctionReturn(PETSC_SUCCESS); 2528 } 2529