/*
   Basic functions for basic parallel dense matrices.
   Portions of this code are under:
   Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
*/

#include <../src/mat/impls/dense/mpi/mpidense.h> /*I "petscmat.h" I*/
#include <../src/mat/impls/aij/mpi/mpiaij.h>
#include <petscblaslapack.h>

/*@
  MatDenseGetLocalMatrix - For a `MATMPIDENSE` or `MATSEQDENSE` matrix returns the sequential
  matrix that represents the operator. For sequential matrices it returns itself.

  Input Parameter:
. A - the sequential or MPI `MATDENSE` matrix

  Output Parameter:
. B - the inner matrix

  Level: intermediate

.seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATMPIDENSE`, `MATSEQDENSE`
@*/
PetscErrorCode MatDenseGetLocalMatrix(Mat A, Mat *B)
{
  Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
  PetscBool     flg;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscAssertPointer(B, 2);
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &flg));
  if (flg) *B = mat->A; /* MPI case: the per-process sequential block */
  else {
    /* Sequential case: the matrix is its own local representation; reject any other type */
    PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQDENSE, &flg));
    PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)A)->type_name);
    *B = A;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Copy A into B by copying the underlying sequential (local) matrices */
static PetscErrorCode MatCopy_MPIDense(Mat A, Mat B, MatStructure s)
{
  Mat_MPIDense *Amat = (Mat_MPIDense *)A->data;
  Mat_MPIDense *Bmat = (Mat_MPIDense *)B->data;

  PetscFunctionBegin;
  PetscCall(MatCopy(Amat->A, Bmat->A, s));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Add alpha to every diagonal entry this process owns.
   Only global diagonal indices in [rstart, min(rend, N_cols)) exist locally,
   hence the rend2 clamp for rectangular matrices. */
PetscErrorCode MatShift_MPIDense(Mat A, PetscScalar alpha)
{
  Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
  PetscInt      j, lda, rstart = A->rmap->rstart, rend = A->rmap->rend, rend2;
  PetscScalar  *v;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArray(mat->A, &v));
  PetscCall(MatDenseGetLDA(mat->A, &lda));
  rend2 = PetscMin(rend, A->cmap->N);
  if (rend2 > rstart) {
    /* local row j-rstart, global column j: column-major with leading dimension lda */
    for (j = rstart; j < rend2; j++) v[j - rstart + j * lda] += alpha;
    PetscCall(PetscLogFlops(rend2 - rstart));
  }
  PetscCall(MatDenseRestoreArray(mat->A, &v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Access one locally owned row; delegates to the sequential block after shifting
   the global row index to local numbering. Off-process rows are not supported. */
static PetscErrorCode MatGetRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
  PetscInt      lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;

  PetscFunctionBegin;
  PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows");
  lrow = row - rstart;
  PetscCall(MatGetRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Release a row obtained with MatGetRow_MPIDense (same local-row translation) */
static PetscErrorCode MatRestoreRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
  PetscInt      lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;

  PetscFunctionBegin;
  PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows");
  lrow = row - rstart;
  PetscCall(MatRestoreRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Return the square on-diagonal block owned by this process as a sequential dense
   matrix. The block is created once, shares the storage of the local matrix
   (preallocated at array + m*rstart), and is cached on A as "DiagonalBlock". */
static PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A, Mat *a)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)A->data;
  PetscInt      m = A->rmap->n, rstart = A->rmap->rstart;
  PetscScalar  *array;
  MPI_Comm      comm;
  PetscBool     flg;
  Mat           B;

  PetscFunctionBegin;
  PetscCall(MatHasCongruentLayouts(A, &flg));
  PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only square matrices supported.");
  PetscCall(PetscObjectQuery((PetscObject)A, "DiagonalBlock", (PetscObject *)&B));
  if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */
#if PetscDefined(HAVE_CUDA)
    PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSECUDA, &flg));
    PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSECUDA);
#elif PetscDefined(HAVE_HIP)
    PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSEHIP, &flg));
    PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSEHIP);
#endif
    PetscCall(PetscObjectGetComm((PetscObject)mdn->A, &comm));
    PetscCall(MatCreate(comm, &B));
    PetscCall(MatSetSizes(B, m, m, m, m));
    PetscCall(MatSetType(B, ((PetscObject)mdn->A)->type_name));
    /* NOTE(review): array + m*rstart assumes the local lda equals m — confirm against MatDenseGetLDA */
    PetscCall(MatDenseGetArrayRead(mdn->A, (const PetscScalar **)&array));
    PetscCall(MatSeqDenseSetPreallocation(B, array + m * rstart));
    PetscCall(MatDenseRestoreArrayRead(mdn->A, (const PetscScalar **)&array));
    PetscCall(PetscObjectCompose((PetscObject)A, "DiagonalBlock", (PetscObject)B));
    *a = B;
    PetscCall(MatDestroy(&B)); /* the compose above holds a reference; drop ours */
  } else *a = B;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Insert/add values: locally owned rows go straight into the sequential block;
   off-process rows are stashed for communication during assembly (unless
   donotstash). Handles both row- and column-oriented input layouts of v. */
static PetscErrorCode MatSetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIDense *A = (Mat_MPIDense *)mat->data;
  PetscInt      i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;
  PetscBool     roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative rows are ignored by convention */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        PetscCall(MatSetValues(A->A, 1, &row, n, idxn, v ? v + i * n : NULL, addv));
      } else {
        /* column-oriented v: entry (i,j) lives at v[i + j*m] */
        for (j = 0; j < n; j++) {
          if (idxn[j] < 0) continue;
          PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large");
          PetscCall(MatSetValues(A->A, 1, &row, 1, &idxn[j], v ? v + i + j * m : NULL, addv));
        }
      }
    } else if (!A->donotstash) {
      mat->assembled = PETSC_FALSE; /* off-process entries require a new assembly */
      if (roworiented) {
        PetscCall(MatStashValuesRow_Private(&mat->stash, idxm[i], n, idxn, v ? v + i * n : NULL, PETSC_FALSE));
      } else {
        PetscCall(MatStashValuesCol_Private(&mat->stash, idxm[i], n, idxn, v ? v + i : NULL, m, PETSC_FALSE));
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Retrieve values; only locally owned rows can be queried (no communication) */
static PetscErrorCode MatGetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
  PetscInt      i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j = 0; j < n; j++) {
        if (idxn[j] < 0) continue; /* negative column */
        PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large");
        PetscCall(MatGetValues(mdn->A, 1, &row, 1, &idxn[j], v + i * n + j));
      }
    } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Leading dimension of the local (sequential) storage */
static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A, PetscInt *lda)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseGetLDA(a->A, lda));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Set the leading dimension; lazily creates the inner sequential matrix
   (choosing the CUDA/HIP sequential type when A is the corresponding MPI type)
   if it does not exist yet. */
static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A, PetscInt lda)
{
  Mat_MPIDense *a     = (Mat_MPIDense *)A->data;
  MatType       mtype = MATSEQDENSE;

  PetscFunctionBegin;
  if (!a->A) {
    PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
    PetscCall(PetscLayoutSetUp(A->rmap));
    PetscCall(PetscLayoutSetUp(A->cmap));
    PetscCall(MatCreate(PETSC_COMM_SELF, &a->A));
    /* local block is (local rows) x (ALL global columns) */
    PetscCall(MatSetSizes(a->A, A->rmap->n, A->cmap->N, A->rmap->n, A->cmap->N));
#if PetscDefined(HAVE_CUDA)
    PetscBool iscuda;
    PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSECUDA, &iscuda));
    if (iscuda) mtype = MATSEQDENSECUDA;
#elif PetscDefined(HAVE_HIP)
    PetscBool iship;
    PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSEHIP, &iship));
    if (iship) mtype = MATSEQDENSEHIP;
#endif
    PetscCall(MatSetType(a->A, mtype));
  }
  PetscCall(MatDenseSetLDA(a->A, lda));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Read-write access to the raw local array (guards against an outstanding submatrix) */
static PetscErrorCode MatDenseGetArray_MPIDense(Mat A, PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseGetArray(a->A, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Read-only access to the raw local array */
static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A, PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseGetArrayRead(a->A, (const PetscScalar **)array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Write-only access to the raw local array */
static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A, PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseGetArrayWrite(a->A, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Temporarily substitute a user-provided array for the local storage */
static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A, const PetscScalar *array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDensePlaceArray(a->A, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Undo MatDensePlaceArray, restoring the original storage */
static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseResetArray(a->A));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Permanently replace the local storage with a user-provided array */
static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A, const PetscScalar *array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseReplaceArray(a->A, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Extract the submatrix A(isrow, iscol). Column indices are gathered to all
   processes (the local block stores all columns); rows must follow the same
   row partitioning as A — no parallel redistribution is performed. */
static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A, IS isrow, IS iscol, MatReuse scall, Mat *B)
{
  Mat_MPIDense      *mat = (Mat_MPIDense *)A->data, *newmatd;
  PetscInt           lda, i, j, rstart, rend, nrows, ncols, Ncols, nlrows, nlcols;
  const PetscInt    *irow, *icol;
  const PetscScalar *v;
  PetscScalar       *bv;
  Mat                newmat;
  IS                 iscol_local;
  MPI_Comm           comm_is, comm_mat;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm_mat));
  PetscCall(PetscObjectGetComm((PetscObject)iscol, &comm_is));
  PetscCheck(comm_mat == comm_is, PETSC_COMM_SELF, PETSC_ERR_ARG_NOTSAMECOMM, "IS communicator must match matrix communicator");

  PetscCall(ISAllGather(iscol, &iscol_local));
  PetscCall(ISGetIndices(isrow, &irow));
  PetscCall(ISGetIndices(iscol_local, &icol));
  PetscCall(ISGetLocalSize(isrow, &nrows));
  PetscCall(ISGetLocalSize(iscol, &ncols));
  PetscCall(ISGetSize(iscol, &Ncols)); /* global number of columns, size of iscol_local */

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK.  ... Currently supports only submatrix same partitioning as
     original matrix! */

  PetscCall(MatGetLocalSize(A, &nlrows, &nlcols));
  PetscCall(MatGetOwnershipRange(A, &rstart, &rend));

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &newmat));
    PetscCall(MatSetSizes(newmat, nrows, ncols, PETSC_DECIDE, Ncols));
    PetscCall(MatSetType(newmat, ((PetscObject)A)->type_name));
    PetscCall(MatMPIDenseSetPreallocation(newmat, NULL));
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense *)newmat->data;
  PetscCall(MatDenseGetArray(newmatd->A, &bv));
  PetscCall(MatDenseGetArrayRead(mat->A, &v));
  PetscCall(MatDenseGetLDA(mat->A, &lda));
  /* NOTE(review): the destination is filled sequentially via bv++, which assumes the
     new matrix's lda equals its local row count — confirm for the reuse path */
  for (i = 0; i < Ncols; i++) {
    const PetscScalar *av = v + lda * icol[i];
    for (j = 0; j < nrows; j++) *bv++ = av[irow[j] - rstart];
  }
  PetscCall(MatDenseRestoreArrayRead(mat->A, &v));
  PetscCall(MatDenseRestoreArray(newmatd->A, &bv));

  /* Assemble the matrices so that the correct flags are set */
  PetscCall(MatAssemblyBegin(newmat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(newmat, MAT_FINAL_ASSEMBLY));

  /* Free work space */
  PetscCall(ISRestoreIndices(isrow, &irow));
  PetscCall(ISRestoreIndices(iscol_local, &icol));
  PetscCall(ISDestroy(&iscol_local));
  *B = newmat;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Restore an array obtained with MatDenseGetArray_MPIDense */
static PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A, PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseRestoreArray(a->A, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Restore an array obtained with MatDenseGetArrayRead_MPIDense */
static PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A, PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseRestoreArrayRead(a->A, (const PetscScalar **)array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Restore an array obtained with MatDenseGetArrayWrite_MPIDense */
static PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A, PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseRestoreArrayWrite(a->A, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Begin assembly: start communicating any stashed off-process entries */
static PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat, MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
  PetscInt      nstash, reallocs;

  PetscFunctionBegin;
  if (mdn->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
  PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
  PetscCall(PetscInfo(mdn->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Finish assembly: drain stash messages, inserting batches of values that share
   a row with a single MatSetValues call, then assemble the local matrix. */
static PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat, MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
  PetscInt      i, *row, *col, flg, j, rstart, ncols;
  PetscMPIInt   n;
  PetscScalar  *val;

  PetscFunctionBegin;
  if (!mdn->donotstash && !mat->nooffprocentries) {
    /* wait on receives */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break; /* no more messages */

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIDense(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
  }

  PetscCall(MatAssemblyBegin(mdn->A, mode));
  PetscCall(MatAssemblyEnd(mdn->A, mode));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Zero all locally stored entries */
static PetscErrorCode MatZeroEntries_MPIDense(Mat A)
{
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatZeroEntries(l->A));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Zero the given (global) rows; optionally place diag on the diagonal and fix
   the right-hand side b so that b_i = diag * x_i for each zeroed row. */
static PetscErrorCode MatZeroRows_MPIDense(Mat A, PetscInt n, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;
  PetscInt      i, len, *lrows;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(PetscLayoutMapLocal(A->rmap, n, rows, &len, &lrows, NULL));
  /* fix right-hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArrayWrite(b, &bb));
    for (i = 0; i < len; ++i) bb[lrows[i]] = diag * xx[lrows[i]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArrayWrite(b, &bb));
  }
  PetscCall(MatZeroRows(l->A, len, lrows, 0.0, NULL, NULL));
  if (diag != 0.0) {
    Vec d;

    /* set the diagonal via a full MatDiagonalSet since local zeroing dropped it */
    PetscCall(MatCreateVecs(A, NULL, &d));
    PetscCall(VecSet(d, diag));
    PetscCall(MatDiagonalSet(A, d, INSERT_VALUES));
    PetscCall(VecDestroy(&d));
  }
  PetscCall(PetscFree(lrows));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* y = A*x: broadcast the global x into the local work vector lvec via the
   communication SF, then multiply with the local (rows x all-columns) block. */
static PetscErrorCode MatMult_MPIDense(Mat mat, Vec xx, Vec yy)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
  PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
  PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
  PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
  PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
  PetscCall((*mdn->A->ops->mult)(mdn->A, mdn->lvec, yy));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* z = y + A*x: same communication pattern as MatMult, using multadd locally */
static PetscErrorCode MatMultAdd_MPIDense(Mat mat, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
  PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
  PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
  PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
  PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
  PetscCall((*mdn->A->ops->multadd)(mdn->A, mdn->lvec, yy, zz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* y = A^T*x (or A^H*x when herm): local (hermitian-)transpose multiply into the
   work vector, then sum-reduce the partial results into y via the SF. */
static PetscErrorCode MatMultTransposeKernel_MPIDense(Mat A, Vec xx, Vec yy, PetscBool herm)
{
  Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
  PetscCall(VecSet(yy, 0.0)); /* reduction accumulates into yy */
  if (herm) PetscCall((*a->A->ops->multhermitiantranspose)(a->A, xx, a->lvec));
  else PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec));
  PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(yy, &ay, &aymtype));
  PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
  PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
  PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
  PetscCall(VecRestoreArrayAndMemType(yy, &ay));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* z = y + A^T*x (or A^H*x when herm): copy y into z, then sum-reduce into z */
static PetscErrorCode MatMultTransposeAddKernel_MPIDense(Mat A, Vec xx, Vec yy, Vec zz, PetscBool herm)
{
  Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
  PetscCall(VecCopy(yy, zz));
  if (herm) PetscCall((*a->A->ops->multhermitiantranspose)(a->A, xx, a->lvec));
  else PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec));
  PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(zz, &ay, &aymtype));
  PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
  PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
  PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
  PetscCall(VecRestoreArrayAndMemType(zz, &ay));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* y = A^T*x */
static PetscErrorCode MatMultTranspose_MPIDense(Mat A, Vec xx, Vec yy)
{
  PetscFunctionBegin;
  PetscCall(MatMultTransposeKernel_MPIDense(A, xx, yy, PETSC_FALSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* z = y + A^T*x */
static PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A, Vec xx, Vec yy, Vec zz)
{
  PetscFunctionBegin;
  PetscCall(MatMultTransposeAddKernel_MPIDense(A, xx, yy, zz, PETSC_FALSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* y = A^H*x */
static PetscErrorCode MatMultHermitianTranspose_MPIDense(Mat A, Vec xx, Vec yy)
{
  PetscFunctionBegin;
  PetscCall(MatMultTransposeKernel_MPIDense(A, xx, yy, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* z = y + A^H*x */
static PetscErrorCode MatMultHermitianTransposeAdd_MPIDense(Mat A, Vec xx, Vec yy, Vec zz)
{
  PetscFunctionBegin;
  PetscCall(MatMultTransposeAddKernel_MPIDense(A, xx, yy, zz, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Extract the diagonal of A into v; entries beyond the local diagonal block
   length are zeroed. */
PetscErrorCode MatGetDiagonal_MPIDense(Mat A, Vec v)
{
  Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
  PetscInt           lda, len, i, nl, ng, m = A->rmap->n, radd;
  PetscScalar       *x;
  const PetscScalar *av;

  PetscFunctionBegin;
  PetscCall(VecGetArray(v, &x));
  PetscCall(VecGetSize(v, &ng));
  PetscCheck(ng == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming mat and vec");
  PetscCall(VecGetLocalSize(v, &nl));
  len = PetscMin(a->A->rmap->n, a->A->cmap->n);
  /* NOTE(review): radd uses m (local row count) while the column stride below uses
     lda; these agree only when lda == m — confirm rstart*lda is not intended here */
  radd = A->rmap->rstart * m;
  PetscCall(MatDenseGetArrayRead(a->A, &av));
  PetscCall(MatDenseGetLDA(a->A, &lda));
  for (i = 0; i < len; i++) x[i] = av[radd + i * lda + i];
  PetscCall(MatDenseRestoreArrayRead(a->A, &av));
  PetscCall(PetscArrayzero(x + i, nl - i)); /* pad the remainder of the local vec */
  PetscCall(VecRestoreArray(v, &x));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Destroy the matrix: release all owned objects and detach every composed
   method and the cached diagonal block. */
static PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;

  PetscFunctionBegin;
  PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
  PetscCall(MatStashDestroy_Private(&mat->stash));
  PetscCheck(!mdn->vecinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!mdn->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDestroy(&mdn->A));
  PetscCall(VecDestroy(&mdn->lvec));
  PetscCall(PetscSFDestroy(&mdn->Mvctx));
  PetscCall(VecDestroy(&mdn->cvec));
  PetscCall(MatDestroy(&mdn->cmat));

  PetscCall(PetscFree(mat->data));
  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));

  /* remove all composed method implementations registered at creation */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", NULL));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", NULL));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", NULL));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensecuda_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAPlaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAResetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAReplaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDASetPreallocation_C", NULL));
#endif
#if defined(PETSC_HAVE_HIP)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensehip_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensehip_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidensehip_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaijhipsparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPPlaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPResetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPReplaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPSetPreallocation_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", NULL));

  PetscCall(PetscObjectCompose((PetscObject)mat, "DiagonalBlock", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

#include <petscdraw.h>
/* View helper for ASCII/draw/socket viewers. For detailed-info ASCII formats it
   prints per-process statistics; otherwise it gathers the whole matrix onto
   rank 0 (as a temporary MATMPIDENSE) and views it there sequentially. */
static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
{
  Mat_MPIDense     *mdn = (Mat_MPIDense *)mat->data;
  PetscMPIInt       rank;
  PetscViewerType   vtype;
  PetscBool         iascii, isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  if (iascii) {
    PetscCall(PetscViewerGetType(viewer, &vtype));
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "  [%d] local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %" PetscInt_FMT " \n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                   (PetscInt)info.memory));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      if (mdn->Mvctx) PetscCall(PetscSFView(mdn->Mvctx, viewer));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(PETSC_SUCCESS);
    }
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;

    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawIsNull(draw, &isnull));
    if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat          A;
    PetscInt     M = mat->rmap->N, N = mat->cmap->N, m, row, i, nz;
    PetscInt    *cols;
    PetscScalar *vals;

    PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
    if (rank == 0) {
      PetscCall(MatSetSizes(A, M, N, M, N));
    } else {
      PetscCall(MatSetSizes(A, 0, 0, M, N));
    }
    /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
    PetscCall(MatSetType(A, MATMPIDENSE));
    PetscCall(MatMPIDenseSetPreallocation(A, NULL));

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    A->insertmode = INSERT_VALUES;

    row = mat->rmap->rstart;
    m   = mdn->A->rmap->n;
    for (i = 0; i < m; i++) {
      PetscCall(MatGetRow_MPIDense(mat, row, &nz, &cols, &vals));
      PetscCall(MatSetValues_MPIDense(A, 1, &row, nz, cols, vals, INSERT_VALUES));
      PetscCall(MatRestoreRow_MPIDense(mat, row, &nz, &cols, &vals));
      row++;
    }

    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    if (rank == 0) {
      PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIDense *)A->data)->A, ((PetscObject)mat)->name));
      PetscCall(MatView_SeqDense(((Mat_MPIDense *)A->data)->A, sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Dispatch viewing to the appropriate implementation by viewer type */
static PetscErrorCode MatView_MPIDense(Mat mat, PetscViewer viewer)
{
  PetscBool iascii, isbinary, isdraw, issocket;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));

  if (iascii || issocket || isdraw) {
    PetscCall(MatView_MPIDense_ASCIIorDraworSocket(mat, viewer));
  } else if (isbinary) PetscCall(MatView_Dense_Binary(mat, viewer));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Gather matrix statistics: local values, or max/sum reductions across ranks */
static PetscErrorCode MatGetInfo_MPIDense(Mat A, MatInfoType flag, MatInfo *info)
{
  Mat_MPIDense  *mat = (Mat_MPIDense *)A->data;
  Mat            mdn = mat->A;
  PetscLogDouble isend[5], irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;

  PetscCall(MatGetInfo(mdn, MAT_LOCAL, info));

  isend[0] = info->nz_used;
  isend[1] = info->nz_allocated;
  isend[2] = info->nz_unneeded;
  isend[3] = info->memory;
  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)A)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)A)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(PETSC_SUCCESS);
}
/* Set a matrix option: preallocation-sensitive options are forwarded to the
   inner sequential matrix; several options are acknowledged but ignored. */
static PetscErrorCode MatSetOption_MPIDense(Mat A, MatOption op, PetscBool flg)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A, 1);
    a->roworiented = flg; /* remembered locally for MatSetValues insertion order */
    PetscCall(MatSetOption(a->A, op, flg));
    break;
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_USE_HASH_TABLE:
  case MAT_SORTED_FULL:
    PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg; /* skip the stash during assembly */
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
  case MAT_SPD:
  case MAT_IGNORE_LOWER_TRIANGULAR:
  case MAT_IGNORE_ZERO_ENTRIES:
  case MAT_SPD_ETERNAL:
    /* if the diagonal matrix is square it inherits some of the properties above */
    PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
    break;
  default:
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %s", MatOptions[op]);
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Diagonal scaling A <- diag(ll) * A * diag(rr). The left vector matches the
   row layout; the right vector is broadcast into lvec via the Mvctx star forest
   because every rank stores all N columns of its row block. */
static PetscErrorCode MatDiagonalScale_MPIDense(Mat A, Vec ll, Vec rr)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)A->data;
  const PetscScalar *l;
  PetscScalar        x, *v, *vv, *r;
  PetscInt           i, j, s2a, s3a, s2, s3, m = mdn->A->rmap->n, n = mdn->A->cmap->n, lda;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArray(mdn->A, &vv));
  PetscCall(MatDenseGetLDA(mdn->A, &lda));
  PetscCall(MatGetLocalSize(A, &s2, &s3));
  if (ll) {
    PetscCall(VecGetLocalSize(ll, &s2a));
    PetscCheck(s2a == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left scaling vector non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT, s2a, s2);
    PetscCall(VecGetArrayRead(ll, &l));
    for (i = 0; i < m; i++) {
      x = l[i];
      v = vv + i; /* walk row i across columns; column stride is lda */
      for (j = 0; j < n; j++) {
        (*v) *= x;
        v += lda;
      }
    }
    PetscCall(VecRestoreArrayRead(ll, &l));
    PetscCall(PetscLogFlops(1.0 * n * m));
  }
  if (rr) {
    const PetscScalar *ar;

    PetscCall(VecGetLocalSize(rr, &s3a));
    PetscCheck(s3a == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Right scaling vec non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT ".", s3a, s3);
    PetscCall(VecGetArrayRead(rr, &ar));
    if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
    PetscCall(VecGetArray(mdn->lvec, &r));
    /* gather the full right-scaling vector into lvec on every rank */
    PetscCall(PetscSFBcastBegin(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE));
    PetscCall(VecRestoreArrayRead(rr, &ar));
    for (i = 0; i < n; i++) {
      x = r[i];
      v = vv + i * lda; /* column i is contiguous */
      for (j = 0; j < m; j++) (*v++) *= x;
    }
    PetscCall(VecRestoreArray(mdn->lvec, &r));
    PetscCall(PetscLogFlops(1.0 * n * m));
  }
  PetscCall(MatDenseRestoreArray(mdn->A, &vv));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Parallel norms: Frobenius sums |a_ij|^2 locally then allreduces; 1-norm
   reduces per-column absolute sums and takes the max; infinity-norm takes the
   max of the local row norms. The 2-norm is unsupported in parallel. */
static PetscErrorCode MatNorm_MPIDense(Mat A, NormType type, PetscReal *nrm)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)A->data;
  PetscInt           i, j;
  PetscMPIInt        size;
  PetscReal          sum = 0.0;
  const PetscScalar *av, *v;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArrayRead(mdn->A, &av));
  v = av;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    PetscCall(MatNorm(mdn->A, type, nrm)); /* single rank: defer entirely to the sequential matrix */
  } else {
    if (type == NORM_FROBENIUS) {
      for (i = 0; i < mdn->A->cmap->n * mdn->A->rmap->n; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
      *nrm = PetscSqrtReal(*nrm);
      PetscCall(PetscLogFlops(2.0 * mdn->A->cmap->n * mdn->A->rmap->n));
    } else if (type == NORM_1) {
      PetscReal *tmp, *tmp2;
      PetscCall(PetscCalloc2(A->cmap->N, &tmp, A->cmap->N, &tmp2));
      *nrm = 0.0;
      v    = av;
      /* NOTE(review): assumes the local array is contiguous column-major
         (lda == local rows) — confirm for matrices with a modified LDA */
      for (j = 0; j < mdn->A->cmap->n; j++) {
        for (i = 0; i < mdn->A->rmap->n; i++) {
          tmp[j] += PetscAbsScalar(*v);
          v++;
        }
      }
      PetscCall(MPIU_Allreduce(tmp, tmp2, A->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
      for (j = 0; j < A->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      PetscCall(PetscFree2(tmp, tmp2));
      PetscCall(PetscLogFlops(A->cmap->n * A->rmap->n));
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp;
      PetscCall(MatNorm(mdn->A, type, &ntemp));
      PetscCall(MPIU_Allreduce(&ntemp, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
    } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for two norm");
  }
  PetscCall(MatDenseRestoreArrayRead(mdn->A, &av));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Out-of-place (or in-place via MatHeaderMerge) transpose built by inserting
   each local column of A as a row of B with MatSetValues. */
static PetscErrorCode MatTranspose_MPIDense(Mat A, MatReuse reuse, Mat *matout)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat           B;
  PetscInt      M = A->rmap->N, N = A->cmap->N, m, n, *rwork, rstart = A->rmap->rstart;
  PetscInt      j, i, lda;
  PetscScalar  *v;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
    PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
    PetscCall(MatMPIDenseSetPreallocation(B, NULL));
  } else B = *matout;

  m = a->A->rmap->n;
  n = a->A->cmap->n;
  PetscCall(MatDenseGetArrayRead(a->A, (const PetscScalar **)&v));
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(PetscMalloc1(m, &rwork));
  for (i = 0; i < m; i++) rwork[i] = rstart + i; /* global row indices of the local block */
  for (j = 0; j < n; j++) {
    PetscCall(MatSetValues(B, 1, &j, m, rwork, v, INSERT_VALUES));
    v = PetscSafePointerPlusOffset(v, lda); /* advance to the next column */
  }
  PetscCall(MatDenseRestoreArrayRead(a->A, (const PetscScalar **)&v));
  PetscCall(PetscFree(rwork));
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    PetscCall(MatHeaderMerge(A, &B));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatDuplicate_MPIDense(Mat, MatDuplicateOption, Mat *);
PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat, PetscScalar);

/* Finalize the layouts and preallocate storage if not yet done. */
static PetscErrorCode MatSetUp_MPIDense(Mat A)
{
  PetscFunctionBegin;
  PetscCall(PetscLayoutSetUp(A->rmap));
  PetscCall(PetscLayoutSetUp(A->cmap));
  if (!A->preallocated) PetscCall(MatMPIDenseSetPreallocation(A, NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Y <- Y + alpha*X, forwarded to the inner sequential matrices. */
static PetscErrorCode MatAXPY_MPIDense(Mat Y, PetscScalar alpha, Mat X, MatStructure str)
{
  Mat_MPIDense *A = (Mat_MPIDense *)Y->data, *B = (Mat_MPIDense *)X->data;

  PetscFunctionBegin;
  PetscCall(MatAXPY(A->A, alpha, B->A, str));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Complex-conjugate every entry, forwarded to the inner sequential matrix. */
static PetscErrorCode MatConjugate_MPIDense(Mat mat)
{
  Mat_MPIDense *a = (Mat_MPIDense *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatConjugate(a->A));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Keep only the real part of every entry (inner matrix does the work). */
static PetscErrorCode MatRealPart_MPIDense(Mat A)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatRealPart(a->A));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Keep only the imaginary part of every entry (inner matrix does the work). */
static PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatImaginaryPart(a->A));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Extract global column `col` into v by invoking the inner matrix's
   getcolumnvector method directly; errors out if the op is missing. */
static PetscErrorCode MatGetColumnVector_MPIDense(Mat A, Vec v, PetscInt col)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(a->A, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing local matrix");
  PetscCheck(a->A->ops->getcolumnvector, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing get column operation");
  PetscCall((*a->A->ops->getcolumnvector)(a->A, v, col));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat, PetscInt, PetscReal *);

/* Column-wise reductions (sums, norms, means): compute local per-column values
   on the inner matrix, reduce across ranks (MAX for infinity norm, SUM
   otherwise), then post-process (sqrt for 2-norm, divide by m for means). */
static PetscErrorCode MatGetColumnReductions_MPIDense(Mat A, PetscInt type, PetscReal *reductions)
{
  PetscInt      i, m, n;
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscReal    *work;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A, &m, &n));
  PetscCall(PetscMalloc1(n, &work));
  if (type == REDUCTION_MEAN_REALPART) {
    PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_REALPART, work));
  } else if (type == REDUCTION_MEAN_IMAGINARYPART) {
    PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_IMAGINARYPART, work));
  } else {
    PetscCall(MatGetColumnReductions_SeqDense(a->A, type, work));
  }
  if (type == NORM_2) {
    for (i = 0; i < n; i++) work[i] *= work[i]; /* square local norms so SUM reduction is valid */
  }
  if (type == NORM_INFINITY) {
    PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, A->hdr.comm));
  } else {
    PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, A->hdr.comm));
  }
  PetscCall(PetscFree(work));
  if (type == NORM_2) {
    for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
  } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < n; i++)
      reductions[i] /= m; /* convert sums to means over the global row count */
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Fill the matrix with random values from rctx (inner matrix does the work). */
static PetscErrorCode MatSetRandom_MPIDense(Mat x, PetscRandom rctx)
{
  Mat_MPIDense *d = (Mat_MPIDense *)x->data;

  PetscFunctionBegin;
  PetscCall(MatSetRandom(d->A, rctx));
#if defined(PETSC_HAVE_DEVICE)
  x->offloadmask = d->A->offloadmask;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* A dense matrix always stores its diagonal, so nothing is ever missing. */
static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A, PetscBool *missing, PetscInt *d)
{
  PetscFunctionBegin;
  *missing = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat);
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat);
static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat);
static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat);
static PetscErrorCode MatEqual_MPIDense(Mat, Mat, PetscBool *);
static PetscErrorCode MatLoad_MPIDense(Mat, PetscViewer);
static PetscErrorCode MatProductSetFromOptions_MPIDense(Mat);

/* Function table for MATMPIDENSE; slot numbers follow struct _MatOps. */
static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
                                       MatGetRow_MPIDense,
                                       MatRestoreRow_MPIDense,
                                       MatMult_MPIDense,
                                       /* 4*/ MatMultAdd_MPIDense,
                                       MatMultTranspose_MPIDense,
                                       MatMultTransposeAdd_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 10*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatTranspose_MPIDense,
                                       /* 15*/ MatGetInfo_MPIDense,
                                       MatEqual_MPIDense,
                                       MatGetDiagonal_MPIDense,
                                       MatDiagonalScale_MPIDense,
                                       MatNorm_MPIDense,
                                       /* 20*/ MatAssemblyBegin_MPIDense,
                                       MatAssemblyEnd_MPIDense,
                                       MatSetOption_MPIDense,
                                       MatZeroEntries_MPIDense,
                                       /* 24*/ MatZeroRows_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 29*/ MatSetUp_MPIDense,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPIDense,
                                       NULL,
                                       /* 34*/ MatDuplicate_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 39*/ MatAXPY_MPIDense,
                                       MatCreateSubMatrices_MPIDense,
                                       NULL,
                                       MatGetValues_MPIDense,
                                       MatCopy_MPIDense,
                                       /* 44*/ NULL,
                                       MatScale_MPIDense,
                                       MatShift_MPIDense,
                                       NULL,
                                       NULL,
                                       /* 49*/ MatSetRandom_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 54*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 59*/ MatCreateSubMatrix_MPIDense,
                                       MatDestroy_MPIDense,
                                       MatView_MPIDense,
                                       NULL,
                                       NULL,
                                       /* 64*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 69*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 74*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 79*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 83*/ MatLoad_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 89*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 94*/ NULL,
                                       NULL,
                                       MatMatTransposeMultSymbolic_MPIDense_MPIDense,
                                       MatMatTransposeMultNumeric_MPIDense_MPIDense,
                                       NULL,
                                       /* 99*/ MatProductSetFromOptions_MPIDense,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPIDense,
                                       NULL,
                                       /*104*/ NULL,
                                       MatRealPart_MPIDense,
                                       MatImaginaryPart_MPIDense,
                                       NULL,
                                       NULL,
                                       /*109*/ NULL,
                                       NULL,
                                       NULL,
                                       MatGetColumnVector_MPIDense,
                                       MatMissingDiagonal_MPIDense,
                                       /*114*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*119*/ NULL,
                                       NULL,
                                       MatMultHermitianTranspose_MPIDense,
                                       MatMultHermitianTransposeAdd_MPIDense,
                                       NULL,
                                       /*124*/ NULL,
                                       MatGetColumnReductions_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*129*/ NULL,
                                       NULL,
                                       MatTransposeMatMultSymbolic_MPIDense_MPIDense,
                                       MatTransposeMatMultNumeric_MPIDense_MPIDense,
                                       NULL,
                                       /*134*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*139*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatCreateMPIMatConcatenateSeqMat_MPIDense,
                                       /*145*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*150*/ NULL,
                                       NULL,
                                       NULL};

/* Allocate (or adopt, when data != NULL) the local m x N storage: each rank
   stores its row block with all global columns in the inner sequential matrix.
   The inner type is switched to the CUDA/HIP variant when appropriate. */
static PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat, PetscScalar *data)
{
  Mat_MPIDense *a     = (Mat_MPIDense *)mat->data;
  MatType       mtype = MATSEQDENSE;

  PetscFunctionBegin;
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));
  if (!a->A) {
    PetscCall(MatCreate(PETSC_COMM_SELF, &a->A));
    PetscCall(MatSetSizes(a->A, mat->rmap->n, mat->cmap->N, mat->rmap->n, mat->cmap->N));
  }
#if defined(PETSC_HAVE_CUDA)
  PetscBool iscuda;
  PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSECUDA, &iscuda));
  if (iscuda) mtype = MATSEQDENSECUDA;
#endif
#if defined(PETSC_HAVE_HIP)
  PetscBool iship;
  PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSEHIP, &iship));
  if (iship) mtype = MATSEQDENSEHIP;
#endif
  PetscCall(MatSetType(a->A, mtype));
  PetscCall(MatSeqDenseSetPreallocation(a->A, data));
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
  mat->offloadmask = a->A->offloadmask;
#endif
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Convert an MPIAIJ matrix to MPIDENSE: densify the local part, then
   concatenate the per-rank dense blocks into a parallel dense matrix. */
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIDense(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat B, C;

  PetscFunctionBegin;
  PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &C));
  PetscCall(MatConvert_SeqAIJ_SeqDense(C, MATSEQDENSE, MAT_INITIAL_MATRIX, &B));
  PetscCall(MatDestroy(&C));
  if (reuse == MAT_REUSE_MATRIX) {
    C = *newmat;
  } else C = NULL;
  PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B,
A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C)); 1337 PetscCall(MatDestroy(&B)); 1338 if (reuse == MAT_INPLACE_MATRIX) { 1339 PetscCall(MatHeaderReplace(A, &C)); 1340 } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C; 1341 PetscFunctionReturn(PETSC_SUCCESS); 1342 } 1343 1344 static PetscErrorCode MatConvert_MPIDense_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 1345 { 1346 Mat B, C; 1347 1348 PetscFunctionBegin; 1349 PetscCall(MatDenseGetLocalMatrix(A, &C)); 1350 PetscCall(MatConvert_SeqDense_SeqAIJ(C, MATSEQAIJ, MAT_INITIAL_MATRIX, &B)); 1351 if (reuse == MAT_REUSE_MATRIX) { 1352 C = *newmat; 1353 } else C = NULL; 1354 PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C)); 1355 PetscCall(MatDestroy(&B)); 1356 if (reuse == MAT_INPLACE_MATRIX) { 1357 PetscCall(MatHeaderReplace(A, &C)); 1358 } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C; 1359 PetscFunctionReturn(PETSC_SUCCESS); 1360 } 1361 1362 #if defined(PETSC_HAVE_ELEMENTAL) 1363 PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 1364 { 1365 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1366 Mat mat_elemental; 1367 PetscScalar *v; 1368 PetscInt m = A->rmap->n, N = A->cmap->N, rstart = A->rmap->rstart, i, *rows, *cols, lda; 1369 1370 PetscFunctionBegin; 1371 if (reuse == MAT_REUSE_MATRIX) { 1372 mat_elemental = *newmat; 1373 PetscCall(MatZeroEntries(*newmat)); 1374 } else { 1375 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental)); 1376 PetscCall(MatSetSizes(mat_elemental, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N)); 1377 PetscCall(MatSetType(mat_elemental, MATELEMENTAL)); 1378 PetscCall(MatSetUp(mat_elemental)); 1379 PetscCall(MatSetOption(mat_elemental, MAT_ROW_ORIENTED, PETSC_FALSE)); 1380 } 1381 1382 PetscCall(PetscMalloc2(m, &rows, N, &cols)); 1383 for (i = 0; i < N; i++) cols[i] = i; 1384 for (i = 0; i < m; 
i++) rows[i] = rstart + i; 1385 1386 /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */ 1387 PetscCall(MatDenseGetArray(A, &v)); 1388 PetscCall(MatDenseGetLDA(a->A, &lda)); 1389 if (lda == m) PetscCall(MatSetValues(mat_elemental, m, rows, N, cols, v, ADD_VALUES)); 1390 else { 1391 for (i = 0; i < N; i++) PetscCall(MatSetValues(mat_elemental, m, rows, 1, &i, v + lda * i, ADD_VALUES)); 1392 } 1393 PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY)); 1394 PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY)); 1395 PetscCall(MatDenseRestoreArray(A, &v)); 1396 PetscCall(PetscFree2(rows, cols)); 1397 1398 if (reuse == MAT_INPLACE_MATRIX) { 1399 PetscCall(MatHeaderReplace(A, &mat_elemental)); 1400 } else { 1401 *newmat = mat_elemental; 1402 } 1403 PetscFunctionReturn(PETSC_SUCCESS); 1404 } 1405 #endif 1406 1407 static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A, PetscInt col, PetscScalar **vals) 1408 { 1409 Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 1410 1411 PetscFunctionBegin; 1412 PetscCall(MatDenseGetColumn(mat->A, col, vals)); 1413 PetscFunctionReturn(PETSC_SUCCESS); 1414 } 1415 1416 static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A, PetscScalar **vals) 1417 { 1418 Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 1419 1420 PetscFunctionBegin; 1421 PetscCall(MatDenseRestoreColumn(mat->A, vals)); 1422 PetscFunctionReturn(PETSC_SUCCESS); 1423 } 1424 1425 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 1426 { 1427 Mat_MPIDense *mat; 1428 PetscInt m, nloc, N; 1429 1430 PetscFunctionBegin; 1431 PetscCall(MatGetSize(inmat, &m, &N)); 1432 PetscCall(MatGetLocalSize(inmat, NULL, &nloc)); 1433 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 1434 PetscInt sum; 1435 1436 if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N)); 1437 /* Check sum(n) = N */ 1438 PetscCall(MPIU_Allreduce(&n, &sum, 1, 
MPIU_INT, MPI_SUM, comm)); 1439 PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N); 1440 1441 PetscCall(MatCreateDense(comm, m, n, PETSC_DETERMINE, N, NULL, outmat)); 1442 PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 1443 } 1444 1445 /* numeric phase */ 1446 mat = (Mat_MPIDense *)(*outmat)->data; 1447 PetscCall(MatCopy(inmat, mat->A, SAME_NONZERO_PATTERN)); 1448 PetscFunctionReturn(PETSC_SUCCESS); 1449 } 1450 1451 PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A, PetscInt col, Vec *v) 1452 { 1453 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1454 PetscInt lda; 1455 1456 PetscFunctionBegin; 1457 PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1458 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1459 if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec)); 1460 a->vecinuse = col + 1; 1461 PetscCall(MatDenseGetLDA(a->A, &lda)); 1462 PetscCall(MatDenseGetArray(a->A, (PetscScalar **)&a->ptrinuse)); 1463 PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)lda))); 1464 *v = a->cvec; 1465 PetscFunctionReturn(PETSC_SUCCESS); 1466 } 1467 1468 PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A, PetscInt col, Vec *v) 1469 { 1470 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1471 1472 PetscFunctionBegin; 1473 PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first"); 1474 PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector"); 1475 a->vecinuse = 0; 1476 PetscCall(MatDenseRestoreArray(a->A, (PetscScalar **)&a->ptrinuse)); 1477 PetscCall(VecResetArray(a->cvec)); 1478 if (v) *v = NULL; 1479 PetscFunctionReturn(PETSC_SUCCESS); 1480 } 1481 1482 PetscErrorCode 
MatDenseGetColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v)
/* Expose global column `col` of the local block as a read-locked Vec. */
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscInt      lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
  a->vecinuse = col + 1; /* +1 so that column 0 still reads as "in use" */
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(MatDenseGetArrayRead(a->A, &a->ptrinuse));
  PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)lda)));
  PetscCall(VecLockReadPush(a->cvec));
  *v = a->cvec;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Release the read-locked column Vec and pop its read lock. */
PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
  a->vecinuse = 0;
  PetscCall(MatDenseRestoreArrayRead(a->A, &a->ptrinuse));
  PetscCall(VecLockReadPop(a->cvec));
  PetscCall(VecResetArray(a->cvec));
  if (v) *v = NULL;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Expose global column `col` of the local block as a write-only Vec. */
PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscInt      lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
  a->vecinuse = col + 1; /* +1 so that column 0 still reads as "in use" */
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(MatDenseGetArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
  PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)lda)));
  *v = a->cvec;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Release the write-only column Vec obtained above. */
PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
  a->vecinuse = 0;
  PetscCall(MatDenseRestoreArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
  PetscCall(VecResetArray(a->cvec));
  if (v) *v = NULL;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Expose the submatrix with global rows [rbegin,rend) and columns [cbegin,cend)
   as a cached parallel dense matrix (a->cmat) that aliases this matrix's
   storage. The cached matrix's layouts are (re)built only when the requested
   extents differ from the previous call; only one submatrix may be outstanding. */
static PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A, PetscInt rbegin, PetscInt rend, PetscInt cbegin, PetscInt cend, Mat *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat_MPIDense *c;
  MPI_Comm      comm;
  PetscInt      pbegin, pend;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCheck(!a->vecinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  /* clip the requested global row range against this rank's ownership range */
  pbegin = PetscMax(0, PetscMin(A->rmap->rend, rbegin) - A->rmap->rstart);
  pend   = PetscMin(A->rmap->n, PetscMax(0, rend - A->rmap->rstart));
  if (!a->cmat) {
    PetscCall(MatCreate(comm, &a->cmat));
    PetscCall(MatSetType(a->cmat, ((PetscObject)A)->type_name));
    if (rend - rbegin == A->rmap->N) PetscCall(PetscLayoutReference(A->rmap, &a->cmat->rmap));
    else {
      PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin));
      PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin));
      PetscCall(PetscLayoutSetUp(a->cmat->rmap));
    }
    PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin));
    PetscCall(PetscLayoutSetUp(a->cmat->cmap));
  } else {
    /* decide (collectively when sizes are ambiguous) whether the cached row
       layout can be reused for this request */
    PetscBool same = (PetscBool)(rend - rbegin == a->cmat->rmap->N);
    if (same && a->cmat->rmap->N != A->rmap->N) {
      same = (PetscBool)(pend - pbegin == a->cmat->rmap->n);
      PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &same, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
    }
    if (!same) {
      PetscCall(PetscLayoutDestroy(&a->cmat->rmap));
      PetscCall(PetscLayoutCreate(comm, &a->cmat->rmap));
      PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin));
      PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin));
      PetscCall(PetscLayoutSetUp(a->cmat->rmap));
    }
    if (cend - cbegin != a->cmat->cmap->N) {
      PetscCall(PetscLayoutDestroy(&a->cmat->cmap));
      PetscCall(PetscLayoutCreate(comm, &a->cmat->cmap));
      PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin));
      PetscCall(PetscLayoutSetUp(a->cmat->cmap));
    }
  }
  c = (Mat_MPIDense *)a->cmat->data;
  PetscCheck(!c->A, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseGetSubMatrix(a->A, pbegin, pend, cbegin, cend, &c->A));

  a->cmat->preallocated = PETSC_TRUE;
  a->cmat->assembled    = PETSC_TRUE;
#if defined(PETSC_HAVE_DEVICE)
  a->cmat->offloadmask = c->A->offloadmask;
#endif
  a->matinuse = cbegin + 1; /* +1 so that cbegin == 0 still reads as "in use" */
  *v          = a->cmat;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Return the cached submatrix obtained with MatDenseGetSubMatrix_MPIDense(). */
static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A, Mat *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat_MPIDense *c;

  PetscFunctionBegin;
  PetscCheck(a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetSubMatrix() first");
  PetscCheck(a->cmat, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal matrix");
  PetscCheck(*v == a->cmat, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Not the matrix obtained from MatDenseGetSubMatrix()");
  a->matinuse = 0;
  c           = (Mat_MPIDense *)a->cmat->data;
  PetscCall(MatDenseRestoreSubMatrix(a->A, &c->A));
  if (v) *v = NULL;
#if defined(PETSC_HAVE_DEVICE)
  A->offloadmask = a->A->offloadmask;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*MC
   MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.

   Options Database Key:
. -mat_type mpidense - sets the matrix type to `MATMPIDENSE` during a call to `MatSetFromOptions()`

  Level: beginner

.seealso: [](ch_matrices), `Mat`, `MatCreateDense()`, `MATSEQDENSE`, `MATDENSE`
M*/
/* Constructor for MATMPIDENSE: install the function table, create the stash
   used for off-process MatSetValues, and compose the type-specific methods. */
PetscErrorCode MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense *a;

  PetscFunctionBegin;
  PetscCall(PetscNew(&a));
  mat->data   = (void *)a;
  mat->ops[0] = MatOps_Values;

  mat->insertmode = NOT_SET_VALUES;

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;

  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)mat), 1, &mat->stash));

  /* stuff used for matrix vector multiply */
  a->lvec        = NULL;
  a->Mvctx       = NULL;
  a->roworiented = PETSC_TRUE;

  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", MatDenseGetLDA_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", MatDenseSetLDA_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", MatDenseGetArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", MatDenseRestoreArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,
"MatDenseGetArrayRead_C", MatDenseGetArrayRead_MPIDense)); 1659 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", MatDenseRestoreArrayRead_MPIDense)); 1660 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", MatDenseGetArrayWrite_MPIDense)); 1661 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", MatDenseRestoreArrayWrite_MPIDense)); 1662 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", MatDensePlaceArray_MPIDense)); 1663 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", MatDenseResetArray_MPIDense)); 1664 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", MatDenseReplaceArray_MPIDense)); 1665 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDense)); 1666 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDense)); 1667 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDense)); 1668 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDense)); 1669 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDense)); 1670 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDense)); 1671 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", MatDenseGetSubMatrix_MPIDense)); 1672 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", MatDenseRestoreSubMatrix_MPIDense)); 1673 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense)); 1674 PetscCall(PetscObjectComposeFunction((PetscObject)mat, 
"MatConvert_mpidense_mpiaij_C", MatConvert_MPIDense_MPIAIJ)); 1675 #if defined(PETSC_HAVE_ELEMENTAL) 1676 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", MatConvert_MPIDense_Elemental)); 1677 #endif 1678 #if defined(PETSC_HAVE_SCALAPACK) 1679 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", MatConvert_Dense_ScaLAPACK)); 1680 #endif 1681 #if defined(PETSC_HAVE_CUDA) 1682 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", MatConvert_MPIDense_MPIDenseCUDA)); 1683 #endif 1684 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", MatMPIDenseSetPreallocation_MPIDense)); 1685 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense)); 1686 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ)); 1687 #if defined(PETSC_HAVE_CUDA) 1688 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense)); 1689 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ)); 1690 #endif 1691 #if defined(PETSC_HAVE_HIP) 1692 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", MatConvert_MPIDense_MPIDenseHIP)); 1693 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense)); 1694 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ)); 1695 #endif 1696 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", MatDenseGetColumn_MPIDense)); 
1697 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", MatDenseRestoreColumn_MPIDense)); 1698 PetscCall(PetscObjectChangeTypeName((PetscObject)mat, MATMPIDENSE)); 1699 PetscFunctionReturn(PETSC_SUCCESS); 1700 } 1701 1702 /*MC 1703 MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices. 1704 1705 This matrix type is identical to `MATSEQDENSE` when constructed with a single process communicator, 1706 and `MATMPIDENSE` otherwise. 1707 1708 Options Database Key: 1709 . -mat_type dense - sets the matrix type to `MATDENSE` during a call to `MatSetFromOptions()` 1710 1711 Level: beginner 1712 1713 .seealso: [](ch_matrices), `Mat`, `MATSEQDENSE`, `MATMPIDENSE`, `MATDENSECUDA`, `MATDENSEHIP` 1714 M*/ 1715 1716 /*@C 1717 MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries 1718 1719 Collective 1720 1721 Input Parameters: 1722 + B - the matrix 1723 - data - optional location of matrix data. Set to `NULL` for PETSc 1724 to control all matrix memory allocation. 1725 1726 Level: intermediate 1727 1728 Notes: 1729 The dense format is fully compatible with standard Fortran 1730 storage by columns. 1731 1732 The data input variable is intended primarily for Fortran programmers 1733 who wish to allocate their own matrix memory space. Most users should 1734 set `data` to `NULL`. 1735 1736 .seealso: [](ch_matrices), `Mat`, `MATMPIDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()` 1737 @*/ 1738 PetscErrorCode MatMPIDenseSetPreallocation(Mat B, PetscScalar *data) 1739 { 1740 PetscFunctionBegin; 1741 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 1742 PetscTryMethod(B, "MatMPIDenseSetPreallocation_C", (Mat, PetscScalar *), (B, data)); 1743 PetscFunctionReturn(PETSC_SUCCESS); 1744 } 1745 1746 /*@ 1747 MatDensePlaceArray - Allows one to replace the array in a `MATDENSE` matrix with an 1748 array provided by the user. 
This is useful to avoid copying an array 1749 into a matrix 1750 1751 Not Collective 1752 1753 Input Parameters: 1754 + mat - the matrix 1755 - array - the array in column major order 1756 1757 Level: developer 1758 1759 Note: 1760 You can return to the original array with a call to `MatDenseResetArray()`. The user is responsible for freeing this array; it will not be 1761 freed when the matrix is destroyed. 1762 1763 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDenseResetArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`, 1764 `MatDenseReplaceArray()` 1765 @*/ 1766 PetscErrorCode MatDensePlaceArray(Mat mat, const PetscScalar *array) 1767 { 1768 PetscFunctionBegin; 1769 PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 1770 PetscUseMethod(mat, "MatDensePlaceArray_C", (Mat, const PetscScalar *), (mat, array)); 1771 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 1772 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 1773 mat->offloadmask = PETSC_OFFLOAD_CPU; 1774 #endif 1775 PetscFunctionReturn(PETSC_SUCCESS); 1776 } 1777 1778 /*@ 1779 MatDenseResetArray - Resets the matrix array to that it previously had before the call to `MatDensePlaceArray()` 1780 1781 Not Collective 1782 1783 Input Parameter: 1784 . 
mat - the matrix 1785 1786 Level: developer 1787 1788 Note: 1789 You can only call this after a call to `MatDensePlaceArray()` 1790 1791 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDensePlaceArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()` 1792 @*/ 1793 PetscErrorCode MatDenseResetArray(Mat mat) 1794 { 1795 PetscFunctionBegin; 1796 PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 1797 PetscUseMethod(mat, "MatDenseResetArray_C", (Mat), (mat)); 1798 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 1799 PetscFunctionReturn(PETSC_SUCCESS); 1800 } 1801 1802 /*@ 1803 MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an 1804 array provided by the user. This is useful to avoid copying an array 1805 into a matrix 1806 1807 Not Collective 1808 1809 Input Parameters: 1810 + mat - the matrix 1811 - array - the array in column major order 1812 1813 Level: developer 1814 1815 Note: 1816 The memory passed in MUST be obtained with `PetscMalloc()` and CANNOT be 1817 freed by the user. It will be freed when the matrix is destroyed. 1818 1819 .seealso: [](ch_matrices), `Mat`, `MatDensePlaceArray()`, `MatDenseGetArray()`, `VecReplaceArray()` 1820 @*/ 1821 PetscErrorCode MatDenseReplaceArray(Mat mat, const PetscScalar *array) 1822 { 1823 PetscFunctionBegin; 1824 PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 1825 PetscUseMethod(mat, "MatDenseReplaceArray_C", (Mat, const PetscScalar *), (mat, array)); 1826 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 1827 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 1828 mat->offloadmask = PETSC_OFFLOAD_CPU; 1829 #endif 1830 PetscFunctionReturn(PETSC_SUCCESS); 1831 } 1832 1833 /*@C 1834 MatCreateDense - Creates a matrix in `MATDENSE` format. 1835 1836 Collective 1837 1838 Input Parameters: 1839 + comm - MPI communicator 1840 . m - number of local rows (or `PETSC_DECIDE` to have calculated if `M` is given) 1841 . 
n - number of local columns (or `PETSC_DECIDE` to have calculated if `N` is given) 1842 . M - number of global rows (or `PETSC_DECIDE` to have calculated if `m` is given) 1843 . N - number of global columns (or `PETSC_DECIDE` to have calculated if `n` is given) 1844 - data - optional location of matrix data. Set data to `NULL` (`PETSC_NULL_SCALAR` for Fortran users) for PETSc 1845 to control all matrix memory allocation. 1846 1847 Output Parameter: 1848 . A - the matrix 1849 1850 Level: intermediate 1851 1852 Notes: 1853 The dense format is fully compatible with standard Fortran 1854 storage by columns. 1855 1856 Although local portions of the matrix are stored in column-major 1857 order, the matrix is partitioned across MPI ranks by row. 1858 1859 The data input variable is intended primarily for Fortran programmers 1860 who wish to allocate their own matrix memory space. Most users should 1861 set `data` to `NULL` (`PETSC_NULL_SCALAR` for Fortran users). 1862 1863 The user MUST specify either the local or global matrix dimensions 1864 (possibly both). 
1865 1866 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()` 1867 @*/ 1868 PetscErrorCode MatCreateDense(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar *data, Mat *A) 1869 { 1870 PetscFunctionBegin; 1871 PetscCall(MatCreate(comm, A)); 1872 PetscCall(MatSetSizes(*A, m, n, M, N)); 1873 PetscCall(MatSetType(*A, MATDENSE)); 1874 PetscCall(MatSeqDenseSetPreallocation(*A, data)); 1875 PetscCall(MatMPIDenseSetPreallocation(*A, data)); 1876 PetscFunctionReturn(PETSC_SUCCESS); 1877 } 1878 1879 static PetscErrorCode MatDuplicate_MPIDense(Mat A, MatDuplicateOption cpvalues, Mat *newmat) 1880 { 1881 Mat mat; 1882 Mat_MPIDense *a, *oldmat = (Mat_MPIDense *)A->data; 1883 1884 PetscFunctionBegin; 1885 *newmat = NULL; 1886 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat)); 1887 PetscCall(MatSetSizes(mat, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 1888 PetscCall(MatSetType(mat, ((PetscObject)A)->type_name)); 1889 a = (Mat_MPIDense *)mat->data; 1890 1891 mat->factortype = A->factortype; 1892 mat->assembled = PETSC_TRUE; 1893 mat->preallocated = PETSC_TRUE; 1894 1895 mat->insertmode = NOT_SET_VALUES; 1896 a->donotstash = oldmat->donotstash; 1897 1898 PetscCall(PetscLayoutReference(A->rmap, &mat->rmap)); 1899 PetscCall(PetscLayoutReference(A->cmap, &mat->cmap)); 1900 1901 PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A)); 1902 1903 *newmat = mat; 1904 PetscFunctionReturn(PETSC_SUCCESS); 1905 } 1906 1907 static PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer) 1908 { 1909 PetscBool isbinary; 1910 #if defined(PETSC_HAVE_HDF5) 1911 PetscBool ishdf5; 1912 #endif 1913 1914 PetscFunctionBegin; 1915 PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1); 1916 PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2); 1917 /* force binary viewer to load .info file if it has not yet done so */ 1918 PetscCall(PetscViewerSetUp(viewer)); 1919 
PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1920 #if defined(PETSC_HAVE_HDF5) 1921 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5)); 1922 #endif 1923 if (isbinary) { 1924 PetscCall(MatLoad_Dense_Binary(newMat, viewer)); 1925 #if defined(PETSC_HAVE_HDF5) 1926 } else if (ishdf5) { 1927 PetscCall(MatLoad_Dense_HDF5(newMat, viewer)); 1928 #endif 1929 } else SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name); 1930 PetscFunctionReturn(PETSC_SUCCESS); 1931 } 1932 1933 static PetscErrorCode MatEqual_MPIDense(Mat A, Mat B, PetscBool *flag) 1934 { 1935 Mat_MPIDense *matB = (Mat_MPIDense *)B->data, *matA = (Mat_MPIDense *)A->data; 1936 Mat a, b; 1937 1938 PetscFunctionBegin; 1939 a = matA->A; 1940 b = matB->A; 1941 PetscCall(MatEqual(a, b, flag)); 1942 PetscCall(MPIU_Allreduce(MPI_IN_PLACE, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A))); 1943 PetscFunctionReturn(PETSC_SUCCESS); 1944 } 1945 1946 static PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data) 1947 { 1948 Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data; 1949 1950 PetscFunctionBegin; 1951 PetscCall(PetscFree2(atb->sendbuf, atb->recvcounts)); 1952 PetscCall(MatDestroy(&atb->atb)); 1953 PetscCall(PetscFree(atb)); 1954 PetscFunctionReturn(PETSC_SUCCESS); 1955 } 1956 1957 static PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data) 1958 { 1959 Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data; 1960 1961 PetscFunctionBegin; 1962 PetscCall(PetscFree2(abt->buf[0], abt->buf[1])); 1963 PetscCall(PetscFree2(abt->recvcounts, abt->recvdispls)); 1964 PetscCall(PetscFree(abt)); 1965 PetscFunctionReturn(PETSC_SUCCESS); 1966 } 1967 1968 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) 1969 { 1970 Mat_MPIDense *a = 
(Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data; 1971 Mat_TransMatMultDense *atb; 1972 MPI_Comm comm; 1973 PetscMPIInt size, *recvcounts; 1974 PetscScalar *carray, *sendbuf; 1975 const PetscScalar *atbarray; 1976 PetscInt i, cN = C->cmap->N, proc, k, j, lda; 1977 const PetscInt *ranges; 1978 1979 PetscFunctionBegin; 1980 MatCheckProduct(C, 3); 1981 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 1982 atb = (Mat_TransMatMultDense *)C->product->data; 1983 recvcounts = atb->recvcounts; 1984 sendbuf = atb->sendbuf; 1985 1986 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 1987 PetscCallMPI(MPI_Comm_size(comm, &size)); 1988 1989 /* compute atbarray = aseq^T * bseq */ 1990 PetscCall(MatTransposeMatMult(a->A, b->A, atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX, PETSC_DEFAULT, &atb->atb)); 1991 1992 PetscCall(MatGetOwnershipRanges(C, &ranges)); 1993 1994 /* arrange atbarray into sendbuf */ 1995 PetscCall(MatDenseGetArrayRead(atb->atb, &atbarray)); 1996 PetscCall(MatDenseGetLDA(atb->atb, &lda)); 1997 for (proc = 0, k = 0; proc < size; proc++) { 1998 for (j = 0; j < cN; j++) { 1999 for (i = ranges[proc]; i < ranges[proc + 1]; i++) sendbuf[k++] = atbarray[i + j * lda]; 2000 } 2001 } 2002 PetscCall(MatDenseRestoreArrayRead(atb->atb, &atbarray)); 2003 2004 /* sum all atbarray to local values of C */ 2005 PetscCall(MatDenseGetArrayWrite(c->A, &carray)); 2006 PetscCallMPI(MPI_Reduce_scatter(sendbuf, carray, recvcounts, MPIU_SCALAR, MPIU_SUM, comm)); 2007 PetscCall(MatDenseRestoreArrayWrite(c->A, &carray)); 2008 PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 2009 PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 2010 PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 2011 PetscFunctionReturn(PETSC_SUCCESS); 2012 } 2013 2014 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C) 2015 { 2016 MPI_Comm comm; 2017 
PetscMPIInt size; 2018 PetscInt cm = A->cmap->n, cM, cN = B->cmap->N; 2019 Mat_TransMatMultDense *atb; 2020 PetscBool cisdense = PETSC_FALSE; 2021 PetscInt i; 2022 const PetscInt *ranges; 2023 2024 PetscFunctionBegin; 2025 MatCheckProduct(C, 4); 2026 PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 2027 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 2028 if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) { 2029 SETERRQ(comm, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend); 2030 } 2031 2032 /* create matrix product C */ 2033 PetscCall(MatSetSizes(C, cm, B->cmap->n, A->cmap->N, B->cmap->N)); 2034 #if defined(PETSC_HAVE_CUDA) 2035 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, "")); 2036 #endif 2037 #if defined(PETSC_HAVE_HIP) 2038 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSEHIP, "")); 2039 #endif 2040 if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 2041 PetscCall(MatSetUp(C)); 2042 2043 /* create data structure for reuse C */ 2044 PetscCallMPI(MPI_Comm_size(comm, &size)); 2045 PetscCall(PetscNew(&atb)); 2046 cM = C->rmap->N; 2047 PetscCall(PetscMalloc2(cM * cN, &atb->sendbuf, size, &atb->recvcounts)); 2048 PetscCall(MatGetOwnershipRanges(C, &ranges)); 2049 for (i = 0; i < size; i++) atb->recvcounts[i] = (ranges[i + 1] - ranges[i]) * cN; 2050 2051 C->product->data = atb; 2052 C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense; 2053 PetscFunctionReturn(PETSC_SUCCESS); 2054 } 2055 2056 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C) 2057 { 2058 MPI_Comm comm; 2059 PetscMPIInt i, size; 2060 PetscInt maxRows, bufsiz; 2061 PetscMPIInt tag; 2062 
PetscInt alg; 2063 Mat_MatTransMultDense *abt; 2064 Mat_Product *product = C->product; 2065 PetscBool flg; 2066 2067 PetscFunctionBegin; 2068 MatCheckProduct(C, 4); 2069 PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 2070 /* check local size of A and B */ 2071 PetscCheck(A->cmap->n == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local column dimensions are incompatible, A (%" PetscInt_FMT ") != B (%" PetscInt_FMT ")", A->cmap->n, B->cmap->n); 2072 2073 PetscCall(PetscStrcmp(product->alg, "allgatherv", &flg)); 2074 alg = flg ? 0 : 1; 2075 2076 /* setup matrix product C */ 2077 PetscCall(MatSetSizes(C, A->rmap->n, B->rmap->n, A->rmap->N, B->rmap->N)); 2078 PetscCall(MatSetType(C, MATMPIDENSE)); 2079 PetscCall(MatSetUp(C)); 2080 PetscCall(PetscObjectGetNewTag((PetscObject)C, &tag)); 2081 2082 /* create data structure for reuse C */ 2083 PetscCall(PetscObjectGetComm((PetscObject)C, &comm)); 2084 PetscCallMPI(MPI_Comm_size(comm, &size)); 2085 PetscCall(PetscNew(&abt)); 2086 abt->tag = tag; 2087 abt->alg = alg; 2088 switch (alg) { 2089 case 1: /* alg: "cyclic" */ 2090 for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i])); 2091 bufsiz = A->cmap->N * maxRows; 2092 PetscCall(PetscMalloc2(bufsiz, &abt->buf[0], bufsiz, &abt->buf[1])); 2093 break; 2094 default: /* alg: "allgatherv" */ 2095 PetscCall(PetscMalloc2(B->rmap->n * B->cmap->N, &abt->buf[0], B->rmap->N * B->cmap->N, &abt->buf[1])); 2096 PetscCall(PetscMalloc2(size, &abt->recvcounts, size + 1, &abt->recvdispls)); 2097 for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N; 2098 for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i]; 2099 break; 2100 } 2101 2102 C->product->data = abt; 2103 C->product->destroy = MatDestroy_MatMatTransMult_MPIDense_MPIDense; 2104 C->ops->mattransposemultnumeric = 
MatMatTransposeMultNumeric_MPIDense_MPIDense; 2105 PetscFunctionReturn(PETSC_SUCCESS); 2106 } 2107 2108 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C) 2109 { 2110 Mat_MPIDense *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data; 2111 Mat_MatTransMultDense *abt; 2112 MPI_Comm comm; 2113 PetscMPIInt rank, size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom; 2114 PetscScalar *sendbuf, *recvbuf = NULL, *cv; 2115 PetscInt i, cK = A->cmap->N, k, j, bn; 2116 PetscScalar _DOne = 1.0, _DZero = 0.0; 2117 const PetscScalar *av, *bv; 2118 PetscBLASInt cm, cn, ck, alda, blda = 0, clda; 2119 MPI_Request reqs[2]; 2120 const PetscInt *ranges; 2121 2122 PetscFunctionBegin; 2123 MatCheckProduct(C, 3); 2124 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 2125 abt = (Mat_MatTransMultDense *)C->product->data; 2126 PetscCall(PetscObjectGetComm((PetscObject)C, &comm)); 2127 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 2128 PetscCallMPI(MPI_Comm_size(comm, &size)); 2129 PetscCall(MatDenseGetArrayRead(a->A, &av)); 2130 PetscCall(MatDenseGetArrayRead(b->A, &bv)); 2131 PetscCall(MatDenseGetArrayWrite(c->A, &cv)); 2132 PetscCall(MatDenseGetLDA(a->A, &i)); 2133 PetscCall(PetscBLASIntCast(i, &alda)); 2134 PetscCall(MatDenseGetLDA(b->A, &i)); 2135 PetscCall(PetscBLASIntCast(i, &blda)); 2136 PetscCall(MatDenseGetLDA(c->A, &i)); 2137 PetscCall(PetscBLASIntCast(i, &clda)); 2138 PetscCall(MatGetOwnershipRanges(B, &ranges)); 2139 bn = B->rmap->n; 2140 if (blda == bn) { 2141 sendbuf = (PetscScalar *)bv; 2142 } else { 2143 sendbuf = abt->buf[0]; 2144 for (k = 0, i = 0; i < cK; i++) { 2145 for (j = 0; j < bn; j++, k++) sendbuf[k] = bv[i * blda + j]; 2146 } 2147 } 2148 if (size > 1) { 2149 sendto = (rank + size - 1) % size; 2150 recvfrom = (rank + size + 1) % size; 2151 } else { 2152 sendto = recvfrom = 0; 2153 } 2154 PetscCall(PetscBLASIntCast(cK, &ck)); 2155 
PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm)); 2156 recvisfrom = rank; 2157 for (i = 0; i < size; i++) { 2158 /* we have finished receiving in sending, bufs can be read/modified */ 2159 PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */ 2160 PetscInt nextbn = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom]; 2161 2162 if (nextrecvisfrom != rank) { 2163 /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */ 2164 sendsiz = cK * bn; 2165 recvsiz = cK * nextbn; 2166 recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1]; 2167 PetscCallMPI(MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0])); 2168 PetscCallMPI(MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1])); 2169 } 2170 2171 /* local aseq * sendbuf^T */ 2172 PetscCall(PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn)); 2173 if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &cm, &cn, &ck, &_DOne, av, &alda, sendbuf, &cn, &_DZero, cv + clda * ranges[recvisfrom], &clda)); 2174 2175 if (nextrecvisfrom != rank) { 2176 /* wait for the sends and receives to complete, swap sendbuf and recvbuf */ 2177 PetscCallMPI(MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE)); 2178 } 2179 bn = nextbn; 2180 recvisfrom = nextrecvisfrom; 2181 sendbuf = recvbuf; 2182 } 2183 PetscCall(MatDenseRestoreArrayRead(a->A, &av)); 2184 PetscCall(MatDenseRestoreArrayRead(b->A, &bv)); 2185 PetscCall(MatDenseRestoreArrayWrite(c->A, &cv)); 2186 PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 2187 PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 2188 PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 2189 PetscFunctionReturn(PETSC_SUCCESS); 2190 } 2191 2192 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C) 2193 { 2194 Mat_MPIDense *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data; 2195 
Mat_MatTransMultDense *abt; 2196 MPI_Comm comm; 2197 PetscMPIInt size; 2198 PetscScalar *cv, *sendbuf, *recvbuf; 2199 const PetscScalar *av, *bv; 2200 PetscInt blda, i, cK = A->cmap->N, k, j, bn; 2201 PetscScalar _DOne = 1.0, _DZero = 0.0; 2202 PetscBLASInt cm, cn, ck, alda, clda; 2203 2204 PetscFunctionBegin; 2205 MatCheckProduct(C, 3); 2206 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 2207 abt = (Mat_MatTransMultDense *)C->product->data; 2208 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 2209 PetscCallMPI(MPI_Comm_size(comm, &size)); 2210 PetscCall(MatDenseGetArrayRead(a->A, &av)); 2211 PetscCall(MatDenseGetArrayRead(b->A, &bv)); 2212 PetscCall(MatDenseGetArrayWrite(c->A, &cv)); 2213 PetscCall(MatDenseGetLDA(a->A, &i)); 2214 PetscCall(PetscBLASIntCast(i, &alda)); 2215 PetscCall(MatDenseGetLDA(b->A, &blda)); 2216 PetscCall(MatDenseGetLDA(c->A, &i)); 2217 PetscCall(PetscBLASIntCast(i, &clda)); 2218 /* copy transpose of B into buf[0] */ 2219 bn = B->rmap->n; 2220 sendbuf = abt->buf[0]; 2221 recvbuf = abt->buf[1]; 2222 for (k = 0, j = 0; j < bn; j++) { 2223 for (i = 0; i < cK; i++, k++) sendbuf[k] = bv[i * blda + j]; 2224 } 2225 PetscCall(MatDenseRestoreArrayRead(b->A, &bv)); 2226 PetscCallMPI(MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm)); 2227 PetscCall(PetscBLASIntCast(cK, &ck)); 2228 PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm)); 2229 PetscCall(PetscBLASIntCast(c->A->cmap->n, &cn)); 2230 if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &cm, &cn, &ck, &_DOne, av, &alda, recvbuf, &ck, &_DZero, cv, &clda)); 2231 PetscCall(MatDenseRestoreArrayRead(a->A, &av)); 2232 PetscCall(MatDenseRestoreArrayRead(b->A, &bv)); 2233 PetscCall(MatDenseRestoreArrayWrite(c->A, &cv)); 2234 PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 2235 PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 2236 PetscCall(MatAssemblyEnd(C, 
MAT_FINAL_ASSEMBLY)); 2237 PetscFunctionReturn(PETSC_SUCCESS); 2238 } 2239 2240 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) 2241 { 2242 Mat_MatTransMultDense *abt; 2243 2244 PetscFunctionBegin; 2245 MatCheckProduct(C, 3); 2246 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 2247 abt = (Mat_MatTransMultDense *)C->product->data; 2248 switch (abt->alg) { 2249 case 1: 2250 PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C)); 2251 break; 2252 default: 2253 PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C)); 2254 break; 2255 } 2256 PetscFunctionReturn(PETSC_SUCCESS); 2257 } 2258 2259 static PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data) 2260 { 2261 Mat_MatMultDense *ab = (Mat_MatMultDense *)data; 2262 2263 PetscFunctionBegin; 2264 PetscCall(MatDestroy(&ab->Ce)); 2265 PetscCall(MatDestroy(&ab->Ae)); 2266 PetscCall(MatDestroy(&ab->Be)); 2267 PetscCall(PetscFree(ab)); 2268 PetscFunctionReturn(PETSC_SUCCESS); 2269 } 2270 2271 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) 2272 { 2273 Mat_MatMultDense *ab; 2274 Mat_MPIDense *mdn = (Mat_MPIDense *)A->data; 2275 2276 PetscFunctionBegin; 2277 MatCheckProduct(C, 3); 2278 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Missing product data"); 2279 ab = (Mat_MatMultDense *)C->product->data; 2280 if (ab->Ae && ab->Ce) { 2281 #if PetscDefined(HAVE_ELEMENTAL) 2282 PetscCall(MatConvert_MPIDense_Elemental(A, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Ae)); 2283 PetscCall(MatConvert_MPIDense_Elemental(B, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Be)); 2284 PetscCall(MatMatMultNumeric_Elemental(ab->Ae, ab->Be, ab->Ce)); 2285 PetscCall(MatConvert(ab->Ce, MATMPIDENSE, MAT_REUSE_MATRIX, &C)); 2286 #else 2287 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "PETSC_HAVE_ELEMENTAL not defined"); 2288 #endif 2289 } else { 2290 
const PetscScalar *read; 2291 PetscScalar *write; 2292 PetscInt lda; 2293 2294 PetscCall(MatDenseGetLDA(B, &lda)); 2295 PetscCall(MatDenseGetArrayRead(B, &read)); 2296 PetscCall(MatDenseGetArrayWrite(ab->Be, &write)); 2297 if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); /* cannot be done during the symbolic phase because of possible calls to MatProductReplaceMats() */ 2298 for (PetscInt i = 0; i < C->cmap->N; ++i) { 2299 PetscCall(PetscSFBcastBegin(mdn->Mvctx, MPIU_SCALAR, read + i * lda, write + i * ab->Be->rmap->n, MPI_REPLACE)); 2300 PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, read + i * lda, write + i * ab->Be->rmap->n, MPI_REPLACE)); 2301 } 2302 PetscCall(MatDenseRestoreArrayWrite(ab->Be, &write)); 2303 PetscCall(MatDenseRestoreArrayRead(B, &read)); 2304 PetscCall(MatMatMultNumeric_SeqDense_SeqDense(((Mat_MPIDense *)A->data)->A, ab->Be, ((Mat_MPIDense *)C->data)->A)); 2305 } 2306 PetscFunctionReturn(PETSC_SUCCESS); 2307 } 2308 2309 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C) 2310 { 2311 Mat_Product *product = C->product; 2312 PetscInt alg; 2313 Mat_MatMultDense *ab; 2314 PetscBool flg; 2315 2316 PetscFunctionBegin; 2317 MatCheckProduct(C, 4); 2318 PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 2319 /* check local size of A and B */ 2320 PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ", %" PetscInt_FMT ")", 2321 A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend); 2322 2323 PetscCall(PetscStrcmp(product->alg, "petsc", &flg)); 2324 alg = flg ? 
0 : 1; 2325 2326 /* setup C */ 2327 PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N)); 2328 PetscCall(MatSetType(C, MATMPIDENSE)); 2329 PetscCall(MatSetUp(C)); 2330 2331 /* create data structure for reuse Cdense */ 2332 PetscCall(PetscNew(&ab)); 2333 2334 switch (alg) { 2335 case 1: /* alg: "elemental" */ 2336 #if PetscDefined(HAVE_ELEMENTAL) 2337 /* create elemental matrices Ae and Be */ 2338 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &ab->Ae)); 2339 PetscCall(MatSetSizes(ab->Ae, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N)); 2340 PetscCall(MatSetType(ab->Ae, MATELEMENTAL)); 2341 PetscCall(MatSetUp(ab->Ae)); 2342 PetscCall(MatSetOption(ab->Ae, MAT_ROW_ORIENTED, PETSC_FALSE)); 2343 2344 PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &ab->Be)); 2345 PetscCall(MatSetSizes(ab->Be, PETSC_DECIDE, PETSC_DECIDE, B->rmap->N, B->cmap->N)); 2346 PetscCall(MatSetType(ab->Be, MATELEMENTAL)); 2347 PetscCall(MatSetUp(ab->Be)); 2348 PetscCall(MatSetOption(ab->Be, MAT_ROW_ORIENTED, PETSC_FALSE)); 2349 2350 /* compute symbolic Ce = Ae*Be */ 2351 PetscCall(MatCreate(PetscObjectComm((PetscObject)C), &ab->Ce)); 2352 PetscCall(MatMatMultSymbolic_Elemental(ab->Ae, ab->Be, fill, ab->Ce)); 2353 #else 2354 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "PETSC_HAVE_ELEMENTAL not defined"); 2355 #endif 2356 break; 2357 default: /* alg: "petsc" */ 2358 ab->Ae = NULL; 2359 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, A->cmap->N, B->cmap->N, NULL, &ab->Be)); 2360 ab->Ce = NULL; 2361 break; 2362 } 2363 2364 C->product->data = ab; 2365 C->product->destroy = MatDestroy_MatMatMult_MPIDense_MPIDense; 2366 C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense; 2367 PetscFunctionReturn(PETSC_SUCCESS); 2368 } 2369 2370 static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C) 2371 { 2372 Mat_Product *product = C->product; 2373 const char *algTypes[2] = {"petsc", "elemental"}; 2374 PetscInt alg, nalg = PetscDefined(HAVE_ELEMENTAL) ? 
2 : 1; 2375 PetscBool flg = PETSC_FALSE; 2376 2377 PetscFunctionBegin; 2378 /* Set default algorithm */ 2379 alg = 0; /* default is petsc */ 2380 PetscCall(PetscStrcmp(product->alg, "default", &flg)); 2381 if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg])); 2382 2383 /* Get runtime option */ 2384 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat"); 2385 PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_AB", algTypes, nalg, algTypes[alg], &alg, &flg)); 2386 PetscOptionsEnd(); 2387 if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg])); 2388 2389 C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense; 2390 C->ops->productsymbolic = MatProductSymbolic_AB; 2391 PetscFunctionReturn(PETSC_SUCCESS); 2392 } 2393 2394 static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C) 2395 { 2396 Mat_Product *product = C->product; 2397 Mat A = product->A, B = product->B; 2398 2399 PetscFunctionBegin; 2400 PetscCheck(A->rmap->rstart == B->rmap->rstart && A->rmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", 2401 A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend); 2402 C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense; 2403 C->ops->productsymbolic = MatProductSymbolic_AtB; 2404 PetscFunctionReturn(PETSC_SUCCESS); 2405 } 2406 2407 static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C) 2408 { 2409 Mat_Product *product = C->product; 2410 const char *algTypes[2] = {"allgatherv", "cyclic"}; 2411 PetscInt alg, nalg = 2; 2412 PetscBool flg = PETSC_FALSE; 2413 2414 PetscFunctionBegin; 2415 /* Set default algorithm */ 2416 alg = 0; /* default is allgatherv */ 2417 PetscCall(PetscStrcmp(product->alg, "default", &flg)); 2418 if (flg) 
PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg])); 2419 2420 /* Get runtime option */ 2421 if (product->api_user) { 2422 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatTransposeMult", "Mat"); 2423 PetscCall(PetscOptionsEList("-matmattransmult_mpidense_mpidense_via", "Algorithmic approach", "MatMatTransposeMult", algTypes, nalg, algTypes[alg], &alg, &flg)); 2424 PetscOptionsEnd(); 2425 } else { 2426 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_ABt", "Mat"); 2427 PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_ABt", algTypes, nalg, algTypes[alg], &alg, &flg)); 2428 PetscOptionsEnd(); 2429 } 2430 if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg])); 2431 2432 C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense; 2433 C->ops->productsymbolic = MatProductSymbolic_ABt; 2434 PetscFunctionReturn(PETSC_SUCCESS); 2435 } 2436 2437 static PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C) 2438 { 2439 Mat_Product *product = C->product; 2440 2441 PetscFunctionBegin; 2442 switch (product->type) { 2443 case MATPRODUCT_AB: 2444 PetscCall(MatProductSetFromOptions_MPIDense_AB(C)); 2445 break; 2446 case MATPRODUCT_AtB: 2447 PetscCall(MatProductSetFromOptions_MPIDense_AtB(C)); 2448 break; 2449 case MATPRODUCT_ABt: 2450 PetscCall(MatProductSetFromOptions_MPIDense_ABt(C)); 2451 break; 2452 default: 2453 break; 2454 } 2455 PetscFunctionReturn(PETSC_SUCCESS); 2456 } 2457