/*
   Basic functions for basic parallel dense matrices.
   Portions of this code are under:
   Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
*/

#include <../src/mat/impls/dense/mpi/mpidense.h> /*I "petscmat.h" I*/
#include <../src/mat/impls/aij/mpi/mpiaij.h>
#include <petscblaslapack.h>

/*@
  MatDenseGetLocalMatrix - For a `MATMPIDENSE` or `MATSEQDENSE` matrix returns the sequential
  matrix that represents the operator. For sequential matrices it returns itself.

  Input Parameter:
. A - the sequential or MPI `MATDENSE` matrix

  Output Parameter:
. B - the inner matrix

  Level: intermediate

.seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATMPIDENSE`, `MATSEQDENSE`
@*/
PetscErrorCode MatDenseGetLocalMatrix(Mat A, Mat *B)
{
  Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
  PetscBool     flg;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscAssertPointer(B, 2);
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &flg));
  if (flg) *B = mat->A; /* MPI case: hand back the per-process sequential block */
  else {
    PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQDENSE, &flg));
    PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)A)->type_name);
    *B = A; /* sequential case: the matrix is its own local matrix */
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Copy the local dense block of A into the local dense block of B; layouts are assumed compatible */
static PetscErrorCode MatCopy_MPIDense(Mat A, Mat B, MatStructure s)
{
  Mat_MPIDense *Amat = (Mat_MPIDense *)A->data;
  Mat_MPIDense *Bmat = (Mat_MPIDense *)B->data;

  PetscFunctionBegin;
  PetscCall(MatCopy(Amat->A, Bmat->A, s));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Add alpha to every diagonal entry owned by this process */
PetscErrorCode MatShift_MPIDense(Mat A, PetscScalar alpha)
{
  Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
  PetscInt      j, lda, rstart = A->rmap->rstart, rend = A->rmap->rend, rend2;
  PetscScalar  *v;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArray(mat->A, &v));
  PetscCall(MatDenseGetLDA(mat->A, &lda));
  /* for a rectangular matrix only rows whose global index is also a valid column index have a diagonal entry */
  rend2 = PetscMin(rend, A->cmap->N);
  if (rend2 > rstart) {
    /* local column index of the diagonal in row (j - rstart) is the global index j */
    for (j = rstart; j < rend2; j++) v[j - rstart + j * lda] += alpha;
    PetscCall(PetscLogFlops(rend2 - rstart));
  }
  PetscCall(MatDenseRestoreArray(mat->A, &v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Fetch one row of the matrix; only locally owned rows are supported (no communication is done) */
static PetscErrorCode MatGetRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
  PetscInt      lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;

  PetscFunctionBegin;
  PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows");
  lrow = row - rstart; /* convert global row to local row of the inner sequential matrix */
  PetscCall(MatGetRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Release the row obtained with MatGetRow_MPIDense() */
static PetscErrorCode MatRestoreRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
  PetscInt      lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;

  PetscFunctionBegin;
  PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows");
  lrow = row - rstart;
  PetscCall(MatRestoreRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Return the (square) on-diagonal block owned by this process; the block is created on first call
   and cached on A under the name "DiagonalBlock", sharing storage with A's local array */
static PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A, Mat *a)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)A->data;
  PetscInt      m = A->rmap->n, rstart = A->rmap->rstart;
  PetscScalar  *array;
  MPI_Comm      comm;
  PetscBool     flg;
  Mat           B;

  PetscFunctionBegin;
  PetscCall(MatHasCongruentLayouts(A, &flg));
  PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only square matrices supported.");
  PetscCall(PetscObjectQuery((PetscObject)A, "DiagonalBlock", (PetscObject *)&B));
  if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */
#if PetscDefined(HAVE_CUDA)
    PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSECUDA, &flg));
    PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSECUDA);
#elif PetscDefined(HAVE_HIP)
    PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSEHIP, &flg));
    PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSEHIP);
#endif
    PetscCall(PetscObjectGetComm((PetscObject)(mdn->A), &comm));
    PetscCall(MatCreate(comm, &B));
    PetscCall(MatSetSizes(B, m, m, m, m));
    PetscCall(MatSetType(B, ((PetscObject)mdn->A)->type_name));
    /* B aliases A's storage starting at column rstart of the local array.
       NOTE(review): the offset m * rstart assumes the leading dimension of mdn->A equals m — confirm for non-default LDA */
    PetscCall(MatDenseGetArrayRead(mdn->A, (const PetscScalar **)&array));
    PetscCall(MatSeqDenseSetPreallocation(B, array + m * rstart));
    PetscCall(MatDenseRestoreArrayRead(mdn->A, (const PetscScalar **)&array));
    PetscCall(PetscObjectCompose((PetscObject)A, "DiagonalBlock", (PetscObject)B));
    *a = B;
    PetscCall(MatDestroy(&B)); /* drop our reference; the composition on A keeps B alive */
  } else *a = B;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Insert/add values; locally owned rows go straight into the inner matrix, off-process rows are
   stashed and communicated during assembly (unless donotstash is set) */
static PetscErrorCode MatSetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIDense *A = (Mat_MPIDense *)mat->data;
  PetscInt      i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;
  PetscBool     roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row index means "skip" */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        /* v is stored row by row: row i of the input occupies v[i*n .. i*n+n-1] */
        PetscCall(MatSetValues(A->A, 1, &row, n, idxn, v + i * n, addv));
      } else {
        /* column-oriented input: entry (i,j) lives at v[i + j*m] */
        for (j = 0; j < n; j++) {
          if (idxn[j] < 0) continue;
          PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large");
          PetscCall(MatSetValues(A->A, 1, &row, 1, &idxn[j], v + i + j * m, addv));
        }
      }
    } else if (!A->donotstash) {
      /* off-process entry: record it for exchange in MatAssemblyBegin/End */
      mat->assembled = PETSC_FALSE;
      if (roworiented) {
        PetscCall(MatStashValuesRow_Private(&mat->stash, idxm[i], n, idxn, v + i * n, PETSC_FALSE));
      } else {
        PetscCall(MatStashValuesCol_Private(&mat->stash, idxm[i], n, idxn, v + i, m, PETSC_FALSE));
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Retrieve values; only locally owned rows are supported (no communication is done) */
static PetscErrorCode MatGetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
  PetscInt      i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j = 0; j < n; j++) {
        if (idxn[j] < 0) continue; /* negative column */
        PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large");
        PetscCall(MatGetValues(mdn->A, 1, &row, 1, &idxn[j], v + i * n + j));
      }
    } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Leading dimension of the local dense storage (forwarded to the inner sequential matrix) */
static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A, PetscInt *lda)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseGetLDA(a->A, lda));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Set the leading dimension; lazily creates the inner sequential matrix (of matching CUDA/HIP
   flavor) if it does not exist yet */
static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A, PetscInt lda)
{
  Mat_MPIDense *a     = (Mat_MPIDense *)A->data;
  MatType       mtype = MATSEQDENSE;

  PetscFunctionBegin;
  if (!a->A) {
    PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
    PetscCall(PetscLayoutSetUp(A->rmap));
    PetscCall(PetscLayoutSetUp(A->cmap));
    PetscCall(MatCreate(PETSC_COMM_SELF, &a->A));
    /* local block holds all N columns for the locally owned rows */
    PetscCall(MatSetSizes(a->A, A->rmap->n, A->cmap->N, A->rmap->n, A->cmap->N));
#if PetscDefined(HAVE_CUDA)
    PetscBool iscuda;
    PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSECUDA, &iscuda));
    if (iscuda) mtype = MATSEQDENSECUDA;
#elif PetscDefined(HAVE_HIP)
    PetscBool iship;
    PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSEHIP, &iship));
    if (iship) mtype = MATSEQDENSEHIP;
#endif
    PetscCall(MatSetType(a->A, mtype));
  }
  PetscCall(MatDenseSetLDA(a->A, lda));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Read/write access to the local dense array */
static PetscErrorCode MatDenseGetArray_MPIDense(Mat A, PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseGetArray(a->A, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Read-only access to the local dense array */
static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A, PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseGetArrayRead(a->A, (const PetscScalar **)array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Write-only access to the local dense array (previous contents need not be preserved) */
static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A, PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseGetArrayWrite(a->A, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Temporarily substitute a user-provided array for the local storage (undo with ResetArray) */
static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A, const PetscScalar *array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDensePlaceArray(a->A, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Restore the original local storage after MatDensePlaceArray() */
static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseResetArray(a->A));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Permanently replace the local storage with a user-provided array */
static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A, const PetscScalar *array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseReplaceArray(a->A, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Extract a submatrix given row/column index sets. The column IS is gathered onto every process,
   so each process copies entries for its locally owned rows only; the submatrix must keep the
   same row partitioning as the original matrix */
static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A, IS isrow, IS iscol, MatReuse scall, Mat *B)
{
  Mat_MPIDense      *mat = (Mat_MPIDense *)A->data, *newmatd;
  PetscInt           lda, i, j, rstart, rend, nrows, ncols, Ncols, nlrows, nlcols;
  const PetscInt    *irow, *icol;
  const PetscScalar *v;
  PetscScalar       *bv;
  Mat                newmat;
  IS                 iscol_local;
  MPI_Comm           comm_is, comm_mat;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm_mat));
  PetscCall(PetscObjectGetComm((PetscObject)iscol, &comm_is));
  PetscCheck(comm_mat == comm_is, PETSC_COMM_SELF, PETSC_ERR_ARG_NOTSAMECOMM, "IS communicator must match matrix communicator");

  PetscCall(ISAllGather(iscol, &iscol_local));
  PetscCall(ISGetIndices(isrow, &irow));
  PetscCall(ISGetIndices(iscol_local, &icol));
  PetscCall(ISGetLocalSize(isrow, &nrows));
  PetscCall(ISGetLocalSize(iscol, &ncols));
  PetscCall(ISGetSize(iscol, &Ncols)); /* global number of columns, size of iscol_local */

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK.       ... Currently supports only submatrix same partitioning as
     original matrix! */

  PetscCall(MatGetLocalSize(A, &nlrows, &nlcols));
  PetscCall(MatGetOwnershipRange(A, &rstart, &rend));

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &newmat));
    PetscCall(MatSetSizes(newmat, nrows, ncols, PETSC_DECIDE, Ncols));
    PetscCall(MatSetType(newmat, ((PetscObject)A)->type_name));
    PetscCall(MatMPIDenseSetPreallocation(newmat, NULL));
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense *)newmat->data;
  PetscCall(MatDenseGetArray(newmatd->A, &bv));
  PetscCall(MatDenseGetArrayRead(mat->A, &v));
  PetscCall(MatDenseGetLDA(mat->A, &lda));
  for (i = 0; i < Ncols; i++) {
    const PetscScalar *av = v + lda * icol[i]; /* start of source column icol[i] */
    for (j = 0; j < nrows; j++) *bv++ = av[irow[j] - rstart];
  }
  PetscCall(MatDenseRestoreArrayRead(mat->A, &v));
  PetscCall(MatDenseRestoreArray(newmatd->A, &bv));

  /* Assemble the matrices so that the correct flags are set */
  PetscCall(MatAssemblyBegin(newmat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(newmat, MAT_FINAL_ASSEMBLY));

  /* Free work space */
  PetscCall(ISRestoreIndices(isrow, &irow));
  PetscCall(ISRestoreIndices(iscol_local, &icol));
  PetscCall(ISDestroy(&iscol_local));
  *B = newmat;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Release array access obtained with MatDenseGetArray_MPIDense() */
static PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A, PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseRestoreArray(a->A, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Release array access obtained with MatDenseGetArrayRead_MPIDense() */
static PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A, PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseRestoreArrayRead(a->A, (const PetscScalar **)array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Release array access obtained with MatDenseGetArrayWrite_MPIDense() */
static PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A, PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseRestoreArrayWrite(a->A, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Start communication of stashed off-process entries (no-op when stashing is disabled) */
static PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat, MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
  PetscInt      nstash, reallocs;

  PetscFunctionBegin;
  if (mdn->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
  PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
  PetscCall(PetscInfo(mdn->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Receive stashed off-process entries, insert them locally, then assemble the inner matrix */
static PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat, MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
  PetscInt      i, *row, *col, flg, j, rstart, ncols;
  PetscMPIInt   n;
  PetscScalar  *val;

  PetscFunctionBegin;
  if (!mdn->donotstash && !mat->nooffprocentries) {
    /* wait on receives */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break; /* no more messages */

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIDense(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
  }

  PetscCall(MatAssemblyBegin(mdn->A, mode));
  PetscCall(MatAssemblyEnd(mdn->A, mode));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Set all locally stored entries to zero */
static PetscErrorCode MatZeroEntries_MPIDense(Mat A)
{
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatZeroEntries(l->A));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Zero the given (global) rows, optionally placing diag on the diagonal and fixing the right
   hand side b so that x keeps satisfying the zeroed equations */
static PetscErrorCode MatZeroRows_MPIDense(Mat A, PetscInt n, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;
  PetscInt      i, len, *lrows;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(PetscLayoutMapLocal(A->rmap, n, rows, &len, &lrows, NULL));
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArrayWrite(b, &bb));
    for (i = 0; i < len; ++i) bb[lrows[i]] = diag * xx[lrows[i]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArrayWrite(b, &bb));
  }
  /* zero the rows first, then set the diagonal via MatDiagonalSet if requested */
  PetscCall(MatZeroRows(l->A, len, lrows, 0.0, NULL, NULL));
  if (diag != 0.0) {
    Vec d;

    PetscCall(MatCreateVecs(A, NULL, &d));
    PetscCall(VecSet(d, diag));
    PetscCall(MatDiagonalSet(A, d, INSERT_VALUES));
    PetscCall(VecDestroy(&d));
  }
  PetscCall(PetscFree(lrows));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* yy = A * xx; the full input vector is first gathered into lvec via the Mvctx star forest */
static PetscErrorCode MatMult_MPIDense(Mat mat, Vec xx, Vec yy)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
  PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
  /* broadcast all entries of xx into the local work vector lvec */
  PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
  PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
  PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
  PetscCall((*mdn->A->ops->mult)(mdn->A, mdn->lvec, yy));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* zz = A * xx + yy; same communication pattern as MatMult_MPIDense() */
static PetscErrorCode MatMultAdd_MPIDense(Mat mat, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
  PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
  PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
  PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
  PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
  PetscCall((*mdn->A->ops->multadd)(mdn->A, mdn->lvec, yy, zz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Shared kernel for (Hermitian) transpose multiply: apply the local transpose, then sum the
   partial results into yy with a star-forest reduction */
static PetscErrorCode MatMultTransposeKernel_MPIDense(Mat A, Vec xx, Vec yy, PetscBool herm)
{
  Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
  PetscCall(VecSet(yy, 0.0)); /* reduction adds into yy, so it must start zeroed */
  if (herm) PetscCall((*a->A->ops->multhermitiantranspose)(a->A, xx, a->lvec));
  else PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec));
  PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(yy, &ay, &aymtype));
  PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
  PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
  PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
  PetscCall(VecRestoreArrayAndMemType(yy, &ay));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Shared kernel for (Hermitian) transpose multiply-add: zz = A^T(/H) * xx + yy */
static PetscErrorCode MatMultTransposeAddKernel_MPIDense(Mat A, Vec xx, Vec yy, Vec zz, PetscBool herm)
{
  Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
  PetscCall(VecCopy(yy, zz)); /* seed zz with yy; the reduction then adds the product */
  if (herm) PetscCall((*a->A->ops->multhermitiantranspose)(a->A, xx, a->lvec));
  else PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec));
  PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(zz, &ay, &aymtype));
  PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
  PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
  PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
  PetscCall(VecRestoreArrayAndMemType(zz, &ay));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* yy = A^T * xx */
static PetscErrorCode MatMultTranspose_MPIDense(Mat A, Vec xx, Vec yy)
{
  PetscFunctionBegin;
  PetscCall(MatMultTransposeKernel_MPIDense(A, xx, yy, PETSC_FALSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* zz = A^T * xx + yy */
static PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A, Vec xx, Vec yy, Vec zz)
{
  PetscFunctionBegin;
  PetscCall(MatMultTransposeAddKernel_MPIDense(A, xx, yy, zz, PETSC_FALSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* yy = A^H * xx */
static PetscErrorCode MatMultHermitianTranspose_MPIDense(Mat A, Vec xx, Vec yy)
{
  PetscFunctionBegin;
  PetscCall(MatMultTransposeKernel_MPIDense(A, xx, yy, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* zz = A^H * xx + yy */
static PetscErrorCode MatMultHermitianTransposeAdd_MPIDense(Mat A, Vec xx, Vec yy, Vec zz)
{
  PetscFunctionBegin;
  PetscCall(MatMultTransposeAddKernel_MPIDense(A, xx, yy, zz, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Copy the locally owned diagonal entries into v; trailing local entries (past the square part
   of a rectangular matrix) are zeroed */
PetscErrorCode MatGetDiagonal_MPIDense(Mat A, Vec v)
{
  Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
  PetscInt           lda, len, i, nl, ng, m = A->rmap->n, radd;
  PetscScalar       *x;
  const PetscScalar *av;

  PetscFunctionBegin;
  PetscCall(VecGetArray(v, &x));
  PetscCall(VecGetSize(v, &ng));
  PetscCheck(ng == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming mat and vec");
  PetscCall(VecGetLocalSize(v, &nl));
  len  = PetscMin(a->A->rmap->n, a->A->cmap->n);
  /* offset of the first diagonal entry in the local array.
     NOTE(review): rstart * m assumes the leading dimension equals m — confirm for non-default LDA */
  radd = A->rmap->rstart * m;
  PetscCall(MatDenseGetArrayRead(a->A, &av));
  PetscCall(MatDenseGetLDA(a->A, &lda));
  for (i = 0; i < len; i++) x[i] = av[radd + i * lda + i];
  PetscCall(MatDenseRestoreArrayRead(a->A, &av));
  PetscCall(PetscArrayzero(x + i, nl - i));
  PetscCall(VecRestoreArray(v, &x));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Destroy the matrix: free all owned sub-objects and detach every composed method/object so the
   type can be safely changed afterwards */
static PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;

  PetscFunctionBegin;
  PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
  PetscCall(MatStashDestroy_Private(&mat->stash));
  PetscCheck(!mdn->vecinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!mdn->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDestroy(&mdn->A));
  PetscCall(VecDestroy(&mdn->lvec));
  PetscCall(PetscSFDestroy(&mdn->Mvctx));
  PetscCall(VecDestroy(&mdn->cvec));
  PetscCall(MatDestroy(&mdn->cmat));

  PetscCall(PetscFree(mat->data));
  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));

  /* remove all composed method implementations registered by the constructor */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", NULL));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", NULL));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", NULL));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensecuda_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAPlaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAResetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAReplaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDASetPreallocation_C", NULL));
#endif
#if defined(PETSC_HAVE_HIP)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensehip_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensehip_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidensehip_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaijhipsparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPPlaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPResetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPReplaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPSetPreallocation_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", NULL));

  /* drop the cached diagonal block created by MatGetDiagonalBlock_MPIDense() */
  PetscCall(PetscObjectCompose((PetscObject)mat, "DiagonalBlock", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

#include <petscdraw.h>
/* View the matrix via ASCII, draw, or socket viewer; except for the INFO formats this gathers
   the entire matrix onto rank 0, so it is intended for small matrices only */
static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
{
  Mat_MPIDense     *mdn = (Mat_MPIDense *)mat->data;
  PetscMPIInt       rank;
  PetscViewerType   vtype;
  PetscBool         iascii, isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  if (iascii) {
    PetscCall(PetscViewerGetType(viewer, &vtype));
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "  [%d] local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %" PetscInt_FMT " \n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                   (PetscInt)info.memory));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      if (mdn->Mvctx) PetscCall(PetscSFView(mdn->Mvctx, viewer));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(PETSC_SUCCESS);
    }
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;

    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawIsNull(draw, &isnull));
    if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat          A;
    PetscInt     M = mat->rmap->N, N = mat->cmap->N, m, row, i, nz;
    PetscInt    *cols;
    PetscScalar *vals;

    PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
    if (rank == 0) {
      PetscCall(MatSetSizes(A, M, N, M, N));
    } else {
      PetscCall(MatSetSizes(A, 0, 0, M, N));
    }
    /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
    PetscCall(MatSetType(A, MATMPIDENSE));
    PetscCall(MatMPIDenseSetPreallocation(A, NULL));

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    A->insertmode = INSERT_VALUES;

    row = mat->rmap->rstart;
    m   = mdn->A->rmap->n;
    for (i = 0; i < m; i++) {
      PetscCall(MatGetRow_MPIDense(mat, row, &nz, &cols, &vals));
      PetscCall(MatSetValues_MPIDense(A, 1, &row, nz, cols, vals, INSERT_VALUES));
      PetscCall(MatRestoreRow_MPIDense(mat, row, &nz, &cols, &vals));
      row++;
    }

    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    if (rank == 0) {
      PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIDense *)(A->data))->A, ((PetscObject)mat)->name));
      PetscCall(MatView_SeqDense(((Mat_MPIDense *)(A->data))->A, sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Dispatch viewing to the appropriate implementation based on the viewer type */
static PetscErrorCode MatView_MPIDense(Mat mat, PetscViewer viewer)
{
  PetscBool iascii, isbinary, isdraw, issocket;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));

  if (iascii || issocket || isdraw) {
    PetscCall(MatView_MPIDense_ASCIIorDraworSocket(mat, viewer));
  } else if (isbinary) PetscCall(MatView_Dense_Binary(mat, viewer));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Collect matrix statistics; local numbers come from the inner matrix and are reduced with
   MPI_MAX or MPI_SUM for the global flavors */
static PetscErrorCode MatGetInfo_MPIDense(Mat A, MatInfoType flag, MatInfo *info)
{
  Mat_MPIDense  *mat = (Mat_MPIDense *)A->data;
  Mat            mdn = mat->A;
  PetscLogDouble isend[5], irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;

  PetscCall(MatGetInfo(mdn, MAT_LOCAL, info));

  isend[0] = info->nz_used;
  isend[1] = info->nz_allocated;
  isend[2] = info->nz_unneeded;
  isend[3] = info->memory;
  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)A)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)A)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(PETSC_SUCCESS);
}
834 static PetscErrorCode MatSetOption_MPIDense(Mat A, MatOption op, PetscBool flg) 835 { 836 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 837 838 PetscFunctionBegin; 839 switch (op) { 840 case MAT_NEW_NONZERO_LOCATIONS: 841 case MAT_NEW_NONZERO_LOCATION_ERR: 842 case MAT_NEW_NONZERO_ALLOCATION_ERR: 843 MatCheckPreallocated(A, 1); 844 PetscCall(MatSetOption(a->A, op, flg)); 845 break; 846 case MAT_ROW_ORIENTED: 847 MatCheckPreallocated(A, 1); 848 a->roworiented = flg; 849 PetscCall(MatSetOption(a->A, op, flg)); 850 break; 851 case MAT_FORCE_DIAGONAL_ENTRIES: 852 case MAT_KEEP_NONZERO_PATTERN: 853 case MAT_USE_HASH_TABLE: 854 case MAT_SORTED_FULL: 855 PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); 856 break; 857 case MAT_IGNORE_OFF_PROC_ENTRIES: 858 a->donotstash = flg; 859 break; 860 case MAT_SYMMETRIC: 861 case MAT_STRUCTURALLY_SYMMETRIC: 862 case MAT_HERMITIAN: 863 case MAT_SYMMETRY_ETERNAL: 864 case MAT_STRUCTURAL_SYMMETRY_ETERNAL: 865 case MAT_SPD: 866 case MAT_IGNORE_LOWER_TRIANGULAR: 867 case MAT_IGNORE_ZERO_ENTRIES: 868 case MAT_SPD_ETERNAL: 869 /* if the diagonal matrix is square it inherits some of the properties above */ 870 PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); 871 break; 872 default: 873 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %s", MatOptions[op]); 874 } 875 PetscFunctionReturn(PETSC_SUCCESS); 876 } 877 878 static PetscErrorCode MatDiagonalScale_MPIDense(Mat A, Vec ll, Vec rr) 879 { 880 Mat_MPIDense *mdn = (Mat_MPIDense *)A->data; 881 const PetscScalar *l; 882 PetscScalar x, *v, *vv, *r; 883 PetscInt i, j, s2a, s3a, s2, s3, m = mdn->A->rmap->n, n = mdn->A->cmap->n, lda; 884 885 PetscFunctionBegin; 886 PetscCall(MatDenseGetArray(mdn->A, &vv)); 887 PetscCall(MatDenseGetLDA(mdn->A, &lda)); 888 PetscCall(MatGetLocalSize(A, &s2, &s3)); 889 if (ll) { 890 PetscCall(VecGetLocalSize(ll, &s2a)); 891 PetscCheck(s2a == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left scaling vector non-conforming local size, %" 
PetscInt_FMT " != %" PetscInt_FMT, s2a, s2); 892 PetscCall(VecGetArrayRead(ll, &l)); 893 for (i = 0; i < m; i++) { 894 x = l[i]; 895 v = vv + i; 896 for (j = 0; j < n; j++) { 897 (*v) *= x; 898 v += lda; 899 } 900 } 901 PetscCall(VecRestoreArrayRead(ll, &l)); 902 PetscCall(PetscLogFlops(1.0 * n * m)); 903 } 904 if (rr) { 905 const PetscScalar *ar; 906 907 PetscCall(VecGetLocalSize(rr, &s3a)); 908 PetscCheck(s3a == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Right scaling vec non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT ".", s3a, s3); 909 PetscCall(VecGetArrayRead(rr, &ar)); 910 if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); 911 PetscCall(VecGetArray(mdn->lvec, &r)); 912 PetscCall(PetscSFBcastBegin(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE)); 913 PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE)); 914 PetscCall(VecRestoreArrayRead(rr, &ar)); 915 for (i = 0; i < n; i++) { 916 x = r[i]; 917 v = vv + i * lda; 918 for (j = 0; j < m; j++) (*v++) *= x; 919 } 920 PetscCall(VecRestoreArray(mdn->lvec, &r)); 921 PetscCall(PetscLogFlops(1.0 * n * m)); 922 } 923 PetscCall(MatDenseRestoreArray(mdn->A, &vv)); 924 PetscFunctionReturn(PETSC_SUCCESS); 925 } 926 927 static PetscErrorCode MatNorm_MPIDense(Mat A, NormType type, PetscReal *nrm) 928 { 929 Mat_MPIDense *mdn = (Mat_MPIDense *)A->data; 930 PetscInt i, j; 931 PetscMPIInt size; 932 PetscReal sum = 0.0; 933 const PetscScalar *av, *v; 934 935 PetscFunctionBegin; 936 PetscCall(MatDenseGetArrayRead(mdn->A, &av)); 937 v = av; 938 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 939 if (size == 1) { 940 PetscCall(MatNorm(mdn->A, type, nrm)); 941 } else { 942 if (type == NORM_FROBENIUS) { 943 for (i = 0; i < mdn->A->cmap->n * mdn->A->rmap->n; i++) { 944 sum += PetscRealPart(PetscConj(*v) * (*v)); 945 v++; 946 } 947 PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A))); 948 *nrm = PetscSqrtReal(*nrm); 949 
PetscCall(PetscLogFlops(2.0 * mdn->A->cmap->n * mdn->A->rmap->n)); 950 } else if (type == NORM_1) { 951 PetscReal *tmp, *tmp2; 952 PetscCall(PetscCalloc2(A->cmap->N, &tmp, A->cmap->N, &tmp2)); 953 *nrm = 0.0; 954 v = av; 955 for (j = 0; j < mdn->A->cmap->n; j++) { 956 for (i = 0; i < mdn->A->rmap->n; i++) { 957 tmp[j] += PetscAbsScalar(*v); 958 v++; 959 } 960 } 961 PetscCall(MPIU_Allreduce(tmp, tmp2, A->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A))); 962 for (j = 0; j < A->cmap->N; j++) { 963 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 964 } 965 PetscCall(PetscFree2(tmp, tmp2)); 966 PetscCall(PetscLogFlops(A->cmap->n * A->rmap->n)); 967 } else if (type == NORM_INFINITY) { /* max row norm */ 968 PetscReal ntemp; 969 PetscCall(MatNorm(mdn->A, type, &ntemp)); 970 PetscCall(MPIU_Allreduce(&ntemp, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A))); 971 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for two norm"); 972 } 973 PetscCall(MatDenseRestoreArrayRead(mdn->A, &av)); 974 PetscFunctionReturn(PETSC_SUCCESS); 975 } 976 977 static PetscErrorCode MatTranspose_MPIDense(Mat A, MatReuse reuse, Mat *matout) 978 { 979 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 980 Mat B; 981 PetscInt M = A->rmap->N, N = A->cmap->N, m, n, *rwork, rstart = A->rmap->rstart; 982 PetscInt j, i, lda; 983 PetscScalar *v; 984 985 PetscFunctionBegin; 986 if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout)); 987 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 988 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 989 PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M)); 990 PetscCall(MatSetType(B, ((PetscObject)A)->type_name)); 991 PetscCall(MatMPIDenseSetPreallocation(B, NULL)); 992 } else B = *matout; 993 994 m = a->A->rmap->n; 995 n = a->A->cmap->n; 996 PetscCall(MatDenseGetArrayRead(a->A, (const PetscScalar **)&v)); 997 PetscCall(MatDenseGetLDA(a->A, &lda)); 998 
PetscCall(PetscMalloc1(m, &rwork)); 999 for (i = 0; i < m; i++) rwork[i] = rstart + i; 1000 for (j = 0; j < n; j++) { 1001 PetscCall(MatSetValues(B, 1, &j, m, rwork, v, INSERT_VALUES)); 1002 v = PetscSafePointerPlusOffset(v, lda); 1003 } 1004 PetscCall(MatDenseRestoreArrayRead(a->A, (const PetscScalar **)&v)); 1005 PetscCall(PetscFree(rwork)); 1006 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 1007 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 1008 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 1009 *matout = B; 1010 } else { 1011 PetscCall(MatHeaderMerge(A, &B)); 1012 } 1013 PetscFunctionReturn(PETSC_SUCCESS); 1014 } 1015 1016 static PetscErrorCode MatDuplicate_MPIDense(Mat, MatDuplicateOption, Mat *); 1017 PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat, PetscScalar); 1018 1019 static PetscErrorCode MatSetUp_MPIDense(Mat A) 1020 { 1021 PetscFunctionBegin; 1022 PetscCall(PetscLayoutSetUp(A->rmap)); 1023 PetscCall(PetscLayoutSetUp(A->cmap)); 1024 if (!A->preallocated) PetscCall(MatMPIDenseSetPreallocation(A, NULL)); 1025 PetscFunctionReturn(PETSC_SUCCESS); 1026 } 1027 1028 static PetscErrorCode MatAXPY_MPIDense(Mat Y, PetscScalar alpha, Mat X, MatStructure str) 1029 { 1030 Mat_MPIDense *A = (Mat_MPIDense *)Y->data, *B = (Mat_MPIDense *)X->data; 1031 1032 PetscFunctionBegin; 1033 PetscCall(MatAXPY(A->A, alpha, B->A, str)); 1034 PetscFunctionReturn(PETSC_SUCCESS); 1035 } 1036 1037 static PetscErrorCode MatConjugate_MPIDense(Mat mat) 1038 { 1039 Mat_MPIDense *a = (Mat_MPIDense *)mat->data; 1040 1041 PetscFunctionBegin; 1042 PetscCall(MatConjugate(a->A)); 1043 PetscFunctionReturn(PETSC_SUCCESS); 1044 } 1045 1046 static PetscErrorCode MatRealPart_MPIDense(Mat A) 1047 { 1048 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1049 1050 PetscFunctionBegin; 1051 PetscCall(MatRealPart(a->A)); 1052 PetscFunctionReturn(PETSC_SUCCESS); 1053 } 1054 1055 static PetscErrorCode MatImaginaryPart_MPIDense(Mat A) 1056 { 1057 Mat_MPIDense *a = (Mat_MPIDense 
*)A->data; 1058 1059 PetscFunctionBegin; 1060 PetscCall(MatImaginaryPart(a->A)); 1061 PetscFunctionReturn(PETSC_SUCCESS); 1062 } 1063 1064 static PetscErrorCode MatGetColumnVector_MPIDense(Mat A, Vec v, PetscInt col) 1065 { 1066 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1067 1068 PetscFunctionBegin; 1069 PetscCheck(a->A, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing local matrix"); 1070 PetscCheck(a->A->ops->getcolumnvector, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing get column operation"); 1071 PetscCall((*a->A->ops->getcolumnvector)(a->A, v, col)); 1072 PetscFunctionReturn(PETSC_SUCCESS); 1073 } 1074 1075 PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat, PetscInt, PetscReal *); 1076 1077 static PetscErrorCode MatGetColumnReductions_MPIDense(Mat A, PetscInt type, PetscReal *reductions) 1078 { 1079 PetscInt i, m, n; 1080 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1081 PetscReal *work; 1082 1083 PetscFunctionBegin; 1084 PetscCall(MatGetSize(A, &m, &n)); 1085 PetscCall(PetscMalloc1(n, &work)); 1086 if (type == REDUCTION_MEAN_REALPART) { 1087 PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_REALPART, work)); 1088 } else if (type == REDUCTION_MEAN_IMAGINARYPART) { 1089 PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_IMAGINARYPART, work)); 1090 } else { 1091 PetscCall(MatGetColumnReductions_SeqDense(a->A, type, work)); 1092 } 1093 if (type == NORM_2) { 1094 for (i = 0; i < n; i++) work[i] *= work[i]; 1095 } 1096 if (type == NORM_INFINITY) { 1097 PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, A->hdr.comm)); 1098 } else { 1099 PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, A->hdr.comm)); 1100 } 1101 PetscCall(PetscFree(work)); 1102 if (type == NORM_2) { 1103 for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]); 1104 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 1105 for (i = 0; i < n; i++) 
reductions[i] /= m; 1106 } 1107 PetscFunctionReturn(PETSC_SUCCESS); 1108 } 1109 1110 static PetscErrorCode MatSetRandom_MPIDense(Mat x, PetscRandom rctx) 1111 { 1112 Mat_MPIDense *d = (Mat_MPIDense *)x->data; 1113 1114 PetscFunctionBegin; 1115 PetscCall(MatSetRandom(d->A, rctx)); 1116 #if defined(PETSC_HAVE_DEVICE) 1117 x->offloadmask = d->A->offloadmask; 1118 #endif 1119 PetscFunctionReturn(PETSC_SUCCESS); 1120 } 1121 1122 static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A, PetscBool *missing, PetscInt *d) 1123 { 1124 PetscFunctionBegin; 1125 *missing = PETSC_FALSE; 1126 PetscFunctionReturn(PETSC_SUCCESS); 1127 } 1128 1129 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat); 1130 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat); 1131 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat); 1132 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat); 1133 static PetscErrorCode MatEqual_MPIDense(Mat, Mat, PetscBool *); 1134 static PetscErrorCode MatLoad_MPIDense(Mat, PetscViewer); 1135 static PetscErrorCode MatProductSetFromOptions_MPIDense(Mat); 1136 1137 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense, 1138 MatGetRow_MPIDense, 1139 MatRestoreRow_MPIDense, 1140 MatMult_MPIDense, 1141 /* 4*/ MatMultAdd_MPIDense, 1142 MatMultTranspose_MPIDense, 1143 MatMultTransposeAdd_MPIDense, 1144 NULL, 1145 NULL, 1146 NULL, 1147 /* 10*/ NULL, 1148 NULL, 1149 NULL, 1150 NULL, 1151 MatTranspose_MPIDense, 1152 /* 15*/ MatGetInfo_MPIDense, 1153 MatEqual_MPIDense, 1154 MatGetDiagonal_MPIDense, 1155 MatDiagonalScale_MPIDense, 1156 MatNorm_MPIDense, 1157 /* 20*/ MatAssemblyBegin_MPIDense, 1158 MatAssemblyEnd_MPIDense, 1159 MatSetOption_MPIDense, 1160 MatZeroEntries_MPIDense, 1161 /* 24*/ MatZeroRows_MPIDense, 1162 NULL, 1163 NULL, 1164 NULL, 1165 NULL, 1166 /* 29*/ MatSetUp_MPIDense, 1167 NULL, 1168 NULL, 1169 
MatGetDiagonalBlock_MPIDense, 1170 NULL, 1171 /* 34*/ MatDuplicate_MPIDense, 1172 NULL, 1173 NULL, 1174 NULL, 1175 NULL, 1176 /* 39*/ MatAXPY_MPIDense, 1177 MatCreateSubMatrices_MPIDense, 1178 NULL, 1179 MatGetValues_MPIDense, 1180 MatCopy_MPIDense, 1181 /* 44*/ NULL, 1182 MatScale_MPIDense, 1183 MatShift_MPIDense, 1184 NULL, 1185 NULL, 1186 /* 49*/ MatSetRandom_MPIDense, 1187 NULL, 1188 NULL, 1189 NULL, 1190 NULL, 1191 /* 54*/ NULL, 1192 NULL, 1193 NULL, 1194 NULL, 1195 NULL, 1196 /* 59*/ MatCreateSubMatrix_MPIDense, 1197 MatDestroy_MPIDense, 1198 MatView_MPIDense, 1199 NULL, 1200 NULL, 1201 /* 64*/ NULL, 1202 NULL, 1203 NULL, 1204 NULL, 1205 NULL, 1206 /* 69*/ NULL, 1207 NULL, 1208 NULL, 1209 NULL, 1210 NULL, 1211 /* 74*/ NULL, 1212 NULL, 1213 NULL, 1214 NULL, 1215 NULL, 1216 /* 79*/ NULL, 1217 NULL, 1218 NULL, 1219 NULL, 1220 /* 83*/ MatLoad_MPIDense, 1221 NULL, 1222 NULL, 1223 NULL, 1224 NULL, 1225 NULL, 1226 /* 89*/ NULL, 1227 NULL, 1228 NULL, 1229 NULL, 1230 NULL, 1231 /* 94*/ NULL, 1232 NULL, 1233 MatMatTransposeMultSymbolic_MPIDense_MPIDense, 1234 MatMatTransposeMultNumeric_MPIDense_MPIDense, 1235 NULL, 1236 /* 99*/ MatProductSetFromOptions_MPIDense, 1237 NULL, 1238 NULL, 1239 MatConjugate_MPIDense, 1240 NULL, 1241 /*104*/ NULL, 1242 MatRealPart_MPIDense, 1243 MatImaginaryPart_MPIDense, 1244 NULL, 1245 NULL, 1246 /*109*/ NULL, 1247 NULL, 1248 NULL, 1249 MatGetColumnVector_MPIDense, 1250 MatMissingDiagonal_MPIDense, 1251 /*114*/ NULL, 1252 NULL, 1253 NULL, 1254 NULL, 1255 NULL, 1256 /*119*/ NULL, 1257 NULL, 1258 MatMultHermitianTranspose_MPIDense, 1259 MatMultHermitianTransposeAdd_MPIDense, 1260 NULL, 1261 /*124*/ NULL, 1262 MatGetColumnReductions_MPIDense, 1263 NULL, 1264 NULL, 1265 NULL, 1266 /*129*/ NULL, 1267 NULL, 1268 MatTransposeMatMultSymbolic_MPIDense_MPIDense, 1269 MatTransposeMatMultNumeric_MPIDense_MPIDense, 1270 NULL, 1271 /*134*/ NULL, 1272 NULL, 1273 NULL, 1274 NULL, 1275 NULL, 1276 /*139*/ NULL, 1277 NULL, 1278 NULL, 1279 NULL, 1280 NULL, 
1281 MatCreateMPIMatConcatenateSeqMat_MPIDense, 1282 /*145*/ NULL, 1283 NULL, 1284 NULL, 1285 NULL, 1286 NULL, 1287 /*150*/ NULL, 1288 NULL, 1289 NULL}; 1290 1291 static PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat, PetscScalar *data) 1292 { 1293 Mat_MPIDense *a = (Mat_MPIDense *)mat->data; 1294 MatType mtype = MATSEQDENSE; 1295 1296 PetscFunctionBegin; 1297 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1298 PetscCall(PetscLayoutSetUp(mat->rmap)); 1299 PetscCall(PetscLayoutSetUp(mat->cmap)); 1300 if (!a->A) { 1301 PetscCall(MatCreate(PETSC_COMM_SELF, &a->A)); 1302 PetscCall(MatSetSizes(a->A, mat->rmap->n, mat->cmap->N, mat->rmap->n, mat->cmap->N)); 1303 } 1304 #if defined(PETSC_HAVE_CUDA) 1305 PetscBool iscuda; 1306 PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSECUDA, &iscuda)); 1307 if (iscuda) mtype = MATSEQDENSECUDA; 1308 #endif 1309 #if defined(PETSC_HAVE_HIP) 1310 PetscBool iship; 1311 PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSEHIP, &iship)); 1312 if (iship) mtype = MATSEQDENSEHIP; 1313 #endif 1314 PetscCall(MatSetType(a->A, mtype)); 1315 PetscCall(MatSeqDenseSetPreallocation(a->A, data)); 1316 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 1317 mat->offloadmask = a->A->offloadmask; 1318 #endif 1319 mat->preallocated = PETSC_TRUE; 1320 mat->assembled = PETSC_TRUE; 1321 PetscFunctionReturn(PETSC_SUCCESS); 1322 } 1323 1324 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIDense(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 1325 { 1326 Mat B, C; 1327 1328 PetscFunctionBegin; 1329 PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &C)); 1330 PetscCall(MatConvert_SeqAIJ_SeqDense(C, MATSEQDENSE, MAT_INITIAL_MATRIX, &B)); 1331 PetscCall(MatDestroy(&C)); 1332 if (reuse == MAT_REUSE_MATRIX) { 1333 C = *newmat; 1334 } else C = NULL; 1335 PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, 
A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C)); 1336 PetscCall(MatDestroy(&B)); 1337 if (reuse == MAT_INPLACE_MATRIX) { 1338 PetscCall(MatHeaderReplace(A, &C)); 1339 } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C; 1340 PetscFunctionReturn(PETSC_SUCCESS); 1341 } 1342 1343 static PetscErrorCode MatConvert_MPIDense_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 1344 { 1345 Mat B, C; 1346 1347 PetscFunctionBegin; 1348 PetscCall(MatDenseGetLocalMatrix(A, &C)); 1349 PetscCall(MatConvert_SeqDense_SeqAIJ(C, MATSEQAIJ, MAT_INITIAL_MATRIX, &B)); 1350 if (reuse == MAT_REUSE_MATRIX) { 1351 C = *newmat; 1352 } else C = NULL; 1353 PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C)); 1354 PetscCall(MatDestroy(&B)); 1355 if (reuse == MAT_INPLACE_MATRIX) { 1356 PetscCall(MatHeaderReplace(A, &C)); 1357 } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C; 1358 PetscFunctionReturn(PETSC_SUCCESS); 1359 } 1360 1361 #if defined(PETSC_HAVE_ELEMENTAL) 1362 PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 1363 { 1364 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1365 Mat mat_elemental; 1366 PetscScalar *v; 1367 PetscInt m = A->rmap->n, N = A->cmap->N, rstart = A->rmap->rstart, i, *rows, *cols, lda; 1368 1369 PetscFunctionBegin; 1370 if (reuse == MAT_REUSE_MATRIX) { 1371 mat_elemental = *newmat; 1372 PetscCall(MatZeroEntries(*newmat)); 1373 } else { 1374 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental)); 1375 PetscCall(MatSetSizes(mat_elemental, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N)); 1376 PetscCall(MatSetType(mat_elemental, MATELEMENTAL)); 1377 PetscCall(MatSetUp(mat_elemental)); 1378 PetscCall(MatSetOption(mat_elemental, MAT_ROW_ORIENTED, PETSC_FALSE)); 1379 } 1380 1381 PetscCall(PetscMalloc2(m, &rows, N, &cols)); 1382 for (i = 0; i < N; i++) cols[i] = i; 1383 for (i = 0; i < m; 
i++) rows[i] = rstart + i; 1384 1385 /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */ 1386 PetscCall(MatDenseGetArray(A, &v)); 1387 PetscCall(MatDenseGetLDA(a->A, &lda)); 1388 if (lda == m) PetscCall(MatSetValues(mat_elemental, m, rows, N, cols, v, ADD_VALUES)); 1389 else { 1390 for (i = 0; i < N; i++) PetscCall(MatSetValues(mat_elemental, m, rows, 1, &i, v + lda * i, ADD_VALUES)); 1391 } 1392 PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY)); 1393 PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY)); 1394 PetscCall(MatDenseRestoreArray(A, &v)); 1395 PetscCall(PetscFree2(rows, cols)); 1396 1397 if (reuse == MAT_INPLACE_MATRIX) { 1398 PetscCall(MatHeaderReplace(A, &mat_elemental)); 1399 } else { 1400 *newmat = mat_elemental; 1401 } 1402 PetscFunctionReturn(PETSC_SUCCESS); 1403 } 1404 #endif 1405 1406 static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A, PetscInt col, PetscScalar **vals) 1407 { 1408 Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 1409 1410 PetscFunctionBegin; 1411 PetscCall(MatDenseGetColumn(mat->A, col, vals)); 1412 PetscFunctionReturn(PETSC_SUCCESS); 1413 } 1414 1415 static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A, PetscScalar **vals) 1416 { 1417 Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 1418 1419 PetscFunctionBegin; 1420 PetscCall(MatDenseRestoreColumn(mat->A, vals)); 1421 PetscFunctionReturn(PETSC_SUCCESS); 1422 } 1423 1424 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 1425 { 1426 Mat_MPIDense *mat; 1427 PetscInt m, nloc, N; 1428 1429 PetscFunctionBegin; 1430 PetscCall(MatGetSize(inmat, &m, &N)); 1431 PetscCall(MatGetLocalSize(inmat, NULL, &nloc)); 1432 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 1433 PetscInt sum; 1434 1435 if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N)); 1436 /* Check sum(n) = N */ 1437 PetscCall(MPIU_Allreduce(&n, &sum, 1, 
MPIU_INT, MPI_SUM, comm)); 1438 PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N); 1439 1440 PetscCall(MatCreateDense(comm, m, n, PETSC_DETERMINE, N, NULL, outmat)); 1441 PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 1442 } 1443 1444 /* numeric phase */ 1445 mat = (Mat_MPIDense *)(*outmat)->data; 1446 PetscCall(MatCopy(inmat, mat->A, SAME_NONZERO_PATTERN)); 1447 PetscFunctionReturn(PETSC_SUCCESS); 1448 } 1449 1450 PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A, PetscInt col, Vec *v) 1451 { 1452 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1453 PetscInt lda; 1454 1455 PetscFunctionBegin; 1456 PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1457 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1458 if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec)); 1459 a->vecinuse = col + 1; 1460 PetscCall(MatDenseGetLDA(a->A, &lda)); 1461 PetscCall(MatDenseGetArray(a->A, (PetscScalar **)&a->ptrinuse)); 1462 PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)lda))); 1463 *v = a->cvec; 1464 PetscFunctionReturn(PETSC_SUCCESS); 1465 } 1466 1467 PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A, PetscInt col, Vec *v) 1468 { 1469 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1470 1471 PetscFunctionBegin; 1472 PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first"); 1473 PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector"); 1474 a->vecinuse = 0; 1475 PetscCall(MatDenseRestoreArray(a->A, (PetscScalar **)&a->ptrinuse)); 1476 PetscCall(VecResetArray(a->cvec)); 1477 if (v) *v = NULL; 1478 PetscFunctionReturn(PETSC_SUCCESS); 1479 } 1480 1481 PetscErrorCode 
MatDenseGetColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v) 1482 { 1483 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1484 PetscInt lda; 1485 1486 PetscFunctionBegin; 1487 PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1488 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1489 if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec)); 1490 a->vecinuse = col + 1; 1491 PetscCall(MatDenseGetLDA(a->A, &lda)); 1492 PetscCall(MatDenseGetArrayRead(a->A, &a->ptrinuse)); 1493 PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)lda))); 1494 PetscCall(VecLockReadPush(a->cvec)); 1495 *v = a->cvec; 1496 PetscFunctionReturn(PETSC_SUCCESS); 1497 } 1498 1499 PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v) 1500 { 1501 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1502 1503 PetscFunctionBegin; 1504 PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first"); 1505 PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector"); 1506 a->vecinuse = 0; 1507 PetscCall(MatDenseRestoreArrayRead(a->A, &a->ptrinuse)); 1508 PetscCall(VecLockReadPop(a->cvec)); 1509 PetscCall(VecResetArray(a->cvec)); 1510 if (v) *v = NULL; 1511 PetscFunctionReturn(PETSC_SUCCESS); 1512 } 1513 1514 PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v) 1515 { 1516 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1517 PetscInt lda; 1518 1519 PetscFunctionBegin; 1520 PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1521 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1522 if (!a->cvec) 
PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec)); 1523 a->vecinuse = col + 1; 1524 PetscCall(MatDenseGetLDA(a->A, &lda)); 1525 PetscCall(MatDenseGetArrayWrite(a->A, (PetscScalar **)&a->ptrinuse)); 1526 PetscCall(VecPlaceArray(a->cvec, PetscSafePointerPlusOffset(a->ptrinuse, (size_t)col * (size_t)lda))); 1527 *v = a->cvec; 1528 PetscFunctionReturn(PETSC_SUCCESS); 1529 } 1530 1531 PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v) 1532 { 1533 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1534 1535 PetscFunctionBegin; 1536 PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first"); 1537 PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector"); 1538 a->vecinuse = 0; 1539 PetscCall(MatDenseRestoreArrayWrite(a->A, (PetscScalar **)&a->ptrinuse)); 1540 PetscCall(VecResetArray(a->cvec)); 1541 if (v) *v = NULL; 1542 PetscFunctionReturn(PETSC_SUCCESS); 1543 } 1544 1545 static PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A, PetscInt rbegin, PetscInt rend, PetscInt cbegin, PetscInt cend, Mat *v) 1546 { 1547 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1548 Mat_MPIDense *c; 1549 MPI_Comm comm; 1550 PetscInt pbegin, pend; 1551 1552 PetscFunctionBegin; 1553 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 1554 PetscCheck(!a->vecinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1555 PetscCheck(!a->matinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1556 pbegin = PetscMax(0, PetscMin(A->rmap->rend, rbegin) - A->rmap->rstart); 1557 pend = PetscMin(A->rmap->n, PetscMax(0, rend - A->rmap->rstart)); 1558 if (!a->cmat) { 1559 PetscCall(MatCreate(comm, &a->cmat)); 1560 PetscCall(MatSetType(a->cmat, ((PetscObject)A)->type_name)); 1561 if (rend - rbegin == A->rmap->N) PetscCall(PetscLayoutReference(A->rmap, &a->cmat->rmap)); 1562 else { 1563 
      PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin));
      PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin));
      PetscCall(PetscLayoutSetUp(a->cmat->rmap));
    }
    PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin));
    PetscCall(PetscLayoutSetUp(a->cmat->cmap));
  } else {
    /* Reuse the cached submatrix shell; rebuild its layouts only when the requested
       row/column window differs from what the cached layouts already describe */
    PetscBool same = (PetscBool)(rend - rbegin == a->cmat->rmap->N);
    if (same && a->cmat->rmap->N != A->rmap->N) {
      same = (PetscBool)(pend - pbegin == a->cmat->rmap->n);
      /* local sizes must match on every rank for the old row layout to be reusable */
      PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &same, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
    }
    if (!same) {
      PetscCall(PetscLayoutDestroy(&a->cmat->rmap));
      PetscCall(PetscLayoutCreate(comm, &a->cmat->rmap));
      PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin));
      PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin));
      PetscCall(PetscLayoutSetUp(a->cmat->rmap));
    }
    if (cend - cbegin != a->cmat->cmap->N) {
      PetscCall(PetscLayoutDestroy(&a->cmat->cmap));
      PetscCall(PetscLayoutCreate(comm, &a->cmat->cmap));
      PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin));
      PetscCall(PetscLayoutSetUp(a->cmat->cmap));
    }
  }
  c = (Mat_MPIDense *)a->cmat->data;
  PetscCheck(!c->A, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  /* the inner sequential submatrix view backs the returned parallel submatrix */
  PetscCall(MatDenseGetSubMatrix(a->A, pbegin, pend, cbegin, cend, &c->A));

  a->cmat->preallocated = PETSC_TRUE;
  a->cmat->assembled    = PETSC_TRUE;
#if defined(PETSC_HAVE_DEVICE)
  a->cmat->offloadmask = c->A->offloadmask;
#endif
  /* matinuse stores cbegin + 1 so that 0 can mean "no outstanding submatrix" */
  a->matinuse = cbegin + 1;
  *v          = a->cmat;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Returns the cached submatrix view handed out by MatDenseGetSubMatrix_MPIDense()
   and releases the inner sequential submatrix so another window can be requested */
static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A, Mat *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat_MPIDense *c;

  PetscFunctionBegin;
  PetscCheck(a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetSubMatrix() first");
  PetscCheck(a->cmat, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal matrix");
  PetscCheck(*v == a->cmat, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Not the matrix obtained from MatDenseGetSubMatrix()");
  a->matinuse = 0;
  c           = (Mat_MPIDense *)a->cmat->data;
  PetscCall(MatDenseRestoreSubMatrix(a->A, &c->A));
  if (v) *v = NULL; /* note: *v was already dereferenced above, so v is known non-NULL here */
#if defined(PETSC_HAVE_DEVICE)
  A->offloadmask = a->A->offloadmask;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*MC
   MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.

   Options Database Key:
. -mat_type mpidense - sets the matrix type to `MATMPIDENSE` during a call to `MatSetFromOptions()`

  Level: beginner

.seealso: [](ch_matrices), `Mat`, `MatCreateDense()`, `MATSEQDENSE`, `MATDENSE`
M*/
/* Type constructor for MATMPIDENSE: installs the function table and composes all
   type-specific methods; preallocation happens later via MatMPIDenseSetPreallocation() */
PetscErrorCode MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense *a;

  PetscFunctionBegin;
  PetscCall(PetscNew(&a));
  mat->data   = (void *)a;
  mat->ops[0] = MatOps_Values; /* copy the shared ops table into this matrix */

  mat->insertmode = NOT_SET_VALUES;

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;

  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)mat), 1, &mat->stash));

  /* stuff used for matrix vector multiply */
  a->lvec        = NULL;
  a->Mvctx       = NULL;
  a->roworiented = PETSC_TRUE;

  /* compose the MATMPIDENSE implementations of the generic MatDense API */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", MatDenseGetLDA_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", MatDenseSetLDA_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", MatDenseGetArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", MatDenseRestoreArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", MatDenseGetArrayRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", MatDenseRestoreArrayRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", MatDenseGetArrayWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", MatDenseRestoreArrayWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", MatDensePlaceArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", MatDenseResetArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", MatDenseReplaceArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", MatDenseGetSubMatrix_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", MatDenseRestoreSubMatrix_MPIDense));
  /* conversions to/from other matrix types */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", MatConvert_MPIDense_MPIAIJ));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", MatConvert_MPIDense_Elemental));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", MatConvert_Dense_ScaLAPACK));
#endif
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", MatConvert_MPIDense_MPIDenseCUDA));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", MatMPIDenseSetPreallocation_MPIDense));
  /* matrix-product dispatch with (GPU) sparse types */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
#endif
#if defined(PETSC_HAVE_HIP)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", MatConvert_MPIDense_MPIDenseHIP));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", MatDenseGetColumn_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", MatDenseRestoreColumn_MPIDense));
  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, MATMPIDENSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*MC
   MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.

   This matrix type is identical to `MATSEQDENSE` when constructed with a single process communicator,
   and `MATMPIDENSE` otherwise.

   Options Database Key:
. -mat_type dense - sets the matrix type to `MATDENSE` during a call to `MatSetFromOptions()`

  Level: beginner

.seealso: [](ch_matrices), `Mat`, `MATSEQDENSE`, `MATMPIDENSE`, `MATDENSECUDA`, `MATDENSEHIP`
M*/

/*@C
  MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

  Collective

  Input Parameters:
+ B    - the matrix
- data - optional location of matrix data. Set to `NULL` for PETSc
         to control all matrix memory allocation.

  Level: intermediate

  Notes:
  The dense format is fully compatible with standard Fortran
  storage by columns.

  The data input variable is intended primarily for Fortran programmers
  who wish to allocate their own matrix memory space. Most users should
  set `data` to `NULL`.

.seealso: [](ch_matrices), `Mat`, `MATMPIDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
@*/
PetscErrorCode MatMPIDenseSetPreallocation(Mat B, PetscScalar *data)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  /* PetscTryMethod: silently a no-op when B is not MATMPIDENSE */
  PetscTryMethod(B, "MatMPIDenseSetPreallocation_C", (Mat, PetscScalar *), (B, data));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatDensePlaceArray - Allows one to replace the array in a `MATDENSE` matrix with an
  array provided by the user.
  This is useful to avoid copying an array
  into a matrix

  Not Collective

  Input Parameters:
+ mat   - the matrix
- array - the array in column major order

  Level: developer

  Note:
  You can return to the original array with a call to `MatDenseResetArray()`. The user is responsible for freeing this array; it will not be
  freed when the matrix is destroyed.

.seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDenseResetArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`,
          `MatDenseReplaceArray()`
@*/
PetscErrorCode MatDensePlaceArray(Mat mat, const PetscScalar *array)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  /* dispatch to the type-specific implementation; errors if the type has none */
  PetscUseMethod(mat, "MatDensePlaceArray_C", (Mat, const PetscScalar *), (mat, array));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
  /* the user-provided array lives on the host, so the CPU copy is now authoritative */
  mat->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatDenseResetArray - Resets the matrix array to that it previously had before the call to `MatDensePlaceArray()`

  Not Collective

  Input Parameter:
. mat - the matrix

  Level: developer

  Note:
  You can only call this after a call to `MatDensePlaceArray()`

.seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDensePlaceArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`
@*/
PetscErrorCode MatDenseResetArray(Mat mat)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  PetscUseMethod(mat, "MatDenseResetArray_C", (Mat), (mat));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an
  array provided by the user. This is useful to avoid copying an array
  into a matrix

  Not Collective

  Input Parameters:
+ mat   - the matrix
- array - the array in column major order

  Level: developer

  Note:
  The memory passed in MUST be obtained with `PetscMalloc()` and CANNOT be
  freed by the user. It will be freed when the matrix is destroyed.

.seealso: [](ch_matrices), `Mat`, `MatDensePlaceArray()`, `MatDenseGetArray()`, `VecReplaceArray()`
@*/
PetscErrorCode MatDenseReplaceArray(Mat mat, const PetscScalar *array)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  /* unlike MatDensePlaceArray(), ownership of array transfers to the matrix */
  PetscUseMethod(mat, "MatDenseReplaceArray_C", (Mat, const PetscScalar *), (mat, array));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
  mat->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatCreateDense - Creates a matrix in `MATDENSE` format.

  Collective

  Input Parameters:
+ comm - MPI communicator
. m    - number of local rows (or `PETSC_DECIDE` to have calculated if `M` is given)
.
  n - number of local columns (or `PETSC_DECIDE` to have calculated if `N` is given)
. M    - number of global rows (or `PETSC_DECIDE` to have calculated if `m` is given)
. N    - number of global columns (or `PETSC_DECIDE` to have calculated if `n` is given)
- data - optional location of matrix data. Set data to `NULL` (`PETSC_NULL_SCALAR` for Fortran users) for PETSc
         to control all matrix memory allocation.

  Output Parameter:
. A - the matrix

  Level: intermediate

  Notes:
  The dense format is fully compatible with standard Fortran
  storage by columns.

  Although local portions of the matrix are stored in column-major
  order, the matrix is partitioned across MPI ranks by row.

  The data input variable is intended primarily for Fortran programmers
  who wish to allocate their own matrix memory space. Most users should
  set `data` to `NULL` (`PETSC_NULL_SCALAR` for Fortran users).

  The user MUST specify either the local or global matrix dimensions
  (possibly both).

.seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
@*/
PetscErrorCode MatCreateDense(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar *data, Mat *A)
{
  PetscFunctionBegin;
  PetscCall(MatCreate(comm, A));
  PetscCall(MatSetSizes(*A, m, n, M, N));
  PetscCall(MatSetType(*A, MATDENSE));
  /* both calls are try-methods; only the one matching the resolved type
     (MATSEQDENSE on one rank, MATMPIDENSE otherwise) takes effect */
  PetscCall(MatSeqDenseSetPreallocation(*A, data));
  PetscCall(MatMPIDenseSetPreallocation(*A, data));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Duplicates a MATMPIDENSE matrix: replicates the layouts and flags by hand,
   then duplicates the inner sequential matrix (copying values iff cpvalues says so) */
static PetscErrorCode MatDuplicate_MPIDense(Mat A, MatDuplicateOption cpvalues, Mat *newmat)
{
  Mat           mat;
  Mat_MPIDense *a, *oldmat = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  *newmat = NULL;
  PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat));
  PetscCall(MatSetSizes(mat, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
  PetscCall(MatSetType(mat, ((PetscObject)A)->type_name));
  a = (Mat_MPIDense *)mat->data;

  mat->factortype   = A->factortype;
  mat->assembled    = PETSC_TRUE;
  mat->preallocated = PETSC_TRUE;

  mat->insertmode = NOT_SET_VALUES;
  a->donotstash   = oldmat->donotstash;

  /* share (reference-count) the layouts rather than rebuilding them */
  PetscCall(PetscLayoutReference(A->rmap, &mat->rmap));
  PetscCall(PetscLayoutReference(A->cmap, &mat->cmap));

  PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));

  *newmat = mat;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Loads a MATMPIDENSE matrix from a binary or (optionally) HDF5 viewer */
static PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
{
  PetscBool isbinary;
#if defined(PETSC_HAVE_HDF5)
  PetscBool ishdf5;
#endif

  PetscFunctionBegin;
  PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
  PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
  /* force binary viewer to load .info file if it has not yet done so */
  PetscCall(PetscViewerSetUp(viewer));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
#if defined(PETSC_HAVE_HDF5)
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
#endif
  if (isbinary) {
    PetscCall(MatLoad_Dense_Binary(newMat, viewer));
#if defined(PETSC_HAVE_HDF5)
  } else if (ishdf5) {
    PetscCall(MatLoad_Dense_HDF5(newMat, viewer));
#endif
  } else SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Two parallel dense matrices are equal iff every rank's local block is equal;
   the local results are combined with a logical-AND reduction */
static PetscErrorCode MatEqual_MPIDense(Mat A, Mat B, PetscBool *flag)
{
  Mat_MPIDense *matB = (Mat_MPIDense *)B->data, *matA = (Mat_MPIDense *)A->data;
  Mat           a, b;

  PetscFunctionBegin;
  a = matA->A;
  b = matB->A;
  PetscCall(MatEqual(a, b, flag));
  PetscCall(MPIU_Allreduce(MPI_IN_PLACE, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Destructor for the A^T*B product data (send buffer, receive counts, cached A^T*B) */
static PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data)
{
  Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data;

  PetscFunctionBegin;
  PetscCall(PetscFree2(atb->sendbuf, atb->recvcounts));
  PetscCall(MatDestroy(&atb->atb));
  PetscCall(PetscFree(atb));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Destructor for the A*B^T product data (double buffers and allgatherv counts/displs) */
static PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data)
{
  Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data;

  PetscFunctionBegin;
  PetscCall(PetscFree2(abt->buf[0], abt->buf[1]));
  PetscCall(PetscFree2(abt->recvcounts, abt->recvdispls));
  PetscCall(PetscFree(abt));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Numeric phase of C = A^T * B: each rank computes its local contribution
   aseq^T * bseq, packs it by destination rank, and an MPI_Reduce_scatter sums
   and distributes the row blocks of C */
static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
{
  Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
  Mat_TransMatMultDense *atb;
  MPI_Comm               comm;
  PetscMPIInt            size, *recvcounts;
  PetscScalar           *carray, *sendbuf;
  const PetscScalar     *atbarray;
  PetscInt               i, cN = C->cmap->N, proc, k, j, lda;
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C, 3);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
  atb        = (Mat_TransMatMultDense *)C->product->data;
  recvcounts = atb->recvcounts;
  sendbuf    = atb->sendbuf;

  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));

  /* compute atbarray = aseq^T * bseq */
  PetscCall(MatTransposeMatMult(a->A, b->A, atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX, PETSC_DEFAULT, &atb->atb));

  PetscCall(MatGetOwnershipRanges(C, &ranges));

  /* arrange atbarray into sendbuf: for each destination rank, the rows it owns,
     column by column, so Reduce_scatter can hand each rank a contiguous block */
  PetscCall(MatDenseGetArrayRead(atb->atb, &atbarray));
  PetscCall(MatDenseGetLDA(atb->atb, &lda));
  for (proc = 0, k = 0; proc < size; proc++) {
    for (j = 0; j < cN; j++) {
      for (i = ranges[proc]; i < ranges[proc + 1]; i++) sendbuf[k++] = atbarray[i + j * lda];
    }
  }
  PetscCall(MatDenseRestoreArrayRead(atb->atb, &atbarray));

  /* sum all atbarray to local values of C */
  PetscCall(MatDenseGetArrayWrite(c->A, &carray));
  PetscCallMPI(MPI_Reduce_scatter(sendbuf, carray, recvcounts, MPIU_SCALAR, MPIU_SUM, comm));
  PetscCall(MatDenseRestoreArrayWrite(c->A, &carray));
  PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Symbolic phase of C = A^T * B: sizes C, allocates the pack buffer and
   the Reduce_scatter receive counts */
static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
{
  MPI_Comm comm;
PetscMPIInt size; 2017 PetscInt cm = A->cmap->n, cM, cN = B->cmap->N; 2018 Mat_TransMatMultDense *atb; 2019 PetscBool cisdense = PETSC_FALSE; 2020 PetscInt i; 2021 const PetscInt *ranges; 2022 2023 PetscFunctionBegin; 2024 MatCheckProduct(C, 4); 2025 PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 2026 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 2027 if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) { 2028 SETERRQ(comm, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend); 2029 } 2030 2031 /* create matrix product C */ 2032 PetscCall(MatSetSizes(C, cm, B->cmap->n, A->cmap->N, B->cmap->N)); 2033 #if defined(PETSC_HAVE_CUDA) 2034 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, "")); 2035 #endif 2036 #if defined(PETSC_HAVE_HIP) 2037 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSEHIP, "")); 2038 #endif 2039 if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 2040 PetscCall(MatSetUp(C)); 2041 2042 /* create data structure for reuse C */ 2043 PetscCallMPI(MPI_Comm_size(comm, &size)); 2044 PetscCall(PetscNew(&atb)); 2045 cM = C->rmap->N; 2046 PetscCall(PetscMalloc2(cM * cN, &atb->sendbuf, size, &atb->recvcounts)); 2047 PetscCall(MatGetOwnershipRanges(C, &ranges)); 2048 for (i = 0; i < size; i++) atb->recvcounts[i] = (ranges[i + 1] - ranges[i]) * cN; 2049 2050 C->product->data = atb; 2051 C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense; 2052 PetscFunctionReturn(PETSC_SUCCESS); 2053 } 2054 2055 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C) 2056 { 2057 MPI_Comm comm; 2058 PetscMPIInt i, size; 2059 PetscInt maxRows, bufsiz; 2060 PetscMPIInt tag; 2061 
PetscInt alg; 2062 Mat_MatTransMultDense *abt; 2063 Mat_Product *product = C->product; 2064 PetscBool flg; 2065 2066 PetscFunctionBegin; 2067 MatCheckProduct(C, 4); 2068 PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 2069 /* check local size of A and B */ 2070 PetscCheck(A->cmap->n == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local column dimensions are incompatible, A (%" PetscInt_FMT ") != B (%" PetscInt_FMT ")", A->cmap->n, B->cmap->n); 2071 2072 PetscCall(PetscStrcmp(product->alg, "allgatherv", &flg)); 2073 alg = flg ? 0 : 1; 2074 2075 /* setup matrix product C */ 2076 PetscCall(MatSetSizes(C, A->rmap->n, B->rmap->n, A->rmap->N, B->rmap->N)); 2077 PetscCall(MatSetType(C, MATMPIDENSE)); 2078 PetscCall(MatSetUp(C)); 2079 PetscCall(PetscObjectGetNewTag((PetscObject)C, &tag)); 2080 2081 /* create data structure for reuse C */ 2082 PetscCall(PetscObjectGetComm((PetscObject)C, &comm)); 2083 PetscCallMPI(MPI_Comm_size(comm, &size)); 2084 PetscCall(PetscNew(&abt)); 2085 abt->tag = tag; 2086 abt->alg = alg; 2087 switch (alg) { 2088 case 1: /* alg: "cyclic" */ 2089 for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i])); 2090 bufsiz = A->cmap->N * maxRows; 2091 PetscCall(PetscMalloc2(bufsiz, &(abt->buf[0]), bufsiz, &(abt->buf[1]))); 2092 break; 2093 default: /* alg: "allgatherv" */ 2094 PetscCall(PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1]))); 2095 PetscCall(PetscMalloc2(size, &(abt->recvcounts), size + 1, &(abt->recvdispls))); 2096 for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N; 2097 for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i]; 2098 break; 2099 } 2100 2101 C->product->data = abt; 2102 C->product->destroy = MatDestroy_MatMatTransMult_MPIDense_MPIDense; 2103 C->ops->mattransposemultnumeric = 
MatMatTransposeMultNumeric_MPIDense_MPIDense;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Numeric phase of C = A * B^T, "cyclic" algorithm: the row blocks of B travel
   around a ring; each step overlaps the Isend/Irecv of the next block with a
   local GEMM against the block currently held */
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
{
  Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
  Mat_MatTransMultDense *abt;
  MPI_Comm               comm;
  PetscMPIInt            rank, size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom;
  PetscScalar           *sendbuf, *recvbuf = NULL, *cv;
  PetscInt               i, cK = A->cmap->N, k, j, bn;
  PetscScalar            _DOne = 1.0, _DZero = 0.0;
  const PetscScalar     *av, *bv;
  PetscBLASInt           cm, cn, ck, alda, blda = 0, clda;
  MPI_Request            reqs[2];
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C, 3);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
  abt = (Mat_MatTransMultDense *)C->product->data;
  PetscCall(PetscObjectGetComm((PetscObject)C, &comm));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCall(MatDenseGetArrayRead(a->A, &av));
  PetscCall(MatDenseGetArrayRead(b->A, &bv));
  PetscCall(MatDenseGetArrayWrite(c->A, &cv));
  PetscCall(MatDenseGetLDA(a->A, &i));
  PetscCall(PetscBLASIntCast(i, &alda));
  PetscCall(MatDenseGetLDA(b->A, &i));
  PetscCall(PetscBLASIntCast(i, &blda));
  PetscCall(MatDenseGetLDA(c->A, &i));
  PetscCall(PetscBLASIntCast(i, &clda));
  PetscCall(MatGetOwnershipRanges(B, &ranges));
  bn = B->rmap->n;
  if (blda == bn) {
    /* local B is contiguous column-major: send it in place, no packing needed */
    sendbuf = (PetscScalar *)bv;
  } else {
    /* pack local B column by column into a contiguous buffer */
    sendbuf = abt->buf[0];
    for (k = 0, i = 0; i < cK; i++) {
      for (j = 0; j < bn; j++, k++) sendbuf[k] = bv[i * blda + j];
    }
  }
  if (size > 1) {
    /* ring neighbors: send "down", receive "up" */
    sendto   = (rank + size - 1) % size;
    recvfrom = (rank + size + 1) % size;
  } else {
    sendto = recvfrom = 0;
  }
  PetscCall(PetscBLASIntCast(cK, &ck));
  PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm));
  recvisfrom = rank;
  for (i = 0; i < size; i++) {
    /* we have finished receiving in sending, bufs can be read/modified */
    PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */
    PetscInt nextbn         = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];

    if (nextrecvisfrom != rank) {
      /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */
      sendsiz = cK * bn;
      recvsiz = cK * nextbn;
      recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1];
      PetscCallMPI(MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]));
      PetscCallMPI(MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]));
    }

    /* local aseq * sendbuf^T */
    PetscCall(PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn));
    if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &cm, &cn, &ck, &_DOne, av, &alda, sendbuf, &cn, &_DZero, cv + clda * ranges[recvisfrom], &clda));

    if (nextrecvisfrom != rank) {
      /* wait for the sends and receives to complete, swap sendbuf and recvbuf */
      PetscCallMPI(MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE));
    }
    bn         = nextbn;
    recvisfrom = nextrecvisfrom;
    sendbuf    = recvbuf;
  }
  PetscCall(MatDenseRestoreArrayRead(a->A, &av));
  PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
  PetscCall(MatDenseRestoreArrayWrite(c->A, &cv));
  PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Numeric phase of C = A * B^T, "allgatherv" algorithm: every rank gathers the
   whole of B^T, then a single local GEMM produces its rows of C */
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C)
{
  Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
Mat_MatTransMultDense *abt; 2195 MPI_Comm comm; 2196 PetscMPIInt size; 2197 PetscScalar *cv, *sendbuf, *recvbuf; 2198 const PetscScalar *av, *bv; 2199 PetscInt blda, i, cK = A->cmap->N, k, j, bn; 2200 PetscScalar _DOne = 1.0, _DZero = 0.0; 2201 PetscBLASInt cm, cn, ck, alda, clda; 2202 2203 PetscFunctionBegin; 2204 MatCheckProduct(C, 3); 2205 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 2206 abt = (Mat_MatTransMultDense *)C->product->data; 2207 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 2208 PetscCallMPI(MPI_Comm_size(comm, &size)); 2209 PetscCall(MatDenseGetArrayRead(a->A, &av)); 2210 PetscCall(MatDenseGetArrayRead(b->A, &bv)); 2211 PetscCall(MatDenseGetArrayWrite(c->A, &cv)); 2212 PetscCall(MatDenseGetLDA(a->A, &i)); 2213 PetscCall(PetscBLASIntCast(i, &alda)); 2214 PetscCall(MatDenseGetLDA(b->A, &blda)); 2215 PetscCall(MatDenseGetLDA(c->A, &i)); 2216 PetscCall(PetscBLASIntCast(i, &clda)); 2217 /* copy transpose of B into buf[0] */ 2218 bn = B->rmap->n; 2219 sendbuf = abt->buf[0]; 2220 recvbuf = abt->buf[1]; 2221 for (k = 0, j = 0; j < bn; j++) { 2222 for (i = 0; i < cK; i++, k++) sendbuf[k] = bv[i * blda + j]; 2223 } 2224 PetscCall(MatDenseRestoreArrayRead(b->A, &bv)); 2225 PetscCallMPI(MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm)); 2226 PetscCall(PetscBLASIntCast(cK, &ck)); 2227 PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm)); 2228 PetscCall(PetscBLASIntCast(c->A->cmap->n, &cn)); 2229 if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &cm, &cn, &ck, &_DOne, av, &alda, recvbuf, &ck, &_DZero, cv, &clda)); 2230 PetscCall(MatDenseRestoreArrayRead(a->A, &av)); 2231 PetscCall(MatDenseRestoreArrayRead(b->A, &bv)); 2232 PetscCall(MatDenseRestoreArrayWrite(c->A, &cv)); 2233 PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 2234 PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 2235 PetscCall(MatAssemblyEnd(C, 
MAT_FINAL_ASSEMBLY)); 2236 PetscFunctionReturn(PETSC_SUCCESS); 2237 } 2238 2239 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) 2240 { 2241 Mat_MatTransMultDense *abt; 2242 2243 PetscFunctionBegin; 2244 MatCheckProduct(C, 3); 2245 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 2246 abt = (Mat_MatTransMultDense *)C->product->data; 2247 switch (abt->alg) { 2248 case 1: 2249 PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C)); 2250 break; 2251 default: 2252 PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C)); 2253 break; 2254 } 2255 PetscFunctionReturn(PETSC_SUCCESS); 2256 } 2257 2258 static PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data) 2259 { 2260 Mat_MatMultDense *ab = (Mat_MatMultDense *)data; 2261 2262 PetscFunctionBegin; 2263 PetscCall(MatDestroy(&ab->Ce)); 2264 PetscCall(MatDestroy(&ab->Ae)); 2265 PetscCall(MatDestroy(&ab->Be)); 2266 PetscCall(PetscFree(ab)); 2267 PetscFunctionReturn(PETSC_SUCCESS); 2268 } 2269 2270 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) 2271 { 2272 Mat_MatMultDense *ab; 2273 Mat_MPIDense *mdn = (Mat_MPIDense *)A->data; 2274 2275 PetscFunctionBegin; 2276 MatCheckProduct(C, 3); 2277 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Missing product data"); 2278 ab = (Mat_MatMultDense *)C->product->data; 2279 if (ab->Ae && ab->Ce) { 2280 #if PetscDefined(HAVE_ELEMENTAL) 2281 PetscCall(MatConvert_MPIDense_Elemental(A, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Ae)); 2282 PetscCall(MatConvert_MPIDense_Elemental(B, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Be)); 2283 PetscCall(MatMatMultNumeric_Elemental(ab->Ae, ab->Be, ab->Ce)); 2284 PetscCall(MatConvert(ab->Ce, MATMPIDENSE, MAT_REUSE_MATRIX, &C)); 2285 #else 2286 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "PETSC_HAVE_ELEMENTAL not defined"); 2287 #endif 2288 } else { 2289 
const PetscScalar *read; 2290 PetscScalar *write; 2291 PetscInt lda; 2292 2293 PetscCall(MatDenseGetLDA(B, &lda)); 2294 PetscCall(MatDenseGetArrayRead(B, &read)); 2295 PetscCall(MatDenseGetArrayWrite(ab->Be, &write)); 2296 if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); /* cannot be done during the symbolic phase because of possible calls to MatProductReplaceMats() */ 2297 for (PetscInt i = 0; i < C->cmap->N; ++i) { 2298 PetscCall(PetscSFBcastBegin(mdn->Mvctx, MPIU_SCALAR, read + i * lda, write + i * ab->Be->rmap->n, MPI_REPLACE)); 2299 PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, read + i * lda, write + i * ab->Be->rmap->n, MPI_REPLACE)); 2300 } 2301 PetscCall(MatDenseRestoreArrayWrite(ab->Be, &write)); 2302 PetscCall(MatDenseRestoreArrayRead(B, &read)); 2303 PetscCall(MatMatMultNumeric_SeqDense_SeqDense(((Mat_MPIDense *)(A->data))->A, ab->Be, ((Mat_MPIDense *)(C->data))->A)); 2304 } 2305 PetscFunctionReturn(PETSC_SUCCESS); 2306 } 2307 2308 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C) 2309 { 2310 Mat_Product *product = C->product; 2311 PetscInt alg; 2312 Mat_MatMultDense *ab; 2313 PetscBool flg; 2314 2315 PetscFunctionBegin; 2316 MatCheckProduct(C, 4); 2317 PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 2318 /* check local size of A and B */ 2319 PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ", %" PetscInt_FMT ")", 2320 A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend); 2321 2322 PetscCall(PetscStrcmp(product->alg, "petsc", &flg)); 2323 alg = flg ? 
0 : 1; 2324 2325 /* setup C */ 2326 PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N)); 2327 PetscCall(MatSetType(C, MATMPIDENSE)); 2328 PetscCall(MatSetUp(C)); 2329 2330 /* create data structure for reuse Cdense */ 2331 PetscCall(PetscNew(&ab)); 2332 2333 switch (alg) { 2334 case 1: /* alg: "elemental" */ 2335 #if PetscDefined(HAVE_ELEMENTAL) 2336 /* create elemental matrices Ae and Be */ 2337 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &ab->Ae)); 2338 PetscCall(MatSetSizes(ab->Ae, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N)); 2339 PetscCall(MatSetType(ab->Ae, MATELEMENTAL)); 2340 PetscCall(MatSetUp(ab->Ae)); 2341 PetscCall(MatSetOption(ab->Ae, MAT_ROW_ORIENTED, PETSC_FALSE)); 2342 2343 PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &ab->Be)); 2344 PetscCall(MatSetSizes(ab->Be, PETSC_DECIDE, PETSC_DECIDE, B->rmap->N, B->cmap->N)); 2345 PetscCall(MatSetType(ab->Be, MATELEMENTAL)); 2346 PetscCall(MatSetUp(ab->Be)); 2347 PetscCall(MatSetOption(ab->Be, MAT_ROW_ORIENTED, PETSC_FALSE)); 2348 2349 /* compute symbolic Ce = Ae*Be */ 2350 PetscCall(MatCreate(PetscObjectComm((PetscObject)C), &ab->Ce)); 2351 PetscCall(MatMatMultSymbolic_Elemental(ab->Ae, ab->Be, fill, ab->Ce)); 2352 #else 2353 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "PETSC_HAVE_ELEMENTAL not defined"); 2354 #endif 2355 break; 2356 default: /* alg: "petsc" */ 2357 ab->Ae = NULL; 2358 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, A->cmap->N, B->cmap->N, NULL, &ab->Be)); 2359 ab->Ce = NULL; 2360 break; 2361 } 2362 2363 C->product->data = ab; 2364 C->product->destroy = MatDestroy_MatMatMult_MPIDense_MPIDense; 2365 C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense; 2366 PetscFunctionReturn(PETSC_SUCCESS); 2367 } 2368 2369 static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C) 2370 { 2371 Mat_Product *product = C->product; 2372 const char *algTypes[2] = {"petsc", "elemental"}; 2373 PetscInt alg, nalg = PetscDefined(HAVE_ELEMENTAL) ? 
2 : 1; 2374 PetscBool flg = PETSC_FALSE; 2375 PetscFunctionBegin; 2376 2377 /* Set default algorithm */ 2378 alg = 0; /* default is petsc */ 2379 PetscCall(PetscStrcmp(product->alg, "default", &flg)); 2380 if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg])); 2381 2382 /* Get runtime option */ 2383 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat"); 2384 PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_AB", algTypes, nalg, algTypes[alg], &alg, &flg)); 2385 PetscOptionsEnd(); 2386 if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg])); 2387 2388 C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense; 2389 C->ops->productsymbolic = MatProductSymbolic_AB; 2390 PetscFunctionReturn(PETSC_SUCCESS); 2391 } 2392 2393 static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C) 2394 { 2395 Mat_Product *product = C->product; 2396 Mat A = product->A, B = product->B; 2397 2398 PetscFunctionBegin; 2399 PetscCheck(A->rmap->rstart == B->rmap->rstart && A->rmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", 2400 A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend); 2401 C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense; 2402 C->ops->productsymbolic = MatProductSymbolic_AtB; 2403 PetscFunctionReturn(PETSC_SUCCESS); 2404 } 2405 2406 static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C) 2407 { 2408 Mat_Product *product = C->product; 2409 const char *algTypes[2] = {"allgatherv", "cyclic"}; 2410 PetscInt alg, nalg = 2; 2411 PetscBool flg = PETSC_FALSE; 2412 2413 PetscFunctionBegin; 2414 /* Set default algorithm */ 2415 alg = 0; /* default is allgatherv */ 2416 PetscCall(PetscStrcmp(product->alg, "default", &flg)); 2417 if (flg) 
PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg])); 2418 2419 /* Get runtime option */ 2420 if (product->api_user) { 2421 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatTransposeMult", "Mat"); 2422 PetscCall(PetscOptionsEList("-matmattransmult_mpidense_mpidense_via", "Algorithmic approach", "MatMatTransposeMult", algTypes, nalg, algTypes[alg], &alg, &flg)); 2423 PetscOptionsEnd(); 2424 } else { 2425 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_ABt", "Mat"); 2426 PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_ABt", algTypes, nalg, algTypes[alg], &alg, &flg)); 2427 PetscOptionsEnd(); 2428 } 2429 if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg])); 2430 2431 C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense; 2432 C->ops->productsymbolic = MatProductSymbolic_ABt; 2433 PetscFunctionReturn(PETSC_SUCCESS); 2434 } 2435 2436 static PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C) 2437 { 2438 Mat_Product *product = C->product; 2439 2440 PetscFunctionBegin; 2441 switch (product->type) { 2442 case MATPRODUCT_AB: 2443 PetscCall(MatProductSetFromOptions_MPIDense_AB(C)); 2444 break; 2445 case MATPRODUCT_AtB: 2446 PetscCall(MatProductSetFromOptions_MPIDense_AtB(C)); 2447 break; 2448 case MATPRODUCT_ABt: 2449 PetscCall(MatProductSetFromOptions_MPIDense_ABt(C)); 2450 break; 2451 default: 2452 break; 2453 } 2454 PetscFunctionReturn(PETSC_SUCCESS); 2455 } 2456