/*
   Basic functions for basic parallel dense matrices.
   Portions of this code are under:
   Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
*/

#include <../src/mat/impls/dense/mpi/mpidense.h> /*I "petscmat.h" I*/
#include <../src/mat/impls/aij/mpi/mpiaij.h>
#include <petscblaslapack.h>

/*@
  MatDenseGetLocalMatrix - For a `MATMPIDENSE` or `MATSEQDENSE` matrix returns the sequential
  matrix that represents the operator. For sequential matrices it returns itself.

  Input Parameter:
. A - the sequential or MPI `MATDENSE` matrix

  Output Parameter:
. B - the inner matrix

  Level: intermediate

.seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATMPIDENSE`, `MATSEQDENSE`
@*/
PetscErrorCode MatDenseGetLocalMatrix(Mat A, Mat *B)
{
  Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
  PetscBool     flg;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscValidPointer(B, 2);
  /* Base-type compare so derived types (e.g. GPU variants) also match */
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &flg));
  if (flg) *B = mat->A;
  else {
    PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQDENSE, &flg));
    PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)A)->type_name);
    *B = A;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Copy by delegating to the local (sequential) dense matrices; layouts are assumed compatible by the caller */
PetscErrorCode MatCopy_MPIDense(Mat A, Mat B, MatStructure s)
{
  Mat_MPIDense *Amat = (Mat_MPIDense *)A->data;
  Mat_MPIDense *Bmat = (Mat_MPIDense *)B->data;

  PetscFunctionBegin;
  PetscCall(MatCopy(Amat->A, Bmat->A, s));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Add alpha to each locally owned diagonal entry. Each process touches only the
   diagonal entries whose global row index lies in [rstart, rend); rend is clamped
   to the global column count so rectangular matrices are handled. */
PetscErrorCode MatShift_MPIDense(Mat A, PetscScalar alpha)
{
  Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
  PetscInt      j, lda, rstart = A->rmap->rstart, rend = A->rmap->rend, rend2;
  PetscScalar  *v;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArray(mat->A, &v));
  PetscCall(MatDenseGetLDA(mat->A, &lda));
  rend2 = PetscMin(rend, A->cmap->N); /* last diagonal entry this rank can own */
  if (rend2 > rstart) {
    /* global (j,j) lives at local row j-rstart, column j (column-major with leading dimension lda) */
    for (j = rstart; j < rend2; j++) v[j - rstart + j * lda] += alpha;
    PetscCall(PetscLogFlops(rend2 - rstart));
  }
  PetscCall(MatDenseRestoreArray(mat->A, &v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Access a single (locally owned) row; pair with MatRestoreRow_MPIDense() */
PetscErrorCode MatGetRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
  PetscInt      lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;

  PetscFunctionBegin;
  PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows");
  lrow = row - rstart; /* convert global row to local row of the inner sequential matrix */
  PetscCall(MatGetRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Release the row obtained with MatGetRow_MPIDense() */
PetscErrorCode MatRestoreRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
  PetscInt      lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;

  PetscFunctionBegin;
  PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows");
  lrow = row - rstart;
  PetscCall(MatRestoreRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Return (and cache on the object as "DiagonalBlock") a sequential dense matrix sharing
   storage with the diagonal block of this process. Requires congruent row/column layouts. */
PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A, Mat *a)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)A->data;
  PetscInt      m = A->rmap->n, rstart = A->rmap->rstart;
  PetscScalar  *array;
  MPI_Comm      comm;
  PetscBool     flg;
  Mat           B;

  PetscFunctionBegin;
  PetscCall(MatHasCongruentLayouts(A, &flg));
  PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only square matrices supported.");
  PetscCall(PetscObjectQuery((PetscObject)A, "DiagonalBlock", (PetscObject *)&B));
  if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */
#if PetscDefined(HAVE_CUDA)
    PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSECUDA, &flg));
    PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSECUDA);
#elif PetscDefined(HAVE_HIP)
    PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSEHIP, &flg));
    PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSEHIP);
#endif
    PetscCall(PetscObjectGetComm((PetscObject)(mdn->A), &comm));
    PetscCall(MatCreate(comm, &B));
    PetscCall(MatSetSizes(B, m, m, m, m));
    PetscCall(MatSetType(B, ((PetscObject)mdn->A)->type_name));
    /* B aliases the local storage starting at column rstart.
       NOTE(review): the offset m*rstart assumes the local array is contiguous (lda == m) — confirm */
    PetscCall(MatDenseGetArrayRead(mdn->A, (const PetscScalar **)&array));
    PetscCall(MatSeqDenseSetPreallocation(B, array + m * rstart));
    PetscCall(MatDenseRestoreArrayRead(mdn->A, (const PetscScalar **)&array));
    PetscCall(PetscObjectCompose((PetscObject)A, "DiagonalBlock", (PetscObject)B));
    *a = B;
    PetscCall(MatDestroy(&B)); /* composition holds the reference; drop ours */
  } else *a = B;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Insert/add values: locally owned rows go straight into the inner sequential matrix;
   off-process rows are stashed (unless donotstash) and communicated during assembly. */
PetscErrorCode MatSetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIDense *A = (Mat_MPIDense *)mat->data;
  PetscInt      i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;
  PetscBool     roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row indices are ignored by convention */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        PetscCall(MatSetValues(A->A, 1, &row, n, idxn, v + i * n, addv));
      } else {
        /* column-oriented input: values for entry (i,j) live at v[i + j*m] */
        for (j = 0; j < n; j++) {
          if (idxn[j] < 0) continue;
          PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large");
          PetscCall(MatSetValues(A->A, 1, &row, 1, &idxn[j], v + i + j * m, addv));
        }
      }
    } else if (!A->donotstash) {
      mat->assembled = PETSC_FALSE; /* off-process entry invalidates current assembly */
      if (roworiented) {
        PetscCall(MatStashValuesRow_Private(&mat->stash, idxm[i], n, idxn, v + i * n, PETSC_FALSE));
      } else {
        PetscCall(MatStashValuesCol_Private(&mat->stash, idxm[i], n, idxn, v + i, m, PETSC_FALSE));
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Retrieve values; only locally owned rows are supported (no communication is performed) */
PetscErrorCode MatGetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
  PetscInt      i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j = 0; j < n; j++) {
        if (idxn[j] < 0) continue; /* negative column */
        PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large");
        PetscCall(MatGetValues(mdn->A, 1, &row, 1, &idxn[j], v + i * n + j));
      }
    } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Leading dimension of the local column-major storage (delegates to the inner matrix) */
static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A, PetscInt *lda)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseGetLDA(a->A, lda));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Set the leading dimension; creates the inner sequential matrix first if it does not exist yet,
   choosing the GPU sequential type when A itself is a GPU type. */
static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A, PetscInt lda)
{
  Mat_MPIDense *a     = (Mat_MPIDense *)A->data;
  MatType       mtype = MATSEQDENSE;

  PetscFunctionBegin;
  if (!a->A) {
    PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
    PetscCall(PetscLayoutSetUp(A->rmap));
    PetscCall(PetscLayoutSetUp(A->cmap));
    PetscCall(MatCreate(PETSC_COMM_SELF, &a->A));
    /* inner matrix is local-rows x ALL columns */
    PetscCall(MatSetSizes(a->A, A->rmap->n, A->cmap->N, A->rmap->n, A->cmap->N));
#if PetscDefined(HAVE_CUDA)
    PetscBool iscuda;
    PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSECUDA, &iscuda));
    if (iscuda) mtype = MATSEQDENSECUDA;
#elif PetscDefined(HAVE_HIP)
    PetscBool iship;
    PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSEHIP, &iship));
    if (iship) mtype = MATSEQDENSEHIP;
#endif
    PetscCall(MatSetType(a->A, mtype));
  }
  PetscCall(MatDenseSetLDA(a->A, lda));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Read-write access to the local array; forbidden while a sub-matrix view is outstanding */
static PetscErrorCode MatDenseGetArray_MPIDense(Mat A, PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseGetArray(a->A, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Read-only access to the local array */
static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A, const PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseGetArrayRead(a->A, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Write-only access to the local array (previous contents need not be preserved) */
static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A, PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseGetArrayWrite(a->A, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Temporarily substitute a user-provided array as the local storage (see MatDensePlaceArray) */
static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A, const PetscScalar *array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDensePlaceArray(a->A, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Undo MatDensePlaceArray(), restoring the original storage */
static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseResetArray(a->A));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Permanently replace the local storage with a user-provided array (ownership transfers) */
static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A, const PetscScalar *array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseReplaceArray(a->A, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Extract a submatrix. The row IS must respect the existing row partitioning
   (no redistribution); the column IS is gathered onto every process. */
static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A, IS isrow, IS iscol, MatReuse scall, Mat *B)
{
  Mat_MPIDense      *mat = (Mat_MPIDense *)A->data, *newmatd;
  PetscInt           lda, i, j, rstart, rend, nrows, ncols, Ncols, nlrows, nlcols;
  const PetscInt    *irow, *icol;
  const PetscScalar *v;
  PetscScalar       *bv;
  Mat                newmat;
  IS                 iscol_local;
  MPI_Comm           comm_is, comm_mat;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm_mat));
  PetscCall(PetscObjectGetComm((PetscObject)iscol, &comm_is));
  PetscCheck(comm_mat == comm_is, PETSC_COMM_SELF, PETSC_ERR_ARG_NOTSAMECOMM, "IS communicator must match matrix communicator");

  PetscCall(ISAllGather(iscol, &iscol_local)); /* every rank needs all selected columns */
  PetscCall(ISGetIndices(isrow, &irow));
  PetscCall(ISGetIndices(iscol_local, &icol));
  PetscCall(ISGetLocalSize(isrow, &nrows));
  PetscCall(ISGetLocalSize(iscol, &ncols));
  PetscCall(ISGetSize(iscol, &Ncols)); /* global number of columns, size of iscol_local */

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK. ... Currently supports only submatrix same partitioning as
     original matrix! */

  PetscCall(MatGetLocalSize(A, &nlrows, &nlcols));
  PetscCall(MatGetOwnershipRange(A, &rstart, &rend));

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &newmat));
    PetscCall(MatSetSizes(newmat, nrows, ncols, PETSC_DECIDE, Ncols));
    PetscCall(MatSetType(newmat, ((PetscObject)A)->type_name));
    PetscCall(MatMPIDenseSetPreallocation(newmat, NULL));
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense *)newmat->data;
  PetscCall(MatDenseGetArray(newmatd->A, &bv));
  PetscCall(MatDenseGetArrayRead(mat->A, &v));
  PetscCall(MatDenseGetLDA(mat->A, &lda));
  for (i = 0; i < Ncols; i++) {
    const PetscScalar *av = v + lda * icol[i]; /* start of source column icol[i] */
    for (j = 0; j < nrows; j++) *bv++ = av[irow[j] - rstart];
  }
  PetscCall(MatDenseRestoreArrayRead(mat->A, &v));
  PetscCall(MatDenseRestoreArray(newmatd->A, &bv));

  /* Assemble the matrices so that the correct flags are set */
  PetscCall(MatAssemblyBegin(newmat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(newmat, MAT_FINAL_ASSEMBLY));

  /* Free work space */
  PetscCall(ISRestoreIndices(isrow, &irow));
  PetscCall(ISRestoreIndices(iscol_local, &icol));
  PetscCall(ISDestroy(&iscol_local));
  *B = newmat;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Restore counterparts of the array accessors above; delegate to the inner matrix */
PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A, PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseRestoreArray(a->A, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A, const PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseRestoreArrayRead(a->A, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A, PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseRestoreArrayWrite(a->A, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Start communicating stashed off-process entries (no-op when stashing is disabled) */
PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat, MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
  PetscInt      nstash, reallocs;

  PetscFunctionBegin;
  if (mdn->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
  PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
  PetscCall(PetscInfo(mdn->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Receive stashed entries, insert them locally, then assemble the inner matrix */
PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat, MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
  PetscInt      i, *row, *col, flg, j, rstart, ncols;
  PetscMPIInt   n;
  PetscScalar  *val;

  PetscFunctionBegin;
  if (!mdn->donotstash && !mat->nooffprocentries) {
    /* wait on receives */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break; /* no more incoming messages */

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIDense(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
  }

  PetscCall(MatAssemblyBegin(mdn->A, mode));
  PetscCall(MatAssemblyEnd(mdn->A, mode));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Zero all entries (delegates to the inner sequential matrix) */
PetscErrorCode MatZeroEntries_MPIDense(Mat A)
{
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatZeroEntries(l->A));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Zero the listed rows; optionally fix b so that b = diag*x on those rows, and
   place diag on the diagonal (done via MatDiagonalSet over the whole diagonal). */
PetscErrorCode MatZeroRows_MPIDense(Mat A, PetscInt n, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;
  PetscInt      i, len, *lrows;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(PetscLayoutMapLocal(A->rmap, n, rows, &len, &lrows, NULL));
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArrayWrite(b, &bb));
    for (i = 0; i < len; ++i) bb[lrows[i]] = diag * xx[lrows[i]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArrayWrite(b, &bb));
  }
  PetscCall(MatZeroRows(l->A, len, lrows, 0.0, NULL, NULL));
  if (diag != 0.0) {
    Vec d;

    PetscCall(MatCreateVecs(A, NULL, &d));
    PetscCall(VecSet(d, diag));
    PetscCall(MatDiagonalSet(A, d, INSERT_VALUES));
    PetscCall(VecDestroy(&d));
  }
  PetscCall(PetscFree(lrows));
  PetscFunctionReturn(PETSC_SUCCESS);
}
PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat, Vec, Vec); 468 PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat, Vec, Vec, Vec); 469 PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat, Vec, Vec); 470 PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat, Vec, Vec, Vec); 471 472 PetscErrorCode MatMult_MPIDense(Mat mat, Vec xx, Vec yy) 473 { 474 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 475 const PetscScalar *ax; 476 PetscScalar *ay; 477 PetscMemType axmtype, aymtype; 478 479 PetscFunctionBegin; 480 if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat)); 481 PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype)); 482 PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype)); 483 PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE)); 484 PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE)); 485 PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay)); 486 PetscCall(VecRestoreArrayReadAndMemType(xx, &ax)); 487 PetscCall((*mdn->A->ops->mult)(mdn->A, mdn->lvec, yy)); 488 PetscFunctionReturn(PETSC_SUCCESS); 489 } 490 491 PetscErrorCode MatMultAdd_MPIDense(Mat mat, Vec xx, Vec yy, Vec zz) 492 { 493 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 494 const PetscScalar *ax; 495 PetscScalar *ay; 496 PetscMemType axmtype, aymtype; 497 498 PetscFunctionBegin; 499 if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat)); 500 PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype)); 501 PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype)); 502 PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE)); 503 PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE)); 504 PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay)); 505 PetscCall(VecRestoreArrayReadAndMemType(xx, &ax)); 506 PetscCall((*mdn->A->ops->multadd)(mdn->A, mdn->lvec, yy, zz)); 507 PetscFunctionReturn(PETSC_SUCCESS); 508 } 509 510 PetscErrorCode 
MatMultTranspose_MPIDense(Mat A, Vec xx, Vec yy) 511 { 512 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 513 const PetscScalar *ax; 514 PetscScalar *ay; 515 PetscMemType axmtype, aymtype; 516 517 PetscFunctionBegin; 518 if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); 519 PetscCall(VecSet(yy, 0.0)); 520 PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec)); 521 PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype)); 522 PetscCall(VecGetArrayAndMemType(yy, &ay, &aymtype)); 523 PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM)); 524 PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM)); 525 PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax)); 526 PetscCall(VecRestoreArrayAndMemType(yy, &ay)); 527 PetscFunctionReturn(PETSC_SUCCESS); 528 } 529 530 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A, Vec xx, Vec yy, Vec zz) 531 { 532 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 533 const PetscScalar *ax; 534 PetscScalar *ay; 535 PetscMemType axmtype, aymtype; 536 537 PetscFunctionBegin; 538 if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); 539 PetscCall(VecCopy(yy, zz)); 540 PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec)); 541 PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype)); 542 PetscCall(VecGetArrayAndMemType(zz, &ay, &aymtype)); 543 PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM)); 544 PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM)); 545 PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax)); 546 PetscCall(VecRestoreArrayAndMemType(zz, &ay)); 547 PetscFunctionReturn(PETSC_SUCCESS); 548 } 549 550 PetscErrorCode MatGetDiagonal_MPIDense(Mat A, Vec v) 551 { 552 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 553 PetscInt lda, len, i, nl, ng, m = A->rmap->n, radd; 554 PetscScalar *x; 555 const PetscScalar *av; 556 557 PetscFunctionBegin; 558 PetscCall(VecGetArray(v, &x)); 559 PetscCall(VecGetSize(v, 
&ng)); 560 PetscCheck(ng == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming mat and vec"); 561 PetscCall(VecGetLocalSize(v, &nl)); 562 len = PetscMin(a->A->rmap->n, a->A->cmap->n); 563 radd = A->rmap->rstart * m; 564 PetscCall(MatDenseGetArrayRead(a->A, &av)); 565 PetscCall(MatDenseGetLDA(a->A, &lda)); 566 for (i = 0; i < len; i++) x[i] = av[radd + i * lda + i]; 567 PetscCall(MatDenseRestoreArrayRead(a->A, &av)); 568 PetscCall(PetscArrayzero(x + i, nl - i)); 569 PetscCall(VecRestoreArray(v, &x)); 570 PetscFunctionReturn(PETSC_SUCCESS); 571 } 572 573 PetscErrorCode MatDestroy_MPIDense(Mat mat) 574 { 575 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 576 577 PetscFunctionBegin; 578 #if defined(PETSC_USE_LOG) 579 PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N)); 580 #endif 581 PetscCall(MatStashDestroy_Private(&mat->stash)); 582 PetscCheck(!mdn->vecinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 583 PetscCheck(!mdn->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 584 PetscCall(MatDestroy(&mdn->A)); 585 PetscCall(VecDestroy(&mdn->lvec)); 586 PetscCall(PetscSFDestroy(&mdn->Mvctx)); 587 PetscCall(VecDestroy(&mdn->cvec)); 588 PetscCall(MatDestroy(&mdn->cmat)); 589 590 PetscCall(PetscFree(mat->data)); 591 PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL)); 592 593 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", NULL)); 594 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", NULL)); 595 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", NULL)); 596 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", NULL)); 597 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", NULL)); 598 
PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", NULL)); 599 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", NULL)); 600 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", NULL)); 601 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", NULL)); 602 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", NULL)); 603 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", NULL)); 604 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL)); 605 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", NULL)); 606 #if defined(PETSC_HAVE_ELEMENTAL) 607 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", NULL)); 608 #endif 609 #if defined(PETSC_HAVE_SCALAPACK) 610 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", NULL)); 611 #endif 612 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", NULL)); 613 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", NULL)); 614 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", NULL)); 615 #if defined(PETSC_HAVE_CUDA) 616 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", NULL)); 617 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", NULL)); 618 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", NULL)); 619 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensecuda_mpidense_C", NULL)); 620 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", NULL)); 621 
PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", NULL)); 622 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", NULL)); 623 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", NULL)); 624 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArray_C", NULL)); 625 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayRead_C", NULL)); 626 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayWrite_C", NULL)); 627 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArray_C", NULL)); 628 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayRead_C", NULL)); 629 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayWrite_C", NULL)); 630 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAPlaceArray_C", NULL)); 631 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAResetArray_C", NULL)); 632 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAReplaceArray_C", NULL)); 633 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDASetPreallocation_C", NULL)); 634 #endif 635 #if defined(PETSC_HAVE_HIP) 636 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", NULL)); 637 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", NULL)); 638 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", NULL)); 639 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensehip_mpidense_C", NULL)); 640 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensehip_C", NULL)); 641 
PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidensehip_C", NULL)); 642 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaij_C", NULL)); 643 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaijhipsparse_C", NULL)); 644 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArray_C", NULL)); 645 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayRead_C", NULL)); 646 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayWrite_C", NULL)); 647 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArray_C", NULL)); 648 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayRead_C", NULL)); 649 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayWrite_C", NULL)); 650 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPPlaceArray_C", NULL)); 651 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPResetArray_C", NULL)); 652 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPReplaceArray_C", NULL)); 653 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPSetPreallocation_C", NULL)); 654 #endif 655 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", NULL)); 656 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", NULL)); 657 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", NULL)); 658 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", NULL)); 659 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", NULL)); 660 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", NULL)); 661 PetscCall(PetscObjectComposeFunction((PetscObject)mat, 
"MatDenseGetColumnVecWrite_C", NULL)); 662 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", NULL)); 663 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", NULL)); 664 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", NULL)); 665 666 PetscCall(PetscObjectCompose((PetscObject)mat, "DiagonalBlock", NULL)); 667 PetscFunctionReturn(PETSC_SUCCESS); 668 } 669 670 PETSC_INTERN PetscErrorCode MatView_SeqDense(Mat, PetscViewer); 671 672 #include <petscdraw.h> 673 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat, PetscViewer viewer) 674 { 675 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 676 PetscMPIInt rank; 677 PetscViewerType vtype; 678 PetscBool iascii, isdraw; 679 PetscViewer sviewer; 680 PetscViewerFormat format; 681 682 PetscFunctionBegin; 683 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 684 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 685 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 686 if (iascii) { 687 PetscCall(PetscViewerGetType(viewer, &vtype)); 688 PetscCall(PetscViewerGetFormat(viewer, &format)); 689 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 690 MatInfo info; 691 PetscCall(MatGetInfo(mat, MAT_LOCAL, &info)); 692 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 693 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %" PetscInt_FMT " \n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, 694 (PetscInt)info.memory)); 695 PetscCall(PetscViewerFlush(viewer)); 696 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 697 if (mdn->Mvctx) PetscCall(PetscSFView(mdn->Mvctx, viewer)); 698 PetscFunctionReturn(PETSC_SUCCESS); 699 } else if (format == PETSC_VIEWER_ASCII_INFO) { 700 PetscFunctionReturn(PETSC_SUCCESS); 701 } 702 } 
else if (isdraw) { 703 PetscDraw draw; 704 PetscBool isnull; 705 706 PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw)); 707 PetscCall(PetscDrawIsNull(draw, &isnull)); 708 if (isnull) PetscFunctionReturn(PETSC_SUCCESS); 709 } 710 711 { 712 /* assemble the entire matrix onto first processor. */ 713 Mat A; 714 PetscInt M = mat->rmap->N, N = mat->cmap->N, m, row, i, nz; 715 PetscInt *cols; 716 PetscScalar *vals; 717 718 PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A)); 719 if (rank == 0) { 720 PetscCall(MatSetSizes(A, M, N, M, N)); 721 } else { 722 PetscCall(MatSetSizes(A, 0, 0, M, N)); 723 } 724 /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */ 725 PetscCall(MatSetType(A, MATMPIDENSE)); 726 PetscCall(MatMPIDenseSetPreallocation(A, NULL)); 727 728 /* Copy the matrix ... This isn't the most efficient means, 729 but it's quick for now */ 730 A->insertmode = INSERT_VALUES; 731 732 row = mat->rmap->rstart; 733 m = mdn->A->rmap->n; 734 for (i = 0; i < m; i++) { 735 PetscCall(MatGetRow_MPIDense(mat, row, &nz, &cols, &vals)); 736 PetscCall(MatSetValues_MPIDense(A, 1, &row, nz, cols, vals, INSERT_VALUES)); 737 PetscCall(MatRestoreRow_MPIDense(mat, row, &nz, &cols, &vals)); 738 row++; 739 } 740 741 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 742 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 743 PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 744 if (rank == 0) { 745 PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIDense *)(A->data))->A, ((PetscObject)mat)->name)); 746 PetscCall(MatView_SeqDense(((Mat_MPIDense *)(A->data))->A, sviewer)); 747 } 748 PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 749 PetscCall(PetscViewerFlush(viewer)); 750 PetscCall(MatDestroy(&A)); 751 } 752 PetscFunctionReturn(PETSC_SUCCESS); 753 } 754 755 PetscErrorCode MatView_MPIDense(Mat mat, PetscViewer viewer) 756 { 757 PetscBool iascii, isbinary, isdraw, 
issocket;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));

  if (iascii || issocket || isdraw) {
    PetscCall(MatView_MPIDense_ASCIIorDraworSocket(mat, viewer));
  } else if (isbinary) PetscCall(MatView_Dense_Binary(mat, viewer));
  /* NOTE(review): unrecognized viewer types fall through silently */
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Gather matrix statistics from the local sequential block; for the global
   flavors, reduce the five counters across the communicator with MAX or SUM. */
PetscErrorCode MatGetInfo_MPIDense(Mat A, MatInfoType flag, MatInfo *info)
{
  Mat_MPIDense  *mat = (Mat_MPIDense *)A->data;
  Mat            mdn = mat->A;
  PetscLogDouble isend[5], irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;

  PetscCall(MatGetInfo(mdn, MAT_LOCAL, info));

  /* pack the reducible fields so one Allreduce covers all of them */
  isend[0] = info->nz_used;
  isend[1] = info->nz_allocated;
  isend[2] = info->nz_unneeded;
  isend[3] = info->memory;
  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)A)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)A)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Forward relevant options to the local block; state-flag options are ignored
   (logged via PetscInfo); unknown options are an error. */
PetscErrorCode MatSetOption_MPIDense(Mat A, MatOption op, PetscBool flg)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A, 1);
    a->roworiented = flg;
    PetscCall(MatSetOption(a->A, op, flg));
    break;
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_USE_HASH_TABLE:
  case MAT_SORTED_FULL:
    PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
  case MAT_SPD:
  case MAT_IGNORE_LOWER_TRIANGULAR:
  case MAT_IGNORE_ZERO_ENTRIES:
  case MAT_SPD_ETERNAL:
    /* if the diagonal matrix is square it inherits some of the properties above */
    PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
    break;
  default:
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %s", MatOptions[op]);
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Scale rows by ll (purely local) and columns by rr (requires broadcasting the
   right-scaling vector to every rank since each rank stores all N columns). */
PetscErrorCode MatDiagonalScale_MPIDense(Mat A, Vec ll, Vec rr)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)A->data;
  const PetscScalar *l;
  PetscScalar        x, *v, *vv, *r;
  PetscInt           i, j, s2a, s3a, s2, s3, m = mdn->A->rmap->n, n = mdn->A->cmap->n, lda;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArray(mdn->A, &vv));
  PetscCall(MatDenseGetLDA(mdn->A, &lda));
  PetscCall(MatGetLocalSize(A, &s2, &s3));
  if (ll) {
    PetscCall(VecGetLocalSize(ll, &s2a));
    PetscCheck(s2a
== s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left scaling vector non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT, s2a, s2);
    PetscCall(VecGetArrayRead(ll, &l));
    /* row scaling: walk row i across all columns with stride lda */
    for (i = 0; i < m; i++) {
      x = l[i];
      v = vv + i;
      for (j = 0; j < n; j++) {
        (*v) *= x;
        v += lda;
      }
    }
    PetscCall(VecRestoreArrayRead(ll, &l));
    PetscCall(PetscLogFlops(1.0 * n * m));
  }
  if (rr) {
    const PetscScalar *ar;

    PetscCall(VecGetLocalSize(rr, &s3a));
    PetscCheck(s3a == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Right scaling vec non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT ".", s3a, s3);
    PetscCall(VecGetArrayRead(rr, &ar));
    /* bring the distributed right-scaling entries into the local work vector */
    if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
    PetscCall(VecGetArray(mdn->lvec, &r));
    PetscCall(PetscSFBcastBegin(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE));
    PetscCall(VecRestoreArrayRead(rr, &ar));
    /* column scaling: column j is contiguous starting at vv + j*lda */
    for (i = 0; i < n; i++) {
      x = r[i];
      v = vv + i * lda;
      for (j = 0; j < m; j++) (*v++) *= x;
    }
    PetscCall(VecRestoreArray(mdn->lvec, &r));
    PetscCall(PetscLogFlops(1.0 * n * m));
  }
  PetscCall(MatDenseRestoreArray(mdn->A, &vv));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Frobenius, 1- and infinity-norms of the distributed matrix; the 2-norm is
   not supported in parallel. Single-rank case defers to the local block.

   NOTE(review): the Frobenius/1-norm loops read the local array contiguously,
   which assumes lda == local row count — TODO confirm for strided storage. */
PetscErrorCode MatNorm_MPIDense(Mat A, NormType type, PetscReal *nrm)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)A->data;
  PetscInt           i, j;
  PetscMPIInt        size;
  PetscReal          sum = 0.0;
  const PetscScalar *av, *v;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArrayRead(mdn->A, &av));
  v = av;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    PetscCall(MatNorm(mdn->A, type, nrm));
  } else {
    if (type == NORM_FROBENIUS) {
      for (i = 0; i < mdn->A->cmap->n * mdn->A->rmap->n; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
      *nrm = PetscSqrtReal(*nrm);
      PetscCall(PetscLogFlops(2.0 * mdn->A->cmap->n * mdn->A->rmap->n));
    } else if (type == NORM_1) {
      /* sum |a_ij| per column locally, Allreduce the column sums, take the max */
      PetscReal *tmp, *tmp2;
      PetscCall(PetscCalloc2(A->cmap->N, &tmp, A->cmap->N, &tmp2));
      *nrm = 0.0;
      v    = av;
      for (j = 0; j < mdn->A->cmap->n; j++) {
        for (i = 0; i < mdn->A->rmap->n; i++) {
          tmp[j] += PetscAbsScalar(*v);
          v++;
        }
      }
      PetscCall(MPIU_Allreduce(tmp, tmp2, A->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
      for (j = 0; j < A->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      PetscCall(PetscFree2(tmp, tmp2));
      PetscCall(PetscLogFlops(A->cmap->n * A->rmap->n));
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp;
      PetscCall(MatNorm(mdn->A, type, &ntemp));
      PetscCall(MPIU_Allreduce(&ntemp, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
    } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for two norm");
  }
  PetscCall(MatDenseRestoreArrayRead(mdn->A, &av));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Transpose by scattering each local column of A as a row of B through the
   MatSetValues stash machinery. */
PetscErrorCode MatTranspose_MPIDense(Mat A, MatReuse reuse, Mat *matout)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat           B;
  PetscInt      M = A->rmap->N, N = A->cmap->N, m, n, *rwork, rstart = A->rmap->rstart;
  PetscInt      j, i, lda;
  PetscScalar  *v;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
    PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
    PetscCall(MatMPIDenseSetPreallocation(B, NULL));
  } else B = *matout;

  m = a->A->rmap->n;
  n = a->A->cmap->n;
  PetscCall(MatDenseGetArrayRead(a->A, (const PetscScalar **)&v));
979 PetscCall(MatDenseGetLDA(a->A, &lda)); 980 PetscCall(PetscMalloc1(m, &rwork)); 981 for (i = 0; i < m; i++) rwork[i] = rstart + i; 982 for (j = 0; j < n; j++) { 983 PetscCall(MatSetValues(B, 1, &j, m, rwork, v, INSERT_VALUES)); 984 v += lda; 985 } 986 PetscCall(MatDenseRestoreArrayRead(a->A, (const PetscScalar **)&v)); 987 PetscCall(PetscFree(rwork)); 988 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 989 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 990 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 991 *matout = B; 992 } else { 993 PetscCall(MatHeaderMerge(A, &B)); 994 } 995 PetscFunctionReturn(PETSC_SUCCESS); 996 } 997 998 static PetscErrorCode MatDuplicate_MPIDense(Mat, MatDuplicateOption, Mat *); 999 PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat, PetscScalar); 1000 1001 PetscErrorCode MatSetUp_MPIDense(Mat A) 1002 { 1003 PetscFunctionBegin; 1004 PetscCall(PetscLayoutSetUp(A->rmap)); 1005 PetscCall(PetscLayoutSetUp(A->cmap)); 1006 if (!A->preallocated) PetscCall(MatMPIDenseSetPreallocation(A, NULL)); 1007 PetscFunctionReturn(PETSC_SUCCESS); 1008 } 1009 1010 PetscErrorCode MatAXPY_MPIDense(Mat Y, PetscScalar alpha, Mat X, MatStructure str) 1011 { 1012 Mat_MPIDense *A = (Mat_MPIDense *)Y->data, *B = (Mat_MPIDense *)X->data; 1013 1014 PetscFunctionBegin; 1015 PetscCall(MatAXPY(A->A, alpha, B->A, str)); 1016 PetscFunctionReturn(PETSC_SUCCESS); 1017 } 1018 1019 PetscErrorCode MatConjugate_MPIDense(Mat mat) 1020 { 1021 Mat_MPIDense *a = (Mat_MPIDense *)mat->data; 1022 1023 PetscFunctionBegin; 1024 PetscCall(MatConjugate(a->A)); 1025 PetscFunctionReturn(PETSC_SUCCESS); 1026 } 1027 1028 PetscErrorCode MatRealPart_MPIDense(Mat A) 1029 { 1030 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1031 1032 PetscFunctionBegin; 1033 PetscCall(MatRealPart(a->A)); 1034 PetscFunctionReturn(PETSC_SUCCESS); 1035 } 1036 1037 PetscErrorCode MatImaginaryPart_MPIDense(Mat A) 1038 { 1039 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1040 1041 PetscFunctionBegin; 
1042 PetscCall(MatImaginaryPart(a->A)); 1043 PetscFunctionReturn(PETSC_SUCCESS); 1044 } 1045 1046 static PetscErrorCode MatGetColumnVector_MPIDense(Mat A, Vec v, PetscInt col) 1047 { 1048 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1049 1050 PetscFunctionBegin; 1051 PetscCheck(a->A, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing local matrix"); 1052 PetscCheck(a->A->ops->getcolumnvector, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing get column operation"); 1053 PetscCall((*a->A->ops->getcolumnvector)(a->A, v, col)); 1054 PetscFunctionReturn(PETSC_SUCCESS); 1055 } 1056 1057 PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat, PetscInt, PetscReal *); 1058 1059 PetscErrorCode MatGetColumnReductions_MPIDense(Mat A, PetscInt type, PetscReal *reductions) 1060 { 1061 PetscInt i, m, n; 1062 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1063 PetscReal *work; 1064 1065 PetscFunctionBegin; 1066 PetscCall(MatGetSize(A, &m, &n)); 1067 PetscCall(PetscMalloc1(n, &work)); 1068 if (type == REDUCTION_MEAN_REALPART) { 1069 PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_REALPART, work)); 1070 } else if (type == REDUCTION_MEAN_IMAGINARYPART) { 1071 PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_IMAGINARYPART, work)); 1072 } else { 1073 PetscCall(MatGetColumnReductions_SeqDense(a->A, type, work)); 1074 } 1075 if (type == NORM_2) { 1076 for (i = 0; i < n; i++) work[i] *= work[i]; 1077 } 1078 if (type == NORM_INFINITY) { 1079 PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, A->hdr.comm)); 1080 } else { 1081 PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, A->hdr.comm)); 1082 } 1083 PetscCall(PetscFree(work)); 1084 if (type == NORM_2) { 1085 for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]); 1086 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 1087 for (i = 0; i < n; i++) reductions[i] /= m; 1088 } 1089 
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Fill the local block with random values; mirror its offload state. */
static PetscErrorCode MatSetRandom_MPIDense(Mat x, PetscRandom rctx)
{
  Mat_MPIDense *d = (Mat_MPIDense *)x->data;

  PetscFunctionBegin;
  PetscCall(MatSetRandom(d->A, rctx));
#if defined(PETSC_HAVE_DEVICE)
  x->offloadmask = d->A->offloadmask;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* A dense matrix always stores its diagonal, so nothing is ever missing. */
static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A, PetscBool *missing, PetscInt *d)
{
  PetscFunctionBegin;
  *missing = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat);
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat);
static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat);
static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat);
static PetscErrorCode MatEqual_MPIDense(Mat, Mat, PetscBool *);
static PetscErrorCode MatLoad_MPIDense(Mat, PetscViewer);

/* Positional function table for MATMPIDENSE; the /* NN*\/ markers index slots
   of struct _MatOps — order must never change. */
static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
                                       MatGetRow_MPIDense,
                                       MatRestoreRow_MPIDense,
                                       MatMult_MPIDense,
                                       /* 4*/ MatMultAdd_MPIDense,
                                       MatMultTranspose_MPIDense,
                                       MatMultTransposeAdd_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 10*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatTranspose_MPIDense,
                                       /* 15*/ MatGetInfo_MPIDense,
                                       MatEqual_MPIDense,
                                       MatGetDiagonal_MPIDense,
                                       MatDiagonalScale_MPIDense,
                                       MatNorm_MPIDense,
                                       /* 20*/ MatAssemblyBegin_MPIDense,
                                       MatAssemblyEnd_MPIDense,
                                       MatSetOption_MPIDense,
                                       MatZeroEntries_MPIDense,
                                       /* 24*/ MatZeroRows_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 29*/ MatSetUp_MPIDense,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPIDense,
                                       NULL,
                                       /* 34*/ MatDuplicate_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 39*/ MatAXPY_MPIDense,
                                       MatCreateSubMatrices_MPIDense,
                                       NULL,
                                       MatGetValues_MPIDense,
                                       MatCopy_MPIDense,
                                       /* 44*/ NULL,
                                       MatScale_MPIDense,
                                       MatShift_MPIDense,
                                       NULL,
                                       NULL,
                                       /* 49*/ MatSetRandom_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 54*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 59*/ MatCreateSubMatrix_MPIDense,
                                       MatDestroy_MPIDense,
                                       MatView_MPIDense,
                                       NULL,
                                       NULL,
                                       /* 64*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 69*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 74*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 79*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 83*/ MatLoad_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 89*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 94*/ NULL,
                                       NULL,
                                       MatMatTransposeMultSymbolic_MPIDense_MPIDense,
                                       MatMatTransposeMultNumeric_MPIDense_MPIDense,
                                       NULL,
                                       /* 99*/ MatProductSetFromOptions_MPIDense,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPIDense,
                                       NULL,
                                       /*104*/ NULL,
                                       MatRealPart_MPIDense,
                                       MatImaginaryPart_MPIDense,
                                       NULL,
                                       NULL,
                                       /*109*/ NULL,
                                       NULL,
                                       NULL,
                                       MatGetColumnVector_MPIDense,
                                       MatMissingDiagonal_MPIDense,
                                       /*114*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*119*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*124*/ NULL,
                                       MatGetColumnReductions_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*129*/ NULL,
                                       NULL,
                                       MatTransposeMatMultSymbolic_MPIDense_MPIDense,
                                       MatTransposeMatMultNumeric_MPIDense_MPIDense,
                                       NULL,
                                       /*134*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*139*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatCreateMPIMatConcatenateSeqMat_MPIDense,
                                       /*145*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*150*/ NULL,
                                       NULL};

PetscErrorCode
MatMPIDenseSetPreallocation_MPIDense(Mat mat, PetscScalar *data)
{
  Mat_MPIDense *a = (Mat_MPIDense *)mat->data;
  MatType       mtype = MATSEQDENSE;

  PetscFunctionBegin;
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));
  if (!a->A) {
    /* local block stores all N columns of the locally owned rows */
    PetscCall(MatCreate(PETSC_COMM_SELF, &a->A));
    PetscCall(MatSetSizes(a->A, mat->rmap->n, mat->cmap->N, mat->rmap->n, mat->cmap->N));
  }
  /* choose the matching sequential device type when the parallel type is a device type */
#if defined(PETSC_HAVE_CUDA)
  PetscBool iscuda;
  PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSECUDA, &iscuda));
  if (iscuda) mtype = MATSEQDENSECUDA;
#endif
#if defined(PETSC_HAVE_HIP)
  PetscBool iship;
  PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSEHIP, &iship));
  if (iship) mtype = MATSEQDENSEHIP;
#endif
  PetscCall(MatSetType(a->A, mtype));
  PetscCall(MatSeqDenseSetPreallocation(a->A, data));
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
  mat->offloadmask = a->A->offloadmask;
#endif
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Convert MPIAIJ -> MPIDense: densify the local AIJ rows, then concatenate the
   per-rank sequential blocks into a parallel dense matrix. */
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIDense(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat B, C;

  PetscFunctionBegin;
  PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &C));
  PetscCall(MatConvert_SeqAIJ_SeqDense(C, MATSEQDENSE, MAT_INITIAL_MATRIX, &B));
  PetscCall(MatDestroy(&C));
  if (reuse == MAT_REUSE_MATRIX) {
    C = *newmat;
  } else C = NULL;
  PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C));
  PetscCall(MatDestroy(&B));
  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A, &C));
  } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Convert MPIDense -> MPIAIJ: sparsify the local block, then concatenate. */
PetscErrorCode MatConvert_MPIDense_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat B, C;

  PetscFunctionBegin;
  PetscCall(MatDenseGetLocalMatrix(A, &C));
  PetscCall(MatConvert_SeqDense_SeqAIJ(C, MATSEQAIJ, MAT_INITIAL_MATRIX, &B));
  if (reuse == MAT_REUSE_MATRIX) {
    C = *newmat;
  } else C = NULL;
  PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C));
  PetscCall(MatDestroy(&B));
  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A, &C));
  } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
  PetscFunctionReturn(PETSC_SUCCESS);
}

#if defined(PETSC_HAVE_ELEMENTAL)
/* Convert to Elemental by setting all locally owned rows, full width. */
PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat          mat_elemental;
  PetscScalar *v;
  PetscInt     m = A->rmap->n, N = A->cmap->N, rstart = A->rmap->rstart, i, *rows, *cols;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    mat_elemental = *newmat;
    PetscCall(MatZeroEntries(*newmat));
  } else {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental));
    PetscCall(MatSetSizes(mat_elemental, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N));
    PetscCall(MatSetType(mat_elemental, MATELEMENTAL));
    PetscCall(MatSetUp(mat_elemental));
    PetscCall(MatSetOption(mat_elemental, MAT_ROW_ORIENTED, PETSC_FALSE));
  }

  PetscCall(PetscMalloc2(m, &rows, N, &cols));
  for (i = 0; i < N; i++) cols[i] = i;
  for (i = 0; i < m; i++) rows[i] = rstart + i;

  /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
  PetscCall(MatDenseGetArray(A, &v));
  PetscCall(MatSetValues(mat_elemental, m, rows, N, cols, v, ADD_VALUES));
  PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY));
  PetscCall(MatDenseRestoreArray(A, &v));
  PetscCall(PetscFree2(rows, cols));

  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A, &mat_elemental));
  } else {
    *newmat = mat_elemental;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif

/* Expose a pointer to one column of the local block. */
static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A, PetscInt col, PetscScalar **vals)
{
  Mat_MPIDense *mat = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseGetColumn(mat->A, col, vals));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Companion to MatDenseGetColumn_MPIDense(). */
static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A, PetscScalar **vals)
{
  Mat_MPIDense *mat = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseRestoreColumn(mat->A, vals));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Stack per-rank sequential dense matrices into one parallel dense matrix;
   symbolic phase creates the result, numeric phase copies the local data. */
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
{
  Mat_MPIDense *mat;
  PetscInt      m, nloc, N;

  PetscFunctionBegin;
  PetscCall(MatGetSize(inmat, &m, &N));
  PetscCall(MatGetLocalSize(inmat, NULL, &nloc));
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    PetscInt sum;

    if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N));
    /* Check sum(n) = N */
    PetscCall(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm));
    PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N);

    PetscCall(MatCreateDense(comm, m, n, PETSC_DETERMINE, N, NULL, outmat));
    PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  }

  /* numeric phase */
  mat = (Mat_MPIDense *)(*outmat)->data;
  PetscCall(MatCopy(inmat, mat->A, SAME_NONZERO_PATTERN));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Wrap column `col` of the local block in the cached column vector a->cvec
   (read/write access). Must be paired with MatDenseRestoreColumnVec(). */
PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscInt      lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
  a->vecinuse = col + 1; /* nonzero marks "in use"; 0 would be ambiguous for col 0 */
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(MatDenseGetArray(a->A, (PetscScalar **)&a->ptrinuse));
  PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
  *v = a->cvec;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Release the column vector handed out by MatDenseGetColumnVec_MPIDense(). */
PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector");
  a->vecinuse = 0;
  PetscCall(MatDenseRestoreArray(a->A, (PetscScalar **)&a->ptrinuse));
  PetscCall(VecResetArray(a->cvec));
  if (v) *v = NULL;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Read-only variant: the returned vector is lock-pushed against writes. */
PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscInt      lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
  a->vecinuse = col + 1;
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(MatDenseGetArrayRead(a->A, &a->ptrinuse));
  PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
  PetscCall(VecLockReadPush(a->cvec));
  *v = a->cvec;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Companion to MatDenseGetColumnVecRead_MPIDense(); pops the read lock. */
PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
  a->vecinuse = 0;
  PetscCall(MatDenseRestoreArrayRead(a->A, &a->ptrinuse));
  PetscCall(VecLockReadPop(a->cvec));
  PetscCall(VecResetArray(a->cvec));
  if (v) *v = NULL;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Write-only variant: uses GetArrayWrite so prior contents need not be valid. */
PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscInt      lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
  a->vecinuse = col + 1;
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(MatDenseGetArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
  PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
  *v = a->cvec;
  PetscFunctionReturn(PETSC_SUCCESS);
}
/* Companion to MatDenseGetColumnVecWrite_MPIDense(). */
PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
  a->vecinuse = 0;
  PetscCall(MatDenseRestoreArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
  PetscCall(VecResetArray(a->cvec));
  if (v) *v = NULL;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* View rows [rbegin,rend) x columns [cbegin,cend) of A as a matrix, reusing
   the cached submatrix a->cmat; its layouts are rebuilt only when the
   requested global/local sizes differ from the cached ones. */
PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A, PetscInt rbegin, PetscInt rend, PetscInt cbegin, PetscInt cend, Mat *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat_MPIDense *c;
  MPI_Comm      comm;
  PetscInt      pbegin, pend;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCheck(!a->vecinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  /* clip the requested global row range to this rank's ownership range */
  pbegin = PetscMax(0, PetscMin(A->rmap->rend, rbegin) - A->rmap->rstart);
  pend   = PetscMin(A->rmap->n, PetscMax(0, rend - A->rmap->rstart));
  if (!a->cmat) {
    PetscCall(MatCreate(comm, &a->cmat));
    PetscCall(MatSetType(a->cmat, ((PetscObject)A)->type_name));
    if (rend - rbegin == A->rmap->N) PetscCall(PetscLayoutReference(A->rmap, &a->cmat->rmap));
    else {
      PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin));
      PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin));
      PetscCall(PetscLayoutSetUp(a->cmat->rmap));
    }
    PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin));
    PetscCall(PetscLayoutSetUp(a->cmat->cmap));
  } else {
    /* global row count may match while local splits differ; check collectively */
    PetscBool same = (PetscBool)(rend - rbegin == a->cmat->rmap->N);
    if (same && a->cmat->rmap->N != A->rmap->N) {
      same = (PetscBool)(pend - pbegin == a->cmat->rmap->n);
      PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &same, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
    }
    if (!same) {
      PetscCall(PetscLayoutDestroy(&a->cmat->rmap));
      PetscCall(PetscLayoutCreate(comm, &a->cmat->rmap));
      PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin));
      PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin));
      PetscCall(PetscLayoutSetUp(a->cmat->rmap));
    }
    if (cend - cbegin != a->cmat->cmap->N) {
      PetscCall(PetscLayoutDestroy(&a->cmat->cmap));
      PetscCall(PetscLayoutCreate(comm, &a->cmat->cmap));
      PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin));
      PetscCall(PetscLayoutSetUp(a->cmat->cmap));
    }
  }
  c = (Mat_MPIDense *)a->cmat->data;
  PetscCheck(!c->A, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseGetSubMatrix(a->A, pbegin, pend, cbegin, cend, &c->A));

  a->cmat->preallocated = PETSC_TRUE;
  a->cmat->assembled    = PETSC_TRUE;
#if defined(PETSC_HAVE_DEVICE)
  a->cmat->offloadmask = c->A->offloadmask;
#endif
  a->matinuse = cbegin + 1; /* nonzero marks "in use" */
  *v          = a->cmat;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Release the cached submatrix handed out by MatDenseGetSubMatrix_MPIDense(). */
PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A, Mat *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat_MPIDense *c;

  PetscFunctionBegin;
  PetscCheck(a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetSubMatrix() first");
  PetscCheck(a->cmat, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal matrix");
  PetscCheck(*v == a->cmat, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Not the matrix obtained from MatDenseGetSubMatrix()");
  a->matinuse = 0;
  c           = (Mat_MPIDense *)a->cmat->data;
  PetscCall(MatDenseRestoreSubMatrix(a->A, &c->A));
  if (v) *v = NULL;
#if defined(PETSC_HAVE_DEVICE)
  A->offloadmask = a->A->offloadmask;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*MC
   MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.

   Options Database Key:
. -mat_type mpidense - sets the matrix type to `MATMPIDENSE` during a call to `MatSetFromOptions()`

  Level: beginner

.seealso: [](ch_matrices), `Mat`, `MatCreateDense()`, `MATSEQDENSE`, `MATDENSE`
M*/
/* Type constructor: install the function table and register the dense-matrix
   method implementations via PetscObjectComposeFunction(). */
PetscErrorCode MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense *a;

  PetscFunctionBegin;
  PetscCall(PetscNew(&a));
  mat->data   = (void *)a;
  mat->ops[0] = MatOps_Values; /* struct copy of the whole operations table */

  mat->insertmode = NOT_SET_VALUES;

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;

  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)mat), 1, &mat->stash));

  /* stuff used for matrix vector multiply */
  a->lvec        = NULL;
  a->Mvctx       = NULL;
  a->roworiented = PETSC_TRUE;

  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", MatDenseGetLDA_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", MatDenseSetLDA_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", MatDenseGetArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", MatDenseRestoreArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", MatDenseGetArrayRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", MatDenseRestoreArrayRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", MatDenseGetArrayWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", MatDenseRestoreArrayWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", MatDensePlaceArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", MatDenseResetArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", MatDenseReplaceArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", MatDenseGetSubMatrix_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", MatDenseRestoreSubMatrix_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", MatConvert_MPIDense_MPIAIJ));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", MatConvert_MPIDense_Elemental));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", MatConvert_Dense_ScaLAPACK));
#endif
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", MatConvert_MPIDense_MPIDenseCUDA));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", MatMPIDenseSetPreallocation_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
#endif
#if defined(PETSC_HAVE_HIP)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", MatConvert_MPIDense_MPIDenseHIP));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", MatDenseGetColumn_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", MatDenseRestoreColumn_MPIDense));
  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, MATMPIDENSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*MC
   MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.

   This matrix type is identical to `MATSEQDENSE` when constructed with a single process communicator,
   and `MATMPIDENSE` otherwise.

   Options Database Key:
. -mat_type dense - sets the matrix type to `MATDENSE` during a call to `MatSetFromOptions()`

  Level: beginner

.seealso: [](ch_matrices), `Mat`, `MATSEQDENSE`, `MATMPIDENSE`, `MATDENSECUDA`, `MATDENSEHIP`
M*/

/*@C
   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

   Collective

   Input Parameters:
+  B - the matrix
-  data - optional location of matrix data.  Set to `NULL` for PETSc
   to control all matrix memory allocation.

   Level: intermediate

   Notes:
   The dense format is fully compatible with standard Fortran
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set `data` to `NULL`.

.seealso: [](ch_matrices), `Mat`, `MATMPIDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
@*/
PetscErrorCode MatMPIDenseSetPreallocation(Mat B, PetscScalar *data)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscTryMethod(B, "MatMPIDenseSetPreallocation_C", (Mat, PetscScalar *), (B, data));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
   MatDensePlaceArray - Allows one to replace the array in a `MATDENSE` matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Level: developer

   Note:
   You can return to the original array with a call to `MatDenseResetArray()`. The user is responsible for freeing this array; it will not be
   freed when the matrix is destroyed.
1736 1737 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDenseResetArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`, 1738 `MatDenseReplaceArray()` 1739 @*/ 1740 PetscErrorCode MatDensePlaceArray(Mat mat, const PetscScalar *array) 1741 { 1742 PetscFunctionBegin; 1743 PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 1744 PetscUseMethod(mat, "MatDensePlaceArray_C", (Mat, const PetscScalar *), (mat, array)); 1745 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 1746 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 1747 mat->offloadmask = PETSC_OFFLOAD_CPU; 1748 #endif 1749 PetscFunctionReturn(PETSC_SUCCESS); 1750 } 1751 1752 /*@ 1753 MatDenseResetArray - Resets the matrix array to that it previously had before the call to `MatDensePlaceArray()` 1754 1755 Not Collective 1756 1757 Input Parameter: 1758 . mat - the matrix 1759 1760 Level: developer 1761 1762 Note: 1763 You can only call this after a call to `MatDensePlaceArray()` 1764 1765 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDensePlaceArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()` 1766 @*/ 1767 PetscErrorCode MatDenseResetArray(Mat mat) 1768 { 1769 PetscFunctionBegin; 1770 PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 1771 PetscUseMethod(mat, "MatDenseResetArray_C", (Mat), (mat)); 1772 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 1773 PetscFunctionReturn(PETSC_SUCCESS); 1774 } 1775 1776 /*@ 1777 MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an 1778 array provided by the user. 
This is useful to avoid copying an array 1779 into a matrix 1780 1781 Not Collective 1782 1783 Input Parameters: 1784 + mat - the matrix 1785 - array - the array in column major order 1786 1787 Level: developer 1788 1789 Note: 1790 The memory passed in MUST be obtained with `PetscMalloc()` and CANNOT be 1791 freed by the user. It will be freed when the matrix is destroyed. 1792 1793 .seealso: [](ch_matrices), `Mat`, `MatDensePlaceArray()`, `MatDenseGetArray()`, `VecReplaceArray()` 1794 @*/ 1795 PetscErrorCode MatDenseReplaceArray(Mat mat, const PetscScalar *array) 1796 { 1797 PetscFunctionBegin; 1798 PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 1799 PetscUseMethod(mat, "MatDenseReplaceArray_C", (Mat, const PetscScalar *), (mat, array)); 1800 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 1801 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 1802 mat->offloadmask = PETSC_OFFLOAD_CPU; 1803 #endif 1804 PetscFunctionReturn(PETSC_SUCCESS); 1805 } 1806 1807 /*@C 1808 MatCreateDense - Creates a matrix in `MATDENSE` format. 1809 1810 Collective 1811 1812 Input Parameters: 1813 + comm - MPI communicator 1814 . m - number of local rows (or `PETSC_DECIDE` to have calculated if `M` is given) 1815 . n - number of local columns (or `PETSC_DECIDE` to have calculated if `N` is given) 1816 . M - number of global rows (or `PETSC_DECIDE` to have calculated if `m` is given) 1817 . N - number of global columns (or `PETSC_DECIDE` to have calculated if `n` is given) 1818 - data - optional location of matrix data. Set data to `NULL` (`PETSC_NULL_SCALAR` for Fortran users) for PETSc 1819 to control all matrix memory allocation. 1820 1821 Output Parameter: 1822 . A - the matrix 1823 1824 Level: intermediate 1825 1826 Notes: 1827 The dense format is fully compatible with standard Fortran 1828 storage by columns. 1829 1830 Although local portions of the matrix are stored in column-major 1831 order, the matrix is partitioned across MPI ranks by row. 
1832 1833 The data input variable is intended primarily for Fortran programmers 1834 who wish to allocate their own matrix memory space. Most users should 1835 set `data` to `NULL` (`PETSC_NULL_SCALAR` for Fortran users). 1836 1837 The user MUST specify either the local or global matrix dimensions 1838 (possibly both). 1839 1840 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()` 1841 @*/ 1842 PetscErrorCode MatCreateDense(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar *data, Mat *A) 1843 { 1844 PetscFunctionBegin; 1845 PetscCall(MatCreate(comm, A)); 1846 PetscCall(MatSetSizes(*A, m, n, M, N)); 1847 PetscCall(MatSetType(*A, MATDENSE)); 1848 PetscCall(MatSeqDenseSetPreallocation(*A, data)); 1849 PetscCall(MatMPIDenseSetPreallocation(*A, data)); 1850 PetscFunctionReturn(PETSC_SUCCESS); 1851 } 1852 1853 static PetscErrorCode MatDuplicate_MPIDense(Mat A, MatDuplicateOption cpvalues, Mat *newmat) 1854 { 1855 Mat mat; 1856 Mat_MPIDense *a, *oldmat = (Mat_MPIDense *)A->data; 1857 1858 PetscFunctionBegin; 1859 *newmat = NULL; 1860 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat)); 1861 PetscCall(MatSetSizes(mat, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 1862 PetscCall(MatSetType(mat, ((PetscObject)A)->type_name)); 1863 a = (Mat_MPIDense *)mat->data; 1864 1865 mat->factortype = A->factortype; 1866 mat->assembled = PETSC_TRUE; 1867 mat->preallocated = PETSC_TRUE; 1868 1869 mat->insertmode = NOT_SET_VALUES; 1870 a->donotstash = oldmat->donotstash; 1871 1872 PetscCall(PetscLayoutReference(A->rmap, &mat->rmap)); 1873 PetscCall(PetscLayoutReference(A->cmap, &mat->cmap)); 1874 1875 PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A)); 1876 1877 *newmat = mat; 1878 PetscFunctionReturn(PETSC_SUCCESS); 1879 } 1880 1881 PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer) 1882 { 1883 PetscBool isbinary; 1884 #if defined(PETSC_HAVE_HDF5) 1885 PetscBool ishdf5; 1886 #endif 1887 1888 
PetscFunctionBegin; 1889 PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1); 1890 PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2); 1891 /* force binary viewer to load .info file if it has not yet done so */ 1892 PetscCall(PetscViewerSetUp(viewer)); 1893 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1894 #if defined(PETSC_HAVE_HDF5) 1895 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5)); 1896 #endif 1897 if (isbinary) { 1898 PetscCall(MatLoad_Dense_Binary(newMat, viewer)); 1899 #if defined(PETSC_HAVE_HDF5) 1900 } else if (ishdf5) { 1901 PetscCall(MatLoad_Dense_HDF5(newMat, viewer)); 1902 #endif 1903 } else SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name); 1904 PetscFunctionReturn(PETSC_SUCCESS); 1905 } 1906 1907 static PetscErrorCode MatEqual_MPIDense(Mat A, Mat B, PetscBool *flag) 1908 { 1909 Mat_MPIDense *matB = (Mat_MPIDense *)B->data, *matA = (Mat_MPIDense *)A->data; 1910 Mat a, b; 1911 1912 PetscFunctionBegin; 1913 a = matA->A; 1914 b = matB->A; 1915 PetscCall(MatEqual(a, b, flag)); 1916 PetscCall(MPIU_Allreduce(MPI_IN_PLACE, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A))); 1917 PetscFunctionReturn(PETSC_SUCCESS); 1918 } 1919 1920 PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data) 1921 { 1922 Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data; 1923 1924 PetscFunctionBegin; 1925 PetscCall(PetscFree2(atb->sendbuf, atb->recvcounts)); 1926 PetscCall(MatDestroy(&atb->atb)); 1927 PetscCall(PetscFree(atb)); 1928 PetscFunctionReturn(PETSC_SUCCESS); 1929 } 1930 1931 PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data) 1932 { 1933 Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data; 1934 1935 PetscFunctionBegin; 1936 PetscCall(PetscFree2(abt->buf[0], abt->buf[1])); 1937 
PetscCall(PetscFree2(abt->recvcounts, abt->recvdispls)); 1938 PetscCall(PetscFree(abt)); 1939 PetscFunctionReturn(PETSC_SUCCESS); 1940 } 1941 1942 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) 1943 { 1944 Mat_MPIDense *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data; 1945 Mat_TransMatMultDense *atb; 1946 MPI_Comm comm; 1947 PetscMPIInt size, *recvcounts; 1948 PetscScalar *carray, *sendbuf; 1949 const PetscScalar *atbarray; 1950 PetscInt i, cN = C->cmap->N, proc, k, j, lda; 1951 const PetscInt *ranges; 1952 1953 PetscFunctionBegin; 1954 MatCheckProduct(C, 3); 1955 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 1956 atb = (Mat_TransMatMultDense *)C->product->data; 1957 recvcounts = atb->recvcounts; 1958 sendbuf = atb->sendbuf; 1959 1960 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 1961 PetscCallMPI(MPI_Comm_size(comm, &size)); 1962 1963 /* compute atbarray = aseq^T * bseq */ 1964 PetscCall(MatTransposeMatMult(a->A, b->A, atb->atb ? 
MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX, PETSC_DEFAULT, &atb->atb)); 1965 1966 PetscCall(MatGetOwnershipRanges(C, &ranges)); 1967 1968 /* arrange atbarray into sendbuf */ 1969 PetscCall(MatDenseGetArrayRead(atb->atb, &atbarray)); 1970 PetscCall(MatDenseGetLDA(atb->atb, &lda)); 1971 for (proc = 0, k = 0; proc < size; proc++) { 1972 for (j = 0; j < cN; j++) { 1973 for (i = ranges[proc]; i < ranges[proc + 1]; i++) sendbuf[k++] = atbarray[i + j * lda]; 1974 } 1975 } 1976 PetscCall(MatDenseRestoreArrayRead(atb->atb, &atbarray)); 1977 1978 /* sum all atbarray to local values of C */ 1979 PetscCall(MatDenseGetArrayWrite(c->A, &carray)); 1980 PetscCallMPI(MPI_Reduce_scatter(sendbuf, carray, recvcounts, MPIU_SCALAR, MPIU_SUM, comm)); 1981 PetscCall(MatDenseRestoreArrayWrite(c->A, &carray)); 1982 PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 1983 PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 1984 PetscFunctionReturn(PETSC_SUCCESS); 1985 } 1986 1987 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C) 1988 { 1989 MPI_Comm comm; 1990 PetscMPIInt size; 1991 PetscInt cm = A->cmap->n, cM, cN = B->cmap->N; 1992 Mat_TransMatMultDense *atb; 1993 PetscBool cisdense = PETSC_FALSE; 1994 PetscInt i; 1995 const PetscInt *ranges; 1996 1997 PetscFunctionBegin; 1998 MatCheckProduct(C, 4); 1999 PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 2000 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 2001 if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) { 2002 SETERRQ(comm, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend); 2003 } 2004 2005 /* create matrix product C */ 2006 PetscCall(MatSetSizes(C, cm, B->cmap->n, A->cmap->N, B->cmap->N)); 2007 #if defined(PETSC_HAVE_CUDA) 2008 
PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, "")); 2009 #endif 2010 #if defined(PETSC_HAVE_HIP) 2011 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSEHIP, "")); 2012 #endif 2013 if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 2014 PetscCall(MatSetUp(C)); 2015 2016 /* create data structure for reuse C */ 2017 PetscCallMPI(MPI_Comm_size(comm, &size)); 2018 PetscCall(PetscNew(&atb)); 2019 cM = C->rmap->N; 2020 PetscCall(PetscMalloc2(cM * cN, &atb->sendbuf, size, &atb->recvcounts)); 2021 PetscCall(MatGetOwnershipRanges(C, &ranges)); 2022 for (i = 0; i < size; i++) atb->recvcounts[i] = (ranges[i + 1] - ranges[i]) * cN; 2023 2024 C->product->data = atb; 2025 C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense; 2026 PetscFunctionReturn(PETSC_SUCCESS); 2027 } 2028 2029 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C) 2030 { 2031 MPI_Comm comm; 2032 PetscMPIInt i, size; 2033 PetscInt maxRows, bufsiz; 2034 PetscMPIInt tag; 2035 PetscInt alg; 2036 Mat_MatTransMultDense *abt; 2037 Mat_Product *product = C->product; 2038 PetscBool flg; 2039 2040 PetscFunctionBegin; 2041 MatCheckProduct(C, 4); 2042 PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 2043 /* check local size of A and B */ 2044 PetscCheck(A->cmap->n == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local column dimensions are incompatible, A (%" PetscInt_FMT ") != B (%" PetscInt_FMT ")", A->cmap->n, B->cmap->n); 2045 2046 PetscCall(PetscStrcmp(product->alg, "allgatherv", &flg)); 2047 alg = flg ? 
0 : 1; 2048 2049 /* setup matrix product C */ 2050 PetscCall(MatSetSizes(C, A->rmap->n, B->rmap->n, A->rmap->N, B->rmap->N)); 2051 PetscCall(MatSetType(C, MATMPIDENSE)); 2052 PetscCall(MatSetUp(C)); 2053 PetscCall(PetscObjectGetNewTag((PetscObject)C, &tag)); 2054 2055 /* create data structure for reuse C */ 2056 PetscCall(PetscObjectGetComm((PetscObject)C, &comm)); 2057 PetscCallMPI(MPI_Comm_size(comm, &size)); 2058 PetscCall(PetscNew(&abt)); 2059 abt->tag = tag; 2060 abt->alg = alg; 2061 switch (alg) { 2062 case 1: /* alg: "cyclic" */ 2063 for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i])); 2064 bufsiz = A->cmap->N * maxRows; 2065 PetscCall(PetscMalloc2(bufsiz, &(abt->buf[0]), bufsiz, &(abt->buf[1]))); 2066 break; 2067 default: /* alg: "allgatherv" */ 2068 PetscCall(PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1]))); 2069 PetscCall(PetscMalloc2(size, &(abt->recvcounts), size + 1, &(abt->recvdispls))); 2070 for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N; 2071 for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i]; 2072 break; 2073 } 2074 2075 C->product->data = abt; 2076 C->product->destroy = MatDestroy_MatMatTransMult_MPIDense_MPIDense; 2077 C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense; 2078 PetscFunctionReturn(PETSC_SUCCESS); 2079 } 2080 2081 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C) 2082 { 2083 Mat_MPIDense *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data; 2084 Mat_MatTransMultDense *abt; 2085 MPI_Comm comm; 2086 PetscMPIInt rank, size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom; 2087 PetscScalar *sendbuf, *recvbuf = NULL, *cv; 2088 PetscInt i, cK = A->cmap->N, k, j, bn; 2089 PetscScalar _DOne = 1.0, _DZero = 0.0; 2090 const PetscScalar *av, *bv; 2091 
PetscBLASInt cm, cn, ck, alda, blda = 0, clda; 2092 MPI_Request reqs[2]; 2093 const PetscInt *ranges; 2094 2095 PetscFunctionBegin; 2096 MatCheckProduct(C, 3); 2097 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 2098 abt = (Mat_MatTransMultDense *)C->product->data; 2099 PetscCall(PetscObjectGetComm((PetscObject)C, &comm)); 2100 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 2101 PetscCallMPI(MPI_Comm_size(comm, &size)); 2102 PetscCall(MatDenseGetArrayRead(a->A, &av)); 2103 PetscCall(MatDenseGetArrayRead(b->A, &bv)); 2104 PetscCall(MatDenseGetArrayWrite(c->A, &cv)); 2105 PetscCall(MatDenseGetLDA(a->A, &i)); 2106 PetscCall(PetscBLASIntCast(i, &alda)); 2107 PetscCall(MatDenseGetLDA(b->A, &i)); 2108 PetscCall(PetscBLASIntCast(i, &blda)); 2109 PetscCall(MatDenseGetLDA(c->A, &i)); 2110 PetscCall(PetscBLASIntCast(i, &clda)); 2111 PetscCall(MatGetOwnershipRanges(B, &ranges)); 2112 bn = B->rmap->n; 2113 if (blda == bn) { 2114 sendbuf = (PetscScalar *)bv; 2115 } else { 2116 sendbuf = abt->buf[0]; 2117 for (k = 0, i = 0; i < cK; i++) { 2118 for (j = 0; j < bn; j++, k++) sendbuf[k] = bv[i * blda + j]; 2119 } 2120 } 2121 if (size > 1) { 2122 sendto = (rank + size - 1) % size; 2123 recvfrom = (rank + size + 1) % size; 2124 } else { 2125 sendto = recvfrom = 0; 2126 } 2127 PetscCall(PetscBLASIntCast(cK, &ck)); 2128 PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm)); 2129 recvisfrom = rank; 2130 for (i = 0; i < size; i++) { 2131 /* we have finished receiving in sending, bufs can be read/modified */ 2132 PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */ 2133 PetscInt nextbn = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom]; 2134 2135 if (nextrecvisfrom != rank) { 2136 /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */ 2137 sendsiz = cK * bn; 2138 recvsiz = cK * nextbn; 2139 recvbuf = (i & 1) ? 
abt->buf[0] : abt->buf[1]; 2140 PetscCallMPI(MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0])); 2141 PetscCallMPI(MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1])); 2142 } 2143 2144 /* local aseq * sendbuf^T */ 2145 PetscCall(PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn)); 2146 if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &cm, &cn, &ck, &_DOne, av, &alda, sendbuf, &cn, &_DZero, cv + clda * ranges[recvisfrom], &clda)); 2147 2148 if (nextrecvisfrom != rank) { 2149 /* wait for the sends and receives to complete, swap sendbuf and recvbuf */ 2150 PetscCallMPI(MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE)); 2151 } 2152 bn = nextbn; 2153 recvisfrom = nextrecvisfrom; 2154 sendbuf = recvbuf; 2155 } 2156 PetscCall(MatDenseRestoreArrayRead(a->A, &av)); 2157 PetscCall(MatDenseRestoreArrayRead(b->A, &bv)); 2158 PetscCall(MatDenseRestoreArrayWrite(c->A, &cv)); 2159 PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 2160 PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 2161 PetscFunctionReturn(PETSC_SUCCESS); 2162 } 2163 2164 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C) 2165 { 2166 Mat_MPIDense *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data; 2167 Mat_MatTransMultDense *abt; 2168 MPI_Comm comm; 2169 PetscMPIInt size; 2170 PetscScalar *cv, *sendbuf, *recvbuf; 2171 const PetscScalar *av, *bv; 2172 PetscInt blda, i, cK = A->cmap->N, k, j, bn; 2173 PetscScalar _DOne = 1.0, _DZero = 0.0; 2174 PetscBLASInt cm, cn, ck, alda, clda; 2175 2176 PetscFunctionBegin; 2177 MatCheckProduct(C, 3); 2178 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 2179 abt = (Mat_MatTransMultDense *)C->product->data; 2180 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 2181 PetscCallMPI(MPI_Comm_size(comm, &size)); 2182 PetscCall(MatDenseGetArrayRead(a->A, &av)); 2183 
PetscCall(MatDenseGetArrayRead(b->A, &bv)); 2184 PetscCall(MatDenseGetArrayWrite(c->A, &cv)); 2185 PetscCall(MatDenseGetLDA(a->A, &i)); 2186 PetscCall(PetscBLASIntCast(i, &alda)); 2187 PetscCall(MatDenseGetLDA(b->A, &blda)); 2188 PetscCall(MatDenseGetLDA(c->A, &i)); 2189 PetscCall(PetscBLASIntCast(i, &clda)); 2190 /* copy transpose of B into buf[0] */ 2191 bn = B->rmap->n; 2192 sendbuf = abt->buf[0]; 2193 recvbuf = abt->buf[1]; 2194 for (k = 0, j = 0; j < bn; j++) { 2195 for (i = 0; i < cK; i++, k++) sendbuf[k] = bv[i * blda + j]; 2196 } 2197 PetscCall(MatDenseRestoreArrayRead(b->A, &bv)); 2198 PetscCallMPI(MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm)); 2199 PetscCall(PetscBLASIntCast(cK, &ck)); 2200 PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm)); 2201 PetscCall(PetscBLASIntCast(c->A->cmap->n, &cn)); 2202 if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &cm, &cn, &ck, &_DOne, av, &alda, recvbuf, &ck, &_DZero, cv, &clda)); 2203 PetscCall(MatDenseRestoreArrayRead(a->A, &av)); 2204 PetscCall(MatDenseRestoreArrayRead(b->A, &bv)); 2205 PetscCall(MatDenseRestoreArrayWrite(c->A, &cv)); 2206 PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 2207 PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 2208 PetscFunctionReturn(PETSC_SUCCESS); 2209 } 2210 2211 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) 2212 { 2213 Mat_MatTransMultDense *abt; 2214 2215 PetscFunctionBegin; 2216 MatCheckProduct(C, 3); 2217 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 2218 abt = (Mat_MatTransMultDense *)C->product->data; 2219 switch (abt->alg) { 2220 case 1: 2221 PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C)); 2222 break; 2223 default: 2224 PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C)); 2225 break; 2226 } 2227 PetscFunctionReturn(PETSC_SUCCESS); 2228 } 2229 2230 
PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data) 2231 { 2232 Mat_MatMultDense *ab = (Mat_MatMultDense *)data; 2233 2234 PetscFunctionBegin; 2235 PetscCall(MatDestroy(&ab->Ce)); 2236 PetscCall(MatDestroy(&ab->Ae)); 2237 PetscCall(MatDestroy(&ab->Be)); 2238 PetscCall(PetscFree(ab)); 2239 PetscFunctionReturn(PETSC_SUCCESS); 2240 } 2241 2242 #if defined(PETSC_HAVE_ELEMENTAL) 2243 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) 2244 { 2245 Mat_MatMultDense *ab; 2246 2247 PetscFunctionBegin; 2248 MatCheckProduct(C, 3); 2249 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Missing product data"); 2250 ab = (Mat_MatMultDense *)C->product->data; 2251 PetscCall(MatConvert_MPIDense_Elemental(A, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Ae)); 2252 PetscCall(MatConvert_MPIDense_Elemental(B, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Be)); 2253 PetscCall(MatMatMultNumeric_Elemental(ab->Ae, ab->Be, ab->Ce)); 2254 PetscCall(MatConvert(ab->Ce, MATMPIDENSE, MAT_REUSE_MATRIX, &C)); 2255 PetscFunctionReturn(PETSC_SUCCESS); 2256 } 2257 2258 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C) 2259 { 2260 Mat Ae, Be, Ce; 2261 Mat_MatMultDense *ab; 2262 2263 PetscFunctionBegin; 2264 MatCheckProduct(C, 4); 2265 PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 2266 /* check local size of A and B */ 2267 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 2268 SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend); 2269 } 2270 2271 /* create elemental matrices Ae and Be */ 2272 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &Ae)); 2273 PetscCall(MatSetSizes(Ae, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, 
A->cmap->N)); 2274 PetscCall(MatSetType(Ae, MATELEMENTAL)); 2275 PetscCall(MatSetUp(Ae)); 2276 PetscCall(MatSetOption(Ae, MAT_ROW_ORIENTED, PETSC_FALSE)); 2277 2278 PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &Be)); 2279 PetscCall(MatSetSizes(Be, PETSC_DECIDE, PETSC_DECIDE, B->rmap->N, B->cmap->N)); 2280 PetscCall(MatSetType(Be, MATELEMENTAL)); 2281 PetscCall(MatSetUp(Be)); 2282 PetscCall(MatSetOption(Be, MAT_ROW_ORIENTED, PETSC_FALSE)); 2283 2284 /* compute symbolic Ce = Ae*Be */ 2285 PetscCall(MatCreate(PetscObjectComm((PetscObject)C), &Ce)); 2286 PetscCall(MatMatMultSymbolic_Elemental(Ae, Be, fill, Ce)); 2287 2288 /* setup C */ 2289 PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, PETSC_DECIDE, PETSC_DECIDE)); 2290 PetscCall(MatSetType(C, MATDENSE)); 2291 PetscCall(MatSetUp(C)); 2292 2293 /* create data structure for reuse Cdense */ 2294 PetscCall(PetscNew(&ab)); 2295 ab->Ae = Ae; 2296 ab->Be = Be; 2297 ab->Ce = Ce; 2298 2299 C->product->data = ab; 2300 C->product->destroy = MatDestroy_MatMatMult_MPIDense_MPIDense; 2301 C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense; 2302 PetscFunctionReturn(PETSC_SUCCESS); 2303 } 2304 #endif 2305 2306 #if defined(PETSC_HAVE_ELEMENTAL) 2307 static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C) 2308 { 2309 PetscFunctionBegin; 2310 C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense; 2311 C->ops->productsymbolic = MatProductSymbolic_AB; 2312 PetscFunctionReturn(PETSC_SUCCESS); 2313 } 2314 #endif 2315 2316 static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C) 2317 { 2318 Mat_Product *product = C->product; 2319 Mat A = product->A, B = product->B; 2320 2321 PetscFunctionBegin; 2322 if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) 2323 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, 
B->rmap->rend); 2324 C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense; 2325 C->ops->productsymbolic = MatProductSymbolic_AtB; 2326 PetscFunctionReturn(PETSC_SUCCESS); 2327 } 2328 2329 static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C) 2330 { 2331 Mat_Product *product = C->product; 2332 const char *algTypes[2] = {"allgatherv", "cyclic"}; 2333 PetscInt alg, nalg = 2; 2334 PetscBool flg = PETSC_FALSE; 2335 2336 PetscFunctionBegin; 2337 /* Set default algorithm */ 2338 alg = 0; /* default is allgatherv */ 2339 PetscCall(PetscStrcmp(product->alg, "default", &flg)); 2340 if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg])); 2341 2342 /* Get runtime option */ 2343 if (product->api_user) { 2344 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatTransposeMult", "Mat"); 2345 PetscCall(PetscOptionsEList("-matmattransmult_mpidense_mpidense_via", "Algorithmic approach", "MatMatTransposeMult", algTypes, nalg, algTypes[alg], &alg, &flg)); 2346 PetscOptionsEnd(); 2347 } else { 2348 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_ABt", "Mat"); 2349 PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_ABt", algTypes, nalg, algTypes[alg], &alg, &flg)); 2350 PetscOptionsEnd(); 2351 } 2352 if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg])); 2353 2354 C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense; 2355 C->ops->productsymbolic = MatProductSymbolic_ABt; 2356 PetscFunctionReturn(PETSC_SUCCESS); 2357 } 2358 2359 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C) 2360 { 2361 Mat_Product *product = C->product; 2362 2363 PetscFunctionBegin; 2364 switch (product->type) { 2365 #if defined(PETSC_HAVE_ELEMENTAL) 2366 case MATPRODUCT_AB: 2367 PetscCall(MatProductSetFromOptions_MPIDense_AB(C)); 2368 break; 2369 #endif 2370 case 
MATPRODUCT_AtB: 2371 PetscCall(MatProductSetFromOptions_MPIDense_AtB(C)); 2372 break; 2373 case MATPRODUCT_ABt: 2374 PetscCall(MatProductSetFromOptions_MPIDense_ABt(C)); 2375 break; 2376 default: 2377 break; 2378 } 2379 PetscFunctionReturn(PETSC_SUCCESS); 2380 } 2381