/*
   Basic functions for basic parallel dense matrices.
   Portions of this code are under:
   Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
*/

#include <../src/mat/impls/dense/mpi/mpidense.h> /*I "petscmat.h" I*/
#include <../src/mat/impls/aij/mpi/mpiaij.h>
#include <petscblaslapack.h>

/*@
  MatDenseGetLocalMatrix - For a `MATMPIDENSE` or `MATSEQDENSE` matrix returns the sequential
  matrix that represents the operator. For sequential matrices it returns itself.

  Input Parameter:
. A - the sequential or MPI `MATDENSE` matrix

  Output Parameter:
. B - the inner matrix

  Level: intermediate

.seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATMPIDENSE`, `MATSEQDENSE`
@*/
PetscErrorCode MatDenseGetLocalMatrix(Mat A, Mat *B)
{
  Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
  PetscBool     flg;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscValidPointer(B, 2);
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &flg));
  if (flg) *B = mat->A; /* MPI case: the locally owned sequential block */
  else {
    PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQDENSE, &flg));
    PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)A)->type_name);
    *B = A; /* sequential case: the matrix is its own local representation */
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Copy the locally owned dense block of A into B; layouts must already be compatible */
PetscErrorCode MatCopy_MPIDense(Mat A, Mat B, MatStructure s)
{
  Mat_MPIDense *Amat = (Mat_MPIDense *)A->data;
  Mat_MPIDense *Bmat = (Mat_MPIDense *)B->data;

  PetscFunctionBegin;
  PetscCall(MatCopy(Amat->A, Bmat->A, s));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Add alpha to the locally owned diagonal entries of A */
PetscErrorCode MatShift_MPIDense(Mat A, PetscScalar alpha)
{
  Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
  PetscInt      j, lda, rstart = A->rmap->rstart, rend = A->rmap->rend, rend2;
  PetscScalar  *v;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArray(mat->A, &v));
  PetscCall(MatDenseGetLDA(mat->A, &lda));
  /* for rectangular matrices the diagonal ends at the last global column */
  rend2 = PetscMin(rend, A->cmap->N);
  if (rend2 > rstart) {
    /* diagonal entry of local row j-rstart sits in global column j: column-major index (j-rstart) + j*lda */
    for (j = rstart; j < rend2; j++) v[j - rstart + j * lda] += alpha;
    PetscCall(PetscLogFlops(rend2 - rstart));
  }
  PetscCall(MatDenseRestoreArray(mat->A, &v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Get one locally owned row; delegates to the sequential inner matrix using the local row index */
PetscErrorCode MatGetRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
  PetscInt      lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;

  PetscFunctionBegin;
  PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows");
  lrow = row - rstart;
  PetscCall(MatGetRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Release a row obtained with MatGetRow_MPIDense() */
PetscErrorCode MatRestoreRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
  PetscInt      lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;

  PetscFunctionBegin;
  PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows");
  lrow = row - rstart;
  PetscCall(MatRestoreRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Return (and cache on A as "DiagonalBlock") a sequential dense matrix sharing storage with
   the diagonal block of A; requires congruent row/column layouts */
PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A, Mat *a)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)A->data;
  PetscInt      m = A->rmap->n, rstart = A->rmap->rstart;
  PetscScalar  *array;
  MPI_Comm      comm;
  PetscBool     flg;
  Mat           B;

  PetscFunctionBegin;
  PetscCall(MatHasCongruentLayouts(A, &flg));
  PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only square matrices supported.");
  PetscCall(PetscObjectQuery((PetscObject)A, "DiagonalBlock", (PetscObject *)&B));
  if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */
#if PetscDefined(HAVE_CUDA)
    PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSECUDA, &flg));
    PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSECUDA);
#elif PetscDefined(HAVE_HIP)
    PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSEHIP, &flg));
    PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSEHIP);
#endif
    PetscCall(PetscObjectGetComm((PetscObject)(mdn->A), &comm));
    PetscCall(MatCreate(comm, &B));
    PetscCall(MatSetSizes(B, m, m, m, m));
    PetscCall(MatSetType(B, ((PetscObject)mdn->A)->type_name));
    PetscCall(MatDenseGetArrayRead(mdn->A, (const PetscScalar **)&array));
    /* NOTE(review): the offset m*rstart assumes the inner matrix lda equals m -- confirm for matrices with a user-set LDA */
    PetscCall(MatSeqDenseSetPreallocation(B, array + m * rstart));
    PetscCall(MatDenseRestoreArrayRead(mdn->A, (const PetscScalar **)&array));
    PetscCall(PetscObjectCompose((PetscObject)A, "DiagonalBlock", (PetscObject)B));
    *a = B;
    PetscCall(MatDestroy(&B)); /* the composition above keeps a reference */
  } else *a = B;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Set values: local rows go straight into the inner matrix; off-process rows are stashed for assembly */
PetscErrorCode MatSetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIDense *A = (Mat_MPIDense *)mat->data;
  PetscInt      i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;
  PetscBool     roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row indices are ignored by convention */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        /* v stored row by row: row i of the input starts at v + i*n */
        PetscCall(MatSetValues(A->A, 1, &row, n, idxn, v + i * n, addv));
      } else {
        /* v stored column by column: entry (i,j) of the input is v[i + j*m] */
        for (j = 0; j < n; j++) {
          if (idxn[j] < 0) continue;
          PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large");
          PetscCall(MatSetValues(A->A, 1, &row, 1, &idxn[j], v + i + j * m, addv));
        }
      }
    } else if (!A->donotstash) {
      /* off-process entry: queue it for communication during assembly */
      mat->assembled = PETSC_FALSE;
      if (roworiented) {
        PetscCall(MatStashValuesRow_Private(&mat->stash, idxm[i], n, idxn, v + i * n, PETSC_FALSE));
      } else {
        PetscCall(MatStashValuesCol_Private(&mat->stash, idxm[i], n, idxn, v + i, m, PETSC_FALSE));
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Get values; only locally owned rows are supported (no communication is performed) */
PetscErrorCode MatGetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
  PetscInt      i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j = 0; j < n; j++) {
        if (idxn[j] < 0) continue; /* negative column */
        PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large");
        PetscCall(MatGetValues(mdn->A, 1, &row, 1, &idxn[j], v + i * n + j));
      }
    } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Report the leading dimension of the inner sequential matrix */
static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A, PetscInt *lda)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseGetLDA(a->A, lda));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Set the leading dimension; lazily creates the inner sequential matrix (matching the
   CUDA/HIP flavor of A when applicable) if it does not yet exist */
static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A, PetscInt lda)
{
  Mat_MPIDense *a     = (Mat_MPIDense *)A->data;
  MatType       mtype = MATSEQDENSE;

  PetscFunctionBegin;
  if (!a->A) {
    PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
    PetscCall(PetscLayoutSetUp(A->rmap));
    PetscCall(PetscLayoutSetUp(A->cmap));
    PetscCall(MatCreate(PETSC_COMM_SELF, &a->A));
    /* inner matrix holds the local rows and ALL global columns */
    PetscCall(MatSetSizes(a->A, A->rmap->n, A->cmap->N, A->rmap->n, A->cmap->N));
#if PetscDefined(HAVE_CUDA)
    PetscBool iscuda;
    PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSECUDA, &iscuda));
    if (iscuda) mtype = MATSEQDENSECUDA;
#elif PetscDefined(HAVE_HIP)
    PetscBool iship;
    PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSEHIP, &iship));
    if (iship) mtype = MATSEQDENSEHIP;
#endif
    PetscCall(MatSetType(a->A, mtype));
  }
  PetscCall(MatDenseSetLDA(a->A, lda));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Expose the raw local array (read-write); forbidden while a submatrix view is outstanding */
static PetscErrorCode MatDenseGetArray_MPIDense(Mat A, PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseGetArray(a->A, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Expose the raw local array (read-only) */
static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A, const PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseGetArrayRead(a->A, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Expose the raw local array (write-only) */
static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A, PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseGetArrayWrite(a->A, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Temporarily substitute a user-provided array for the local storage */
static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A, const PetscScalar *array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDensePlaceArray(a->A, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Undo MatDensePlaceArray(), restoring the original storage */
static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseResetArray(a->A));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Permanently replace the local storage with a user-provided array */
static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A, const PetscScalar *array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseReplaceArray(a->A, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Extract a submatrix; the row index set must follow the same parallel partitioning as A */
static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A, IS isrow, IS iscol, MatReuse scall, Mat *B)
{
  Mat_MPIDense      *mat = (Mat_MPIDense *)A->data, *newmatd;
  PetscInt           lda, i, j, rstart, rend, nrows, ncols, Ncols, nlrows, nlcols;
  const PetscInt    *irow, *icol;
  const PetscScalar *v;
  PetscScalar       *bv;
  Mat                newmat;
  IS                 iscol_local;
  MPI_Comm           comm_is, comm_mat;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm_mat));
  PetscCall(PetscObjectGetComm((PetscObject)iscol, &comm_is));
  PetscCheck(comm_mat == comm_is, PETSC_COMM_SELF, PETSC_ERR_ARG_NOTSAMECOMM, "IS communicator must match matrix communicator");

  /* every rank needs the full column selection since it owns whole rows */
  PetscCall(ISAllGather(iscol, &iscol_local));
  PetscCall(ISGetIndices(isrow, &irow));
  PetscCall(ISGetIndices(iscol_local, &icol));
  PetscCall(ISGetLocalSize(isrow, &nrows));
  PetscCall(ISGetLocalSize(iscol, &ncols));
  PetscCall(ISGetSize(iscol, &Ncols)); /* global number of columns, size of iscol_local */

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK. ... Currently supports only submatrix same partitioning as
     original matrix! */

  PetscCall(MatGetLocalSize(A, &nlrows, &nlcols));
  PetscCall(MatGetOwnershipRange(A, &rstart, &rend));

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &newmat));
    PetscCall(MatSetSizes(newmat, nrows, ncols, PETSC_DECIDE, Ncols));
    PetscCall(MatSetType(newmat, ((PetscObject)A)->type_name));
    PetscCall(MatMPIDenseSetPreallocation(newmat, NULL));
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense *)newmat->data;
  PetscCall(MatDenseGetArray(newmatd->A, &bv));
  PetscCall(MatDenseGetArrayRead(mat->A, &v));
  PetscCall(MatDenseGetLDA(mat->A, &lda));
  for (i = 0; i < Ncols; i++) {
    /* av points at the start of source column icol[i]; destination is filled contiguously */
    const PetscScalar *av = v + lda * icol[i];
    for (j = 0; j < nrows; j++) *bv++ = av[irow[j] - rstart];
  }
  PetscCall(MatDenseRestoreArrayRead(mat->A, &v));
  PetscCall(MatDenseRestoreArray(newmatd->A, &bv));

  /* Assemble the matrices so that the correct flags are set */
  PetscCall(MatAssemblyBegin(newmat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(newmat, MAT_FINAL_ASSEMBLY));

  /* Free work space */
  PetscCall(ISRestoreIndices(isrow, &irow));
  PetscCall(ISRestoreIndices(iscol_local, &icol));
  PetscCall(ISDestroy(&iscol_local));
  *B = newmat;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Release an array obtained with MatDenseGetArray_MPIDense() */
PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A, PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseRestoreArray(a->A, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Release an array obtained with MatDenseGetArrayRead_MPIDense() */
PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A, const PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseRestoreArrayRead(a->A, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Release an array obtained with MatDenseGetArrayWrite_MPIDense() */
PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A, PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseRestoreArrayWrite(a->A, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Begin assembly: start communicating stashed off-process entries to their owners */
PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat, MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
  PetscInt      nstash, reallocs;

  PetscFunctionBegin;
  if (mdn->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
  PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
  PetscCall(PetscInfo(mdn->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* End assembly: drain incoming stash messages into the local matrix, then assemble it */
PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat, MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
  PetscInt      i, *row, *col, flg, j, rstart, ncols;
  PetscMPIInt   n;
  PetscScalar  *val;

  PetscFunctionBegin;
  if (!mdn->donotstash && !mat->nooffprocentries) {
    /* wait on receives */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break; /* no more messages */

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIDense(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
  }

  PetscCall(MatAssemblyBegin(mdn->A, mode));
  PetscCall(MatAssemblyEnd(mdn->A, mode));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Zero every entry of the local block */
PetscErrorCode MatZeroEntries_MPIDense(Mat A)
{
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatZeroEntries(l->A));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Zero the given (global) rows, optionally placing diag on the diagonal and fixing b = diag*x */
PetscErrorCode MatZeroRows_MPIDense(Mat A, PetscInt n, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;
  PetscInt      i, len, *lrows;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(PetscLayoutMapLocal(A->rmap, n, rows, &len, &lrows, NULL));
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArrayWrite(b, &bb));
    for (i = 0; i < len; ++i) bb[lrows[i]] = diag * xx[lrows[i]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArrayWrite(b, &bb));
  }
  /* zero the rows first, then (if requested) set the diagonal via MatDiagonalSet */
  PetscCall(MatZeroRows(l->A, len, lrows, 0.0, NULL, NULL));
  if (diag != 0.0) {
    Vec d;

    PetscCall(MatCreateVecs(A, NULL, &d));
    PetscCall(VecSet(d, diag));
    PetscCall(MatDiagonalSet(A, d, INSERT_VALUES));
    PetscCall(VecDestroy(&d));
  }
  PetscCall(PetscFree(lrows));
  PetscFunctionReturn(PETSC_SUCCESS);
}
PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat, Vec, Vec); 468 PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat, Vec, Vec, Vec); 469 PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat, Vec, Vec); 470 PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat, Vec, Vec, Vec); 471 472 PetscErrorCode MatMult_MPIDense(Mat mat, Vec xx, Vec yy) 473 { 474 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 475 const PetscScalar *ax; 476 PetscScalar *ay; 477 PetscMemType axmtype, aymtype; 478 479 PetscFunctionBegin; 480 if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat)); 481 PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype)); 482 PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype)); 483 PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE)); 484 PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE)); 485 PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay)); 486 PetscCall(VecRestoreArrayReadAndMemType(xx, &ax)); 487 PetscCall((*mdn->A->ops->mult)(mdn->A, mdn->lvec, yy)); 488 PetscFunctionReturn(PETSC_SUCCESS); 489 } 490 491 PetscErrorCode MatMultAdd_MPIDense(Mat mat, Vec xx, Vec yy, Vec zz) 492 { 493 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 494 const PetscScalar *ax; 495 PetscScalar *ay; 496 PetscMemType axmtype, aymtype; 497 498 PetscFunctionBegin; 499 if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat)); 500 PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype)); 501 PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype)); 502 PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE)); 503 PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE)); 504 PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay)); 505 PetscCall(VecRestoreArrayReadAndMemType(xx, &ax)); 506 PetscCall((*mdn->A->ops->multadd)(mdn->A, mdn->lvec, yy, zz)); 507 PetscFunctionReturn(PETSC_SUCCESS); 508 } 509 510 PetscErrorCode 
MatMultTranspose_MPIDense(Mat A, Vec xx, Vec yy) 511 { 512 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 513 const PetscScalar *ax; 514 PetscScalar *ay; 515 PetscMemType axmtype, aymtype; 516 517 PetscFunctionBegin; 518 if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); 519 PetscCall(VecSet(yy, 0.0)); 520 PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec)); 521 PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype)); 522 PetscCall(VecGetArrayAndMemType(yy, &ay, &aymtype)); 523 PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM)); 524 PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM)); 525 PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax)); 526 PetscCall(VecRestoreArrayAndMemType(yy, &ay)); 527 PetscFunctionReturn(PETSC_SUCCESS); 528 } 529 530 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A, Vec xx, Vec yy, Vec zz) 531 { 532 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 533 const PetscScalar *ax; 534 PetscScalar *ay; 535 PetscMemType axmtype, aymtype; 536 537 PetscFunctionBegin; 538 if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); 539 PetscCall(VecCopy(yy, zz)); 540 PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec)); 541 PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype)); 542 PetscCall(VecGetArrayAndMemType(zz, &ay, &aymtype)); 543 PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM)); 544 PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM)); 545 PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax)); 546 PetscCall(VecRestoreArrayAndMemType(zz, &ay)); 547 PetscFunctionReturn(PETSC_SUCCESS); 548 } 549 550 PetscErrorCode MatGetDiagonal_MPIDense(Mat A, Vec v) 551 { 552 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 553 PetscInt lda, len, i, n, m = A->rmap->n, radd; 554 PetscScalar *x, zero = 0.0; 555 const PetscScalar *av; 556 557 PetscFunctionBegin; 558 PetscCall(VecSet(v, zero)); 559 
PetscCall(VecGetArray(v, &x)); 560 PetscCall(VecGetSize(v, &n)); 561 PetscCheck(n == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming mat and vec"); 562 len = PetscMin(a->A->rmap->n, a->A->cmap->n); 563 radd = A->rmap->rstart * m; 564 PetscCall(MatDenseGetArrayRead(a->A, &av)); 565 PetscCall(MatDenseGetLDA(a->A, &lda)); 566 for (i = 0; i < len; i++) x[i] = av[radd + i * lda + i]; 567 PetscCall(MatDenseRestoreArrayRead(a->A, &av)); 568 PetscCall(VecRestoreArray(v, &x)); 569 PetscFunctionReturn(PETSC_SUCCESS); 570 } 571 572 PetscErrorCode MatDestroy_MPIDense(Mat mat) 573 { 574 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 575 576 PetscFunctionBegin; 577 #if defined(PETSC_USE_LOG) 578 PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N)); 579 #endif 580 PetscCall(MatStashDestroy_Private(&mat->stash)); 581 PetscCheck(!mdn->vecinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 582 PetscCheck(!mdn->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 583 PetscCall(MatDestroy(&mdn->A)); 584 PetscCall(VecDestroy(&mdn->lvec)); 585 PetscCall(PetscSFDestroy(&mdn->Mvctx)); 586 PetscCall(VecDestroy(&mdn->cvec)); 587 PetscCall(MatDestroy(&mdn->cmat)); 588 589 PetscCall(PetscFree(mat->data)); 590 PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL)); 591 592 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", NULL)); 593 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", NULL)); 594 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", NULL)); 595 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", NULL)); 596 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", NULL)); 597 PetscCall(PetscObjectComposeFunction((PetscObject)mat, 
"MatDenseRestoreArrayRead_C", NULL)); 598 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", NULL)); 599 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", NULL)); 600 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", NULL)); 601 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", NULL)); 602 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", NULL)); 603 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL)); 604 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", NULL)); 605 #if defined(PETSC_HAVE_ELEMENTAL) 606 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", NULL)); 607 #endif 608 #if defined(PETSC_HAVE_SCALAPACK) 609 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", NULL)); 610 #endif 611 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", NULL)); 612 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", NULL)); 613 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", NULL)); 614 #if defined(PETSC_HAVE_CUDA) 615 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", NULL)); 616 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", NULL)); 617 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", NULL)); 618 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensecuda_mpidense_C", NULL)); 619 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", NULL)); 620 PetscCall(PetscObjectComposeFunction((PetscObject)mat, 
"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", NULL)); 621 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", NULL)); 622 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", NULL)); 623 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArray_C", NULL)); 624 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayRead_C", NULL)); 625 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayWrite_C", NULL)); 626 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArray_C", NULL)); 627 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayRead_C", NULL)); 628 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayWrite_C", NULL)); 629 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAPlaceArray_C", NULL)); 630 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAResetArray_C", NULL)); 631 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAReplaceArray_C", NULL)); 632 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDASetPreallocation_C", NULL)); 633 #endif 634 #if defined(PETSC_HAVE_HIP) 635 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", NULL)); 636 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", NULL)); 637 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", NULL)); 638 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensehip_mpidense_C", NULL)); 639 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensehip_C", NULL)); 640 PetscCall(PetscObjectComposeFunction((PetscObject)mat, 
"MatProductSetFromOptions_mpiaijhipsparse_mpidensehip_C", NULL)); 641 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaij_C", NULL)); 642 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaijhipsparse_C", NULL)); 643 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArray_C", NULL)); 644 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayRead_C", NULL)); 645 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayWrite_C", NULL)); 646 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArray_C", NULL)); 647 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayRead_C", NULL)); 648 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayWrite_C", NULL)); 649 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPPlaceArray_C", NULL)); 650 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPResetArray_C", NULL)); 651 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPReplaceArray_C", NULL)); 652 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPSetPreallocation_C", NULL)); 653 #endif 654 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", NULL)); 655 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", NULL)); 656 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", NULL)); 657 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", NULL)); 658 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", NULL)); 659 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", NULL)); 660 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", NULL)); 661 
PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", NULL));

  /* drop the cached diagonal-block reference composed on the object */
  PetscCall(PetscObjectCompose((PetscObject)mat, "DiagonalBlock", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PETSC_INTERN PetscErrorCode MatView_SeqDense(Mat, PetscViewer);

#include <petscdraw.h>
/* Shared viewer kernel for ASCII, draw, and socket viewers: the INFO/INFO_DETAIL
   ASCII formats are handled per-rank; otherwise the whole matrix is gathered onto
   rank 0 and viewed there via MatView_SeqDense(). Collective on the matrix comm. */
static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
{
  Mat_MPIDense     *mdn = (Mat_MPIDense *)mat->data;
  PetscMPIInt       rank;
  PetscViewerType   vtype;
  PetscBool         iascii, isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  if (iascii) {
    PetscCall(PetscViewerGetType(viewer, &vtype));
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* per-rank local statistics; synchronized printing keeps rank order */
      MatInfo info;
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %" PetscInt_FMT " \n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, (PetscInt)info.memory));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      if (mdn->Mvctx) PetscCall(PetscSFView(mdn->Mvctx, viewer));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(PETSC_SUCCESS);
    }
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;

    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawIsNull(draw, &isnull));
    if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat          A;
    PetscInt     M = mat->rmap->N, N = mat->cmap->N, m, row, i, nz;
    PetscInt    *cols;
    PetscScalar *vals;

    PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
    if (rank == 0) {
      PetscCall(MatSetSizes(A, M, N, M, N));
    } else {
      PetscCall(MatSetSizes(A, 0, 0, M, N));
    }
    /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
    PetscCall(MatSetType(A, MATMPIDENSE));
    PetscCall(MatMPIDenseSetPreallocation(A, NULL));

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    A->insertmode = INSERT_VALUES;

    row = mat->rmap->rstart;
    m   = mdn->A->rmap->n;
    for (i = 0; i < m; i++) {
      PetscCall(MatGetRow_MPIDense(mat, row, &nz, &cols, &vals));
      PetscCall(MatSetValues_MPIDense(A, 1, &row, nz, cols, vals, INSERT_VALUES));
      PetscCall(MatRestoreRow_MPIDense(mat, row, &nz, &cols, &vals));
      row++;
    }

    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    if (rank == 0) {
      /* rank 0 now owns the whole matrix in its local SeqDense block; view that */
      PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIDense *)(A->data))->A, ((PetscObject)mat)->name));
      PetscCall(MatView_SeqDense(((Mat_MPIDense *)(A->data))->A, sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(PetscViewerFlush(viewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Dispatch viewing of a MATMPIDENSE matrix to the appropriate backend:
   ASCII/draw/socket share one kernel, binary has its own writer. */
PetscErrorCode MatView_MPIDense(Mat mat, PetscViewer viewer)
{
  PetscBool iascii, isbinary, isdraw, issocket;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));

  if (iascii || issocket || isdraw) {
    PetscCall(MatView_MPIDense_ASCIIorDraworSocket(mat, viewer));
  } else if (isbinary) PetscCall(MatView_Dense_Binary(mat, viewer));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Gather matrix statistics: local stats come from the inner SeqDense block,
   MAT_GLOBAL_MAX/MAT_GLOBAL_SUM reduce them over the matrix communicator. */
PetscErrorCode MatGetInfo_MPIDense(Mat A, MatInfoType flag, MatInfo *info)
{
  Mat_MPIDense  *mat = (Mat_MPIDense *)A->data;
  Mat            mdn = mat->A;
  PetscLogDouble isend[5], irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;

  PetscCall(MatGetInfo(mdn, MAT_LOCAL, info));

  /* stage the five reducible fields so one Allreduce covers them all */
  isend[0] = info->nz_used;
  isend[1] = info->nz_allocated;
  isend[2] = info->nz_unneeded;
  isend[3] = info->memory;
  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)A)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)A)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
PetscFunctionReturn(PETSC_SUCCESS);
}

/* Forward supported options to the inner SeqDense block; record the stash and
   row-orientation flags locally; silently ignore options that do not apply. */
PetscErrorCode MatSetOption_MPIDense(Mat A, MatOption op, PetscBool flg)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A, 1);
    a->roworiented = flg; /* remembered here AND pushed to the local block */
    PetscCall(MatSetOption(a->A, op, flg));
    break;
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_USE_HASH_TABLE:
  case MAT_SORTED_FULL:
    PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg; /* disables stashing of off-process entries during assembly */
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
  case MAT_SPD:
  case MAT_IGNORE_LOWER_TRIANGULAR:
  case MAT_IGNORE_ZERO_ENTRIES:
  case MAT_SPD_ETERNAL:
    /* if the diagonal matrix is square it inherits some of the properties above */
    PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
    break;
  default:
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %s", MatOptions[op]);
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Scale rows by ll and/or columns by rr: A <- diag(ll) A diag(rr).
   Row scaling is purely local; column scaling first broadcasts the locally
   owned entries of rr to every rank via the matrix's PetscSF (Mvctx). */
PetscErrorCode MatDiagonalScale_MPIDense(Mat A, Vec ll, Vec rr)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)A->data;
  const PetscScalar *l;
  PetscScalar        x, *v, *vv, *r;
  PetscInt           i, j, s2a, s3a, s2, s3, m = mdn->A->rmap->n, n = mdn->A->cmap->n, lda;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArray(mdn->A, &vv));
  PetscCall(MatDenseGetLDA(mdn->A, &lda));
  PetscCall(MatGetLocalSize(A, &s2, &s3));
  if (ll) {
    PetscCall(VecGetLocalSize(ll, &s2a));
    PetscCheck(s2a == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left scaling vector non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT, s2a, s2);
    PetscCall(VecGetArrayRead(ll, &l));
    for (i = 0; i < m; i++) {
      x = l[i];
      v = vv + i; /* walk row i: consecutive entries are lda apart (column-major) */
      for (j = 0; j < n; j++) {
        (*v) *= x;
        v += lda;
      }
    }
    PetscCall(VecRestoreArrayRead(ll, &l));
    PetscCall(PetscLogFlops(1.0 * n * m));
  }
  if (rr) {
    const PetscScalar *ar;

    PetscCall(VecGetLocalSize(rr, &s3a));
    PetscCheck(s3a == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Right scaling vec non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT ".", s3a, s3);
    PetscCall(VecGetArrayRead(rr, &ar));
    if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
    PetscCall(VecGetArray(mdn->lvec, &r));
    /* gather all N column scale factors into the local work vector lvec */
    PetscCall(PetscSFBcastBegin(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE));
    PetscCall(VecRestoreArrayRead(rr, &ar));
    for (i = 0; i < n; i++) {
      x = r[i];
      v = vv + i * lda; /* column i is contiguous in the column-major array */
      for (j = 0; j < m; j++) (*v++) *= x;
    }
    PetscCall(VecRestoreArray(mdn->lvec, &r));
    PetscCall(PetscLogFlops(1.0 * n * m));
  }
  PetscCall(MatDenseRestoreArray(mdn->A, &vv));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Parallel norms: Frobenius and infinity reduce local results; the 1-norm sums
   per-column absolute values globally then takes the max; 2-norm unsupported. */
PetscErrorCode MatNorm_MPIDense(Mat A, NormType type, PetscReal *nrm)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)A->data;
  PetscInt           i, j;
  PetscMPIInt        size;
  PetscReal          sum = 0.0;
  const PetscScalar *av, *v;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArrayRead(mdn->A, &av));
  v = av;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    PetscCall(MatNorm(mdn->A, type, nrm));
  } else {
    if (type == NORM_FROBENIUS) {
      /* NOTE(review): this walk assumes the local array is contiguous
         (lda == local row count) — confirm for strided storage */
      for (i = 0; i < mdn->A->cmap->n * mdn->A->rmap->n; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
      *nrm =
PetscSqrtReal(*nrm);
      PetscCall(PetscLogFlops(2.0 * mdn->A->cmap->n * mdn->A->rmap->n));
    } else if (type == NORM_1) {
      /* 1-norm: max over columns of the global column absolute sums */
      PetscReal *tmp, *tmp2;
      PetscCall(PetscCalloc2(A->cmap->N, &tmp, A->cmap->N, &tmp2));
      *nrm = 0.0;
      v    = av;
      for (j = 0; j < mdn->A->cmap->n; j++) {
        for (i = 0; i < mdn->A->rmap->n; i++) {
          tmp[j] += PetscAbsScalar(*v);
          v++;
        }
      }
      PetscCall(MPIU_Allreduce(tmp, tmp2, A->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
      for (j = 0; j < A->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      PetscCall(PetscFree2(tmp, tmp2));
      PetscCall(PetscLogFlops(A->cmap->n * A->rmap->n));
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp;
      PetscCall(MatNorm(mdn->A, type, &ntemp));
      PetscCall(MPIU_Allreduce(&ntemp, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
    } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for two norm");
  }
  PetscCall(MatDenseRestoreArrayRead(mdn->A, &av));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Transpose by scattering each locally owned column block into the target rows
   of B via MatSetValues; communication happens inside assembly. MAT_INPLACE
   replaces A's header with the freshly built transpose. */
PetscErrorCode MatTranspose_MPIDense(Mat A, MatReuse reuse, Mat *matout)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat           B;
  PetscInt      M = A->rmap->N, N = A->cmap->N, m, n, *rwork, rstart = A->rmap->rstart;
  PetscInt      j, i, lda;
  PetscScalar  *v;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
    PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
    PetscCall(MatMPIDenseSetPreallocation(B, NULL));
  } else B = *matout;

  m = a->A->rmap->n;
  n = a->A->cmap->n;
  PetscCall(MatDenseGetArrayRead(a->A, (const PetscScalar **)&v));
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(PetscMalloc1(m, &rwork));
  for (i = 0; i < m; i++) rwork[i] = rstart + i; /* global row indices of the local block */
  for (j = 0; j < n; j++) {
    /* local column j of A becomes (part of) global row j of B */
    PetscCall(MatSetValues(B, 1, &j, m, rwork, v, INSERT_VALUES));
    v += lda;
  }
  PetscCall(MatDenseRestoreArrayRead(a->A, (const PetscScalar **)&v));
  PetscCall(PetscFree(rwork));
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    PetscCall(MatHeaderMerge(A, &B));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatDuplicate_MPIDense(Mat, MatDuplicateOption, Mat *);
PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat, PetscScalar);

/* Finish layout setup and default-preallocate if the caller never did. */
PetscErrorCode MatSetUp_MPIDense(Mat A)
{
  PetscFunctionBegin;
  PetscCall(PetscLayoutSetUp(A->rmap));
  PetscCall(PetscLayoutSetUp(A->cmap));
  if (!A->preallocated) PetscCall(MatMPIDenseSetPreallocation(A, NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Y <- Y + alpha*X, delegated entirely to the local SeqDense blocks. */
PetscErrorCode MatAXPY_MPIDense(Mat Y, PetscScalar alpha, Mat X, MatStructure str)
{
  Mat_MPIDense *A = (Mat_MPIDense *)Y->data, *B = (Mat_MPIDense *)X->data;

  PetscFunctionBegin;
  PetscCall(MatAXPY(A->A, alpha, B->A, str));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Complex-conjugate every entry (local operation on the inner block). */
PetscErrorCode MatConjugate_MPIDense(Mat mat)
{
  Mat_MPIDense *a = (Mat_MPIDense *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatConjugate(a->A));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Keep only the real part of every entry (local operation). */
PetscErrorCode MatRealPart_MPIDense(Mat A)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatRealPart(a->A));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Keep only the imaginary part of every entry (local operation). */
PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatImaginaryPart(a->A));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Extract global column `col` into v by delegating to the inner block's op. */
static PetscErrorCode MatGetColumnVector_MPIDense(Mat A, Vec v, PetscInt col)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(a->A, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing local matrix");
  PetscCheck(a->A->ops->getcolumnvector, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing get column operation");
  PetscCall((*a->A->ops->getcolumnvector)(a->A, v, col));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat, PetscInt, PetscReal *);

/* Column-wise reductions (norms/sums/means): compute local per-column values,
   reduce across ranks (MAX for infinity norm, SUM otherwise), then post-process
   (square root for 2-norm, divide by global row count for means). */
PetscErrorCode MatGetColumnReductions_MPIDense(Mat A, PetscInt type, PetscReal *reductions)
{
  PetscInt      i, m, n;
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscReal    *work;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A, &m, &n));
  PetscCall(PetscMalloc1(n, &work));
  if (type == REDUCTION_MEAN_REALPART) {
    /* means are computed as global sums, normalized after the reduction */
    PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_REALPART, work));
  } else if (type == REDUCTION_MEAN_IMAGINARYPART) {
    PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_IMAGINARYPART, work));
  } else {
    PetscCall(MatGetColumnReductions_SeqDense(a->A, type, work));
  }
  if (type == NORM_2) {
    /* sum squares across ranks, take the square root after the Allreduce */
    for (i = 0; i < n; i++) work[i] *= work[i];
  }
  if (type == NORM_INFINITY) {
    PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, A->hdr.comm));
  } else {
    PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, A->hdr.comm));
  }
  PetscCall(PetscFree(work));
  if (type == NORM_2) {
    for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
  } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < n; i++) reductions[i] /= m;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static
PetscErrorCode MatSetRandom_MPIDense(Mat x, PetscRandom rctx)
{
  Mat_MPIDense *d = (Mat_MPIDense *)x->data;

  PetscFunctionBegin;
  /* fill the local block with random values; mirror its offload state on device builds */
  PetscCall(MatSetRandom(d->A, rctx));
#if defined(PETSC_HAVE_DEVICE)
  x->offloadmask = d->A->offloadmask;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* A dense matrix always stores every diagonal entry, so nothing is missing. */
static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A, PetscBool *missing, PetscInt *d)
{
  PetscFunctionBegin;
  *missing = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat);
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat);
static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat);
static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat);
static PetscErrorCode MatEqual_MPIDense(Mat, Mat, PetscBool *);
static PetscErrorCode MatLoad_MPIDense(Mat, PetscViewer);

/* Function table for MATMPIDENSE; slot numbers follow struct _MatOps in petscmat.h */
static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
                                       MatGetRow_MPIDense,
                                       MatRestoreRow_MPIDense,
                                       MatMult_MPIDense,
                                       /* 4*/ MatMultAdd_MPIDense,
                                       MatMultTranspose_MPIDense,
                                       MatMultTransposeAdd_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 10*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatTranspose_MPIDense,
                                       /* 15*/ MatGetInfo_MPIDense,
                                       MatEqual_MPIDense,
                                       MatGetDiagonal_MPIDense,
                                       MatDiagonalScale_MPIDense,
                                       MatNorm_MPIDense,
                                       /* 20*/ MatAssemblyBegin_MPIDense,
                                       MatAssemblyEnd_MPIDense,
                                       MatSetOption_MPIDense,
                                       MatZeroEntries_MPIDense,
                                       /* 24*/ MatZeroRows_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 29*/ MatSetUp_MPIDense,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPIDense,
                                       NULL,
                                       /* 34*/ MatDuplicate_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 39*/ MatAXPY_MPIDense,
                                       MatCreateSubMatrices_MPIDense,
                                       NULL,
                                       MatGetValues_MPIDense,
                                       MatCopy_MPIDense,
                                       /* 44*/ NULL,
                                       MatScale_MPIDense,
                                       MatShift_MPIDense,
                                       NULL,
                                       NULL,
                                       /* 49*/ MatSetRandom_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 54*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 59*/ MatCreateSubMatrix_MPIDense,
                                       MatDestroy_MPIDense,
                                       MatView_MPIDense,
                                       NULL,
                                       NULL,
                                       /* 64*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 69*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 74*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 79*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 83*/ MatLoad_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 89*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 94*/ NULL,
                                       NULL,
                                       MatMatTransposeMultSymbolic_MPIDense_MPIDense,
                                       MatMatTransposeMultNumeric_MPIDense_MPIDense,
                                       NULL,
                                       /* 99*/ MatProductSetFromOptions_MPIDense,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPIDense,
                                       NULL,
                                       /*104*/ NULL,
                                       MatRealPart_MPIDense,
                                       MatImaginaryPart_MPIDense,
                                       NULL,
                                       NULL,
                                       /*109*/ NULL,
                                       NULL,
                                       NULL,
                                       MatGetColumnVector_MPIDense,
                                       MatMissingDiagonal_MPIDense,
                                       /*114*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*119*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*124*/ NULL,
                                       MatGetColumnReductions_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*129*/ NULL,
                                       NULL,
                                       MatTransposeMatMultSymbolic_MPIDense_MPIDense,
                                       MatTransposeMatMultNumeric_MPIDense_MPIDense,
                                       NULL,
                                       /*134*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*139*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatCreateMPIMatConcatenateSeqMat_MPIDense,
                                       /*145*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*150*/ NULL,
                                       NULL};

/* Create (or reuse) the inner SeqDense block sized (local rows) x (global cols);
   `data` optionally supplies caller-owned storage. Picks the matching CUDA/HIP
   sequential type when the parallel matrix is a device type. */
PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat,
PetscScalar *data)
{
  Mat_MPIDense *a     = (Mat_MPIDense *)mat->data;
  MatType       mtype = MATSEQDENSE;

  PetscFunctionBegin;
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));
  if (!a->A) {
    /* the local block holds ALL global columns for the locally owned rows */
    PetscCall(MatCreate(PETSC_COMM_SELF, &a->A));
    PetscCall(MatSetSizes(a->A, mat->rmap->n, mat->cmap->N, mat->rmap->n, mat->cmap->N));
  }
#if defined(PETSC_HAVE_CUDA)
  PetscBool iscuda;
  PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSECUDA, &iscuda));
  if (iscuda) mtype = MATSEQDENSECUDA;
#endif
#if defined(PETSC_HAVE_HIP)
  PetscBool iship;
  PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSEHIP, &iship));
  if (iship) mtype = MATSEQDENSEHIP;
#endif
  PetscCall(MatSetType(a->A, mtype));
  PetscCall(MatSeqDenseSetPreallocation(a->A, data));
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
  mat->offloadmask = a->A->offloadmask;
#endif
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Convert MATMPIAIJ -> MATMPIDENSE: densify the local AIJ rows, then stitch the
   sequential pieces into a parallel dense matrix with the same column layout. */
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIDense(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat B, C;

  PetscFunctionBegin;
  PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &C));
  PetscCall(MatConvert_SeqAIJ_SeqDense(C, MATSEQDENSE, MAT_INITIAL_MATRIX, &B));
  PetscCall(MatDestroy(&C));
  if (reuse == MAT_REUSE_MATRIX) {
    C = *newmat; /* reuse the previously created parallel result */
  } else C = NULL;
  PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C));
  PetscCall(MatDestroy(&B));
  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A, &C));
  } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Convert MATMPIDENSE -> MATMPIAIJ: sparsify the local dense block, then
   concatenate the sequential pieces into a parallel AIJ matrix. */
PetscErrorCode MatConvert_MPIDense_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat B, C;

  PetscFunctionBegin;
  PetscCall(MatDenseGetLocalMatrix(A, &C));
  PetscCall(MatConvert_SeqDense_SeqAIJ(C, MATSEQAIJ, MAT_INITIAL_MATRIX, &B));
  if (reuse == MAT_REUSE_MATRIX) {
    C = *newmat;
  } else C = NULL;
  PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C));
  PetscCall(MatDestroy(&B));
  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A, &C));
  } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
  PetscFunctionReturn(PETSC_SUCCESS);
}

#if defined(PETSC_HAVE_ELEMENTAL)
/* Convert MATMPIDENSE -> MATELEMENTAL by inserting each rank's locally owned
   rows (all N columns) into a freshly created (or reused) Elemental matrix. */
PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat          mat_elemental;
  PetscScalar *v;
  PetscInt     m = A->rmap->n, N = A->cmap->N, rstart = A->rmap->rstart, i, *rows, *cols;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    mat_elemental = *newmat;
    PetscCall(MatZeroEntries(*newmat));
  } else {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental));
    PetscCall(MatSetSizes(mat_elemental, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N));
    PetscCall(MatSetType(mat_elemental, MATELEMENTAL));
    PetscCall(MatSetUp(mat_elemental));
    PetscCall(MatSetOption(mat_elemental, MAT_ROW_ORIENTED, PETSC_FALSE));
  }

  PetscCall(PetscMalloc2(m, &rows, N, &cols));
  for (i = 0; i < N; i++) cols[i] = i;
  for (i = 0; i < m; i++) rows[i] = rstart + i;

  /* PETSc-Elemental interface uses axpy
for setting off-processor entries, only ADD_VALUES is allowed */
  PetscCall(MatDenseGetArray(A, &v));
  PetscCall(MatSetValues(mat_elemental, m, rows, N, cols, v, ADD_VALUES));
  PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY));
  PetscCall(MatDenseRestoreArray(A, &v));
  PetscCall(PetscFree2(rows, cols));

  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A, &mat_elemental));
  } else {
    *newmat = mat_elemental;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif

/* Borrow a raw pointer to column `col` of the local block (pair with restore). */
static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A, PetscInt col, PetscScalar **vals)
{
  Mat_MPIDense *mat = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseGetColumn(mat->A, col, vals));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Return the pointer obtained with MatDenseGetColumn_MPIDense(). */
static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A, PetscScalar **vals)
{
  Mat_MPIDense *mat = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseRestoreColumn(mat->A, vals));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Build (symbolic phase, MAT_INITIAL_MATRIX) and fill (numeric phase) a parallel
   dense matrix whose local block is a copy of the given sequential matrix. */
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
{
  Mat_MPIDense *mat;
  PetscInt      m, nloc, N;

  PetscFunctionBegin;
  PetscCall(MatGetSize(inmat, &m, &N));
  PetscCall(MatGetLocalSize(inmat, NULL, &nloc));
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    PetscInt sum;

    if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N));
    /* Check sum(n) = N */
    PetscCall(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm));
    PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N);

    PetscCall(MatCreateDense(comm, m, n, PETSC_DETERMINE, N, NULL, outmat));
    PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  }

  /* numeric phase */
  mat = (Mat_MPIDense *)(*outmat)->data;
  PetscCall(MatCopy(inmat, mat->A, SAME_NONZERO_PATTERN));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Expose column `col` of the local block as the cached work vector a->cvec;
   records col+1 in a->vecinuse to enforce the get/restore pairing. */
PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscInt      lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
  a->vecinuse = col + 1;
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(MatDenseGetArray(a->A, (PetscScalar **)&a->ptrinuse));
  /* point the work vector at column `col` of the column-major local array */
  PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
  *v = a->cvec;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Undo MatDenseGetColumnVec_MPIDense(): release the array and reset the vector. */
PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector");
  a->vecinuse = 0;
  PetscCall(MatDenseRestoreArray(a->A, (PetscScalar **)&a->ptrinuse));
  PetscCall(VecResetArray(a->cvec));
  if (v) *v = NULL;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Read-only variant: same as GetColumnVec but locks the vector for reading. */
PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscInt      lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
  a->vecinuse = col + 1;
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(MatDenseGetArrayRead(a->A, &a->ptrinuse));
  PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
  PetscCall(VecLockReadPush(a->cvec));
  *v = a->cvec;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Undo MatDenseGetColumnVecRead_MPIDense(): pop the read lock and reset. */
PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
  a->vecinuse = 0;
  PetscCall(MatDenseRestoreArrayRead(a->A, &a->ptrinuse));
  PetscCall(VecLockReadPop(a->cvec));
  PetscCall(VecResetArray(a->cvec));
  if (v) *v = NULL;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Write-only variant: uses MatDenseGetArrayWrite (no device-to-host copy needed). */
PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscInt      lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
  a->vecinuse = col + 1;
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(MatDenseGetArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
  PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
  *v = a->cvec;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Undo MatDenseGetColumnVecWrite_MPIDense(): release the write array and reset. */
PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
  a->vecinuse = 0;
  PetscCall(MatDenseRestoreArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
  PetscCall(VecResetArray(a->cvec));
  if (v) *v = NULL;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* View rows [rbegin,rend) x columns [cbegin,cend) as the cached matrix a->cmat.
   The requested global row range is clipped to this rank's ownership range
   ([pbegin,pend) in local indices); the cached matrix's layouts are rebuilt
   only when the requested shape differs from the previous call (a collective
   check when local sizes may vary). */
PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A, PetscInt rbegin, PetscInt rend, PetscInt cbegin, PetscInt cend, Mat *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat_MPIDense *c;
  MPI_Comm      comm;
  PetscInt      pbegin, pend;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCheck(!a->vecinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  /* clip the global row window to the locally owned rows, in local numbering */
  pbegin = PetscMax(0, PetscMin(A->rmap->rend, rbegin) - A->rmap->rstart);
  pend   = PetscMin(A->rmap->n, PetscMax(0, rend - A->rmap->rstart));
  if (!a->cmat) {
    PetscCall(MatCreate(comm, &a->cmat));
    PetscCall(MatSetType(a->cmat, ((PetscObject)A)->type_name));
    if (rend - rbegin == A->rmap->N) PetscCall(PetscLayoutReference(A->rmap, &a->cmat->rmap));
    else {
      PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin));
      PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin));
      PetscCall(PetscLayoutSetUp(a->cmat->rmap));
    }
    PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin));
    PetscCall(PetscLayoutSetUp(a->cmat->cmap));
  } else {
    /* decide whether the cached layouts can be reused for this request */
    PetscBool same = (PetscBool)(rend - rbegin == a->cmat->rmap->N);
    if (same && a->cmat->rmap->N != A->rmap->N) {
      same = (PetscBool)(pend - pbegin == a->cmat->rmap->n);
      PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &same, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
    }
    if (!same) {
      PetscCall(PetscLayoutDestroy(&a->cmat->rmap));
      PetscCall(PetscLayoutCreate(comm, &a->cmat->rmap));
      PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin));
      PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin));
      PetscCall(PetscLayoutSetUp(a->cmat->rmap));
    }
    if (cend - cbegin != a->cmat->cmap->N) {
      PetscCall(PetscLayoutDestroy(&a->cmat->cmap));
      PetscCall(PetscLayoutCreate(comm, &a->cmat->cmap));
      PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin));
      PetscCall(PetscLayoutSetUp(a->cmat->cmap));
    }
  }
  c = (Mat_MPIDense *)a->cmat->data;
  PetscCheck(!c->A, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  /* the cached matrix's local block is a submatrix view of this rank's block */
  PetscCall(MatDenseGetSubMatrix(a->A, pbegin, pend, cbegin, cend, &c->A));

  a->cmat->preallocated = PETSC_TRUE;
  a->cmat->assembled    = PETSC_TRUE;
#if defined(PETSC_HAVE_DEVICE)
  a->cmat->offloadmask = c->A->offloadmask;
#endif
  a->matinuse = cbegin + 1;
  *v          = a->cmat;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Undo MatDenseGetSubMatrix_MPIDense(): release the local-block view and clear
   the in-use marker; mirrors the offload state back on device builds. */
PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A, Mat *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat_MPIDense *c;

  PetscFunctionBegin;
  PetscCheck(a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetSubMatrix() first");
  PetscCheck(a->cmat, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal matrix");
  PetscCheck(*v == a->cmat, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Not the matrix obtained from MatDenseGetSubMatrix()");
  a->matinuse = 0;
  c           = (Mat_MPIDense *)a->cmat->data;
  PetscCall(MatDenseRestoreSubMatrix(a->A, &c->A));
  if (v) *v = NULL;
#if defined(PETSC_HAVE_DEVICE)
  A->offloadmask = a->A->offloadmask;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*MC
   MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.

   Options Database Key:
. -mat_type mpidense - sets the matrix type to `MATMPIDENSE` during a call to `MatSetFromOptions()`

  Level: beginner

.seealso: [](ch_matrices), `Mat`, `MatCreateDense()`, `MATSEQDENSE`, `MATDENSE`
M*/
PetscErrorCode MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense *a;

  PetscFunctionBegin;
  PetscCall(PetscNew(&a));
  mat->data   = (void *)a;
  mat->ops[0] = MatOps_Values; /* install the MATMPIDENSE function table */

  mat->insertmode = NOT_SET_VALUES;

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;

  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)mat), 1, &mat->stash));

  /* stuff used for matrix vector multiply */
  a->lvec        = NULL;
  a->Mvctx       = NULL;
  a->roworiented = PETSC_TRUE;

  /* compose the MATDENSE API entry points resolved via PetscObjectQueryFunction */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", MatDenseGetLDA_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", MatDenseSetLDA_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", MatDenseGetArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", MatDenseRestoreArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", MatDenseGetArrayRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", MatDenseRestoreArrayRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", MatDenseGetArrayWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", MatDenseRestoreArrayWrite_MPIDense));
PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", MatDensePlaceArray_MPIDense)); 1636 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", MatDenseResetArray_MPIDense)); 1637 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", MatDenseReplaceArray_MPIDense)); 1638 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDense)); 1639 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDense)); 1640 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDense)); 1641 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDense)); 1642 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDense)); 1643 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDense)); 1644 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", MatDenseGetSubMatrix_MPIDense)); 1645 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", MatDenseRestoreSubMatrix_MPIDense)); 1646 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense)); 1647 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", MatConvert_MPIDense_MPIAIJ)); 1648 #if defined(PETSC_HAVE_ELEMENTAL) 1649 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", MatConvert_MPIDense_Elemental)); 1650 #endif 1651 #if defined(PETSC_HAVE_SCALAPACK) 1652 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", MatConvert_Dense_ScaLAPACK)); 1653 #endif 1654 #if defined(PETSC_HAVE_CUDA) 
1655 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", MatConvert_MPIDense_MPIDenseCUDA)); 1656 #endif 1657 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", MatMPIDenseSetPreallocation_MPIDense)); 1658 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense)); 1659 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ)); 1660 #if defined(PETSC_HAVE_CUDA) 1661 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense)); 1662 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ)); 1663 #endif 1664 #if defined(PETSC_HAVE_HIP) 1665 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", MatConvert_MPIDense_MPIDenseHIP)); 1666 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense)); 1667 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ)); 1668 #endif 1669 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", MatDenseGetColumn_MPIDense)); 1670 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", MatDenseRestoreColumn_MPIDense)); 1671 PetscCall(PetscObjectChangeTypeName((PetscObject)mat, MATMPIDENSE)); 1672 PetscFunctionReturn(PETSC_SUCCESS); 1673 } 1674 1675 /*MC 1676 MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices. 

   This matrix type is identical to `MATSEQDENSE` when constructed with a single process communicator,
   and `MATMPIDENSE` otherwise.

   Options Database Key:
. -mat_type dense - sets the matrix type to `MATDENSE` during a call to `MatSetFromOptions()`

   Level: beginner

.seealso: [](ch_matrices), `Mat`, `MATSEQDENSE`, `MATMPIDENSE`, `MATDENSECUDA`, `MATDENSEHIP`
M*/

/*@C
   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

   Collective

   Input Parameters:
+  B - the matrix
-  data - optional location of matrix data.  Set to `NULL` for PETSc
   to control all matrix memory allocation.

   Level: intermediate

   Notes:
   The dense format is fully compatible with standard Fortran
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set `data` to `NULL`.

.seealso: [](ch_matrices), `Mat`, `MATMPIDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
@*/
PetscErrorCode MatMPIDenseSetPreallocation(Mat B, PetscScalar *data)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  /* try-method: dispatches to the MPIDense implementation, silently a no-op for other types */
  PetscTryMethod(B, "MatMPIDenseSetPreallocation_C", (Mat, PetscScalar *), (B, data));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
   MatDensePlaceArray - Allows one to replace the array in a `MATDENSE` matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Level: developer

   Note:
   You can return to the original array with a call to `MatDenseResetArray()`. The user is responsible for freeing this array; it will not be
   freed when the matrix is destroyed.

.seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDenseResetArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`,
          `MatDenseReplaceArray()`
@*/
PetscErrorCode MatDensePlaceArray(Mat mat, const PetscScalar *array)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  PetscUseMethod(mat, "MatDensePlaceArray_C", (Mat, const PetscScalar *), (mat, array));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
  /* the newly placed host array is now the authoritative copy */
  mat->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
   MatDenseResetArray - Resets the matrix array to the array it previously had before the call to `MatDensePlaceArray()`

   Not Collective

   Input Parameter:
.  mat - the matrix

   Level: developer

   Note:
   You can only call this after a call to `MatDensePlaceArray()`

.seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDensePlaceArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`
@*/
PetscErrorCode MatDenseResetArray(Mat mat)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  PetscUseMethod(mat, "MatDenseResetArray_C", (Mat), (mat));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
   MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an
   array provided by the user.
   This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Level: developer

   Note:
   The memory passed in MUST be obtained with `PetscMalloc()` and CANNOT be
   freed by the user. It will be freed when the matrix is destroyed.

.seealso: [](ch_matrices), `Mat`, `MatDensePlaceArray()`, `MatDenseGetArray()`, `VecReplaceArray()`
@*/
PetscErrorCode MatDenseReplaceArray(Mat mat, const PetscScalar *array)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  PetscUseMethod(mat, "MatDenseReplaceArray_C", (Mat, const PetscScalar *), (mat, array));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
  /* the replacement host array is now the authoritative copy */
  mat->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
   MatCreateDense - Creates a matrix in `MATDENSE` format.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or `PETSC_DECIDE` to have calculated if `M` is given)
.  n - number of local columns (or `PETSC_DECIDE` to have calculated if `N` is given)
.  M - number of global rows (or `PETSC_DECIDE` to have calculated if `m` is given)
.  N - number of global columns (or `PETSC_DECIDE` to have calculated if `n` is given)
-  data - optional location of matrix data.  Set data to `NULL` (`PETSC_NULL_SCALAR` for Fortran users) for PETSc
   to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Level: intermediate

   Notes:
   The dense format is fully compatible with standard Fortran
   storage by columns.

   Although local portions of the matrix are stored in column-major
   order, the matrix is partitioned across MPI ranks by row.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set `data` to `NULL` (`PETSC_NULL_SCALAR` for Fortran users).

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

.seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
@*/
PetscErrorCode MatCreateDense(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar *data, Mat *A)
{
  PetscFunctionBegin;
  PetscCall(MatCreate(comm, A));
  PetscCall(MatSetSizes(*A, m, n, M, N));
  PetscCall(MatSetType(*A, MATDENSE));
  /* both calls are try-methods; only the one matching the actual type (seq vs mpi) takes effect */
  PetscCall(MatSeqDenseSetPreallocation(*A, data));
  PetscCall(MatMPIDenseSetPreallocation(*A, data));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Duplicate an MPIDense matrix: same layouts (shared by reference), duplicated local block */
static PetscErrorCode MatDuplicate_MPIDense(Mat A, MatDuplicateOption cpvalues, Mat *newmat)
{
  Mat mat;
  Mat_MPIDense *a, *oldmat = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  *newmat = NULL;
  PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat));
  PetscCall(MatSetSizes(mat, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
  PetscCall(MatSetType(mat, ((PetscObject)A)->type_name));
  a = (Mat_MPIDense *)mat->data;

  mat->factortype = A->factortype;
  mat->assembled = PETSC_TRUE;
  mat->preallocated = PETSC_TRUE;

  mat->insertmode = NOT_SET_VALUES;
  a->donotstash = oldmat->donotstash;

  /* reference A's layouts rather than rebuilding them */
  PetscCall(PetscLayoutReference(A->rmap, &mat->rmap));
  PetscCall(PetscLayoutReference(A->cmap, &mat->cmap));

  /* duplicate the local (sequential) block; cpvalues controls whether entries are copied */
  PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));

  *newmat = mat;
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
{
  PetscBool isbinary;
#if defined(PETSC_HAVE_HDF5)
  PetscBool ishdf5;
#endif

  PetscFunctionBegin;
  PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
  PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
  /* force binary viewer to load .info file if it has not yet done so */
  PetscCall(PetscViewerSetUp(viewer));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
#if defined(PETSC_HAVE_HDF5)
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
#endif
  if (isbinary) {
    PetscCall(MatLoad_Dense_Binary(newMat, viewer));
#if defined(PETSC_HAVE_HDF5)
  } else if (ishdf5) {
    PetscCall(MatLoad_Dense_HDF5(newMat, viewer));
#endif
  } else SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Equality test: compare the local blocks, then AND the result across the communicator */
static PetscErrorCode MatEqual_MPIDense(Mat A, Mat B, PetscBool *flag)
{
  Mat_MPIDense *matB = (Mat_MPIDense *)B->data, *matA = (Mat_MPIDense *)A->data;
  Mat a, b;

  PetscFunctionBegin;
  a = matA->A;
  b = matB->A;
  PetscCall(MatEqual(a, b, flag));
  /* every process must agree: a single unequal local block makes the result PETSC_FALSE */
  PetscCall(MPIU_Allreduce(MPI_IN_PLACE, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Frees the reuse data attached to a C = A^T*B product by the symbolic phase */
PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data)
{
  Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data;

  PetscFunctionBegin;
  PetscCall(PetscFree2(atb->sendbuf, atb->recvcounts));
  PetscCall(MatDestroy(&atb->atb));
  PetscCall(PetscFree(atb));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Frees the reuse data attached to a C = A*B^T product by the symbolic phase */
PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data)
{
  Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data;

  PetscFunctionBegin;
  PetscCall(PetscFree2(abt->buf[0], abt->buf[1]));
  PetscCall(PetscFree2(abt->recvcounts, abt->recvdispls));
  PetscCall(PetscFree(abt));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Numeric phase of C = A^T * B: form the local product, then reduce-scatter the
   row blocks so each process sums and keeps the rows of C it owns */
static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
  Mat_TransMatMultDense *atb;
  MPI_Comm comm;
  PetscMPIInt size, *recvcounts;
  PetscScalar *carray, *sendbuf;
  const PetscScalar *atbarray;
  PetscInt i, cN = C->cmap->N, proc, k, j, lda;
  const PetscInt *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C, 3);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
  atb = (Mat_TransMatMultDense *)C->product->data;
  recvcounts = atb->recvcounts;
  sendbuf = atb->sendbuf;

  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));

  /* compute atbarray = aseq^T * bseq; reuse atb->atb after the first numeric call */
  PetscCall(MatTransposeMatMult(a->A, b->A, atb->atb ?
MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX, PETSC_DEFAULT, &atb->atb)); 1964 1965 PetscCall(MatGetOwnershipRanges(C, &ranges)); 1966 1967 /* arrange atbarray into sendbuf */ 1968 PetscCall(MatDenseGetArrayRead(atb->atb, &atbarray)); 1969 PetscCall(MatDenseGetLDA(atb->atb, &lda)); 1970 for (proc = 0, k = 0; proc < size; proc++) { 1971 for (j = 0; j < cN; j++) { 1972 for (i = ranges[proc]; i < ranges[proc + 1]; i++) sendbuf[k++] = atbarray[i + j * lda]; 1973 } 1974 } 1975 PetscCall(MatDenseRestoreArrayRead(atb->atb, &atbarray)); 1976 1977 /* sum all atbarray to local values of C */ 1978 PetscCall(MatDenseGetArrayWrite(c->A, &carray)); 1979 PetscCallMPI(MPI_Reduce_scatter(sendbuf, carray, recvcounts, MPIU_SCALAR, MPIU_SUM, comm)); 1980 PetscCall(MatDenseRestoreArrayWrite(c->A, &carray)); 1981 PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 1982 PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 1983 PetscFunctionReturn(PETSC_SUCCESS); 1984 } 1985 1986 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C) 1987 { 1988 MPI_Comm comm; 1989 PetscMPIInt size; 1990 PetscInt cm = A->cmap->n, cM, cN = B->cmap->N; 1991 Mat_TransMatMultDense *atb; 1992 PetscBool cisdense = PETSC_FALSE; 1993 PetscInt i; 1994 const PetscInt *ranges; 1995 1996 PetscFunctionBegin; 1997 MatCheckProduct(C, 4); 1998 PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 1999 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 2000 if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) { 2001 SETERRQ(comm, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend); 2002 } 2003 2004 /* create matrix product C */ 2005 PetscCall(MatSetSizes(C, cm, B->cmap->n, A->cmap->N, B->cmap->N)); 2006 #if defined(PETSC_HAVE_CUDA) 2007 
PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, "")); 2008 #endif 2009 #if defined(PETSC_HAVE_HIP) 2010 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSEHIP, "")); 2011 #endif 2012 if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 2013 PetscCall(MatSetUp(C)); 2014 2015 /* create data structure for reuse C */ 2016 PetscCallMPI(MPI_Comm_size(comm, &size)); 2017 PetscCall(PetscNew(&atb)); 2018 cM = C->rmap->N; 2019 PetscCall(PetscMalloc2(cM * cN, &atb->sendbuf, size, &atb->recvcounts)); 2020 PetscCall(MatGetOwnershipRanges(C, &ranges)); 2021 for (i = 0; i < size; i++) atb->recvcounts[i] = (ranges[i + 1] - ranges[i]) * cN; 2022 2023 C->product->data = atb; 2024 C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense; 2025 PetscFunctionReturn(PETSC_SUCCESS); 2026 } 2027 2028 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C) 2029 { 2030 MPI_Comm comm; 2031 PetscMPIInt i, size; 2032 PetscInt maxRows, bufsiz; 2033 PetscMPIInt tag; 2034 PetscInt alg; 2035 Mat_MatTransMultDense *abt; 2036 Mat_Product *product = C->product; 2037 PetscBool flg; 2038 2039 PetscFunctionBegin; 2040 MatCheckProduct(C, 4); 2041 PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 2042 /* check local size of A and B */ 2043 PetscCheck(A->cmap->n == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local column dimensions are incompatible, A (%" PetscInt_FMT ") != B (%" PetscInt_FMT ")", A->cmap->n, B->cmap->n); 2044 2045 PetscCall(PetscStrcmp(product->alg, "allgatherv", &flg)); 2046 alg = flg ? 
0 : 1;

  /* setup matrix product C */
  PetscCall(MatSetSizes(C, A->rmap->n, B->rmap->n, A->rmap->N, B->rmap->N));
  PetscCall(MatSetType(C, MATMPIDENSE));
  PetscCall(MatSetUp(C));
  PetscCall(PetscObjectGetNewTag((PetscObject)C, &tag));

  /* create data structure for reuse C */
  PetscCall(PetscObjectGetComm((PetscObject)C, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCall(PetscNew(&abt));
  abt->tag = tag;
  abt->alg = alg;
  switch (alg) {
  case 1: /* alg: "cyclic" */
    /* two ping-pong buffers, each large enough for the biggest row block of B on any process */
    for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i]));
    bufsiz = A->cmap->N * maxRows;
    PetscCall(PetscMalloc2(bufsiz, &(abt->buf[0]), bufsiz, &(abt->buf[1])));
    break;
  default: /* alg: "allgatherv" */
    /* buf[0] holds the packed local contribution, buf[1] the globally gathered result */
    PetscCall(PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1])));
    PetscCall(PetscMalloc2(size, &(abt->recvcounts), size + 1, &(abt->recvdispls)));
    for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N;
    for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i];
    break;
  }

  C->product->data = abt;
  C->product->destroy = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
  C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Numeric phase of C = A * B^T, "cyclic" algorithm: the packed local block of B is
   passed around a ring with nonblocking sends/receives; at each step the local part
   of A is multiplied against the transposed block currently held, filling the
   corresponding column slab of C while the next block is in flight */
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
  Mat_MatTransMultDense *abt;
  MPI_Comm comm;
  PetscMPIInt rank, size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom;
  PetscScalar *sendbuf, *recvbuf = NULL, *cv;
  PetscInt i, cK = A->cmap->N, k, j, bn;
  PetscScalar _DOne = 1.0, _DZero = 0.0;
  const PetscScalar *av, *bv;
  PetscBLASInt cm, cn, ck, alda, blda = 0, clda;
  MPI_Request reqs[2];
  const PetscInt *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C, 3);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
  abt = (Mat_MatTransMultDense *)C->product->data;
  PetscCall(PetscObjectGetComm((PetscObject)C, &comm));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCall(MatDenseGetArrayRead(a->A, &av));
  PetscCall(MatDenseGetArrayRead(b->A, &bv));
  PetscCall(MatDenseGetArrayWrite(c->A, &cv));
  PetscCall(MatDenseGetLDA(a->A, &i));
  PetscCall(PetscBLASIntCast(i, &alda));
  PetscCall(MatDenseGetLDA(b->A, &i));
  PetscCall(PetscBLASIntCast(i, &blda));
  PetscCall(MatDenseGetLDA(c->A, &i));
  PetscCall(PetscBLASIntCast(i, &clda));
  PetscCall(MatGetOwnershipRanges(B, &ranges));
  bn = B->rmap->n;
  /* if the local block of B is contiguous (lda == local rows) it can be sent in place,
     otherwise compact it into buf[0] stripping the lda padding */
  if (blda == bn) {
    sendbuf = (PetscScalar *)bv;
  } else {
    sendbuf = abt->buf[0];
    for (k = 0, i = 0; i < cK; i++) {
      for (j = 0; j < bn; j++, k++) sendbuf[k] = bv[i * blda + j];
    }
  }
  if (size > 1) {
    /* ring: send to the previous rank, receive from the next */
    sendto = (rank + size - 1) % size;
    recvfrom = (rank + size + 1) % size;
  } else {
    sendto = recvfrom = 0;
  }
  PetscCall(PetscBLASIntCast(cK, &ck));
  PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm));
  recvisfrom = rank;
  for (i = 0; i < size; i++) {
    /* we have finished receiving in sending, bufs can be read/modified */
    PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */
    PetscInt nextbn = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];

    if (nextrecvisfrom != rank) {
      /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */
      sendsiz = cK * bn;
      recvsiz = cK * nextbn;
      recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1];
      PetscCallMPI(MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]));
      PetscCallMPI(MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]));
    }

    /* local aseq * sendbuf^T */
    PetscCall(PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn));
    if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &cm, &cn, &ck, &_DOne, av, &alda, sendbuf, &cn, &_DZero, cv + clda * ranges[recvisfrom], &clda));

    if (nextrecvisfrom != rank) {
      /* wait for the sends and receives to complete, swap sendbuf and recvbuf */
      PetscCallMPI(MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE));
    }
    bn = nextbn;
    recvisfrom = nextrecvisfrom;
    sendbuf = recvbuf;
  }
  PetscCall(MatDenseRestoreArrayRead(a->A, &av));
  PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
  PetscCall(MatDenseRestoreArrayWrite(c->A, &cv));
  PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Numeric phase of C = A * B^T, "allgatherv" algorithm: every process gathers the
   whole B^T, then one local GEMM produces its rows of C */
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
  Mat_MatTransMultDense *abt;
  MPI_Comm comm;
  PetscMPIInt size;
  PetscScalar *cv, *sendbuf, *recvbuf;
  const PetscScalar *av, *bv;
  PetscInt blda, i, cK = A->cmap->N, k, j, bn;
  PetscScalar _DOne = 1.0, _DZero = 0.0;
  PetscBLASInt cm, cn, ck, alda, clda;

  PetscFunctionBegin;
  MatCheckProduct(C, 3);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
  abt = (Mat_MatTransMultDense *)C->product->data;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCall(MatDenseGetArrayRead(a->A, &av));
PetscCall(MatDenseGetArrayRead(b->A, &bv)); 2183 PetscCall(MatDenseGetArrayWrite(c->A, &cv)); 2184 PetscCall(MatDenseGetLDA(a->A, &i)); 2185 PetscCall(PetscBLASIntCast(i, &alda)); 2186 PetscCall(MatDenseGetLDA(b->A, &blda)); 2187 PetscCall(MatDenseGetLDA(c->A, &i)); 2188 PetscCall(PetscBLASIntCast(i, &clda)); 2189 /* copy transpose of B into buf[0] */ 2190 bn = B->rmap->n; 2191 sendbuf = abt->buf[0]; 2192 recvbuf = abt->buf[1]; 2193 for (k = 0, j = 0; j < bn; j++) { 2194 for (i = 0; i < cK; i++, k++) sendbuf[k] = bv[i * blda + j]; 2195 } 2196 PetscCall(MatDenseRestoreArrayRead(b->A, &bv)); 2197 PetscCallMPI(MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm)); 2198 PetscCall(PetscBLASIntCast(cK, &ck)); 2199 PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm)); 2200 PetscCall(PetscBLASIntCast(c->A->cmap->n, &cn)); 2201 if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &cm, &cn, &ck, &_DOne, av, &alda, recvbuf, &ck, &_DZero, cv, &clda)); 2202 PetscCall(MatDenseRestoreArrayRead(a->A, &av)); 2203 PetscCall(MatDenseRestoreArrayRead(b->A, &bv)); 2204 PetscCall(MatDenseRestoreArrayWrite(c->A, &cv)); 2205 PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 2206 PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 2207 PetscFunctionReturn(PETSC_SUCCESS); 2208 } 2209 2210 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) 2211 { 2212 Mat_MatTransMultDense *abt; 2213 2214 PetscFunctionBegin; 2215 MatCheckProduct(C, 3); 2216 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 2217 abt = (Mat_MatTransMultDense *)C->product->data; 2218 switch (abt->alg) { 2219 case 1: 2220 PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C)); 2221 break; 2222 default: 2223 PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C)); 2224 break; 2225 } 2226 PetscFunctionReturn(PETSC_SUCCESS); 2227 } 2228 2229 
PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data) 2230 { 2231 Mat_MatMultDense *ab = (Mat_MatMultDense *)data; 2232 2233 PetscFunctionBegin; 2234 PetscCall(MatDestroy(&ab->Ce)); 2235 PetscCall(MatDestroy(&ab->Ae)); 2236 PetscCall(MatDestroy(&ab->Be)); 2237 PetscCall(PetscFree(ab)); 2238 PetscFunctionReturn(PETSC_SUCCESS); 2239 } 2240 2241 #if defined(PETSC_HAVE_ELEMENTAL) 2242 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) 2243 { 2244 Mat_MatMultDense *ab; 2245 2246 PetscFunctionBegin; 2247 MatCheckProduct(C, 3); 2248 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Missing product data"); 2249 ab = (Mat_MatMultDense *)C->product->data; 2250 PetscCall(MatConvert_MPIDense_Elemental(A, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Ae)); 2251 PetscCall(MatConvert_MPIDense_Elemental(B, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Be)); 2252 PetscCall(MatMatMultNumeric_Elemental(ab->Ae, ab->Be, ab->Ce)); 2253 PetscCall(MatConvert(ab->Ce, MATMPIDENSE, MAT_REUSE_MATRIX, &C)); 2254 PetscFunctionReturn(PETSC_SUCCESS); 2255 } 2256 2257 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C) 2258 { 2259 Mat Ae, Be, Ce; 2260 Mat_MatMultDense *ab; 2261 2262 PetscFunctionBegin; 2263 MatCheckProduct(C, 4); 2264 PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 2265 /* check local size of A and B */ 2266 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 2267 SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend); 2268 } 2269 2270 /* create elemental matrices Ae and Be */ 2271 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &Ae)); 2272 PetscCall(MatSetSizes(Ae, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, 
A->cmap->N)); 2273 PetscCall(MatSetType(Ae, MATELEMENTAL)); 2274 PetscCall(MatSetUp(Ae)); 2275 PetscCall(MatSetOption(Ae, MAT_ROW_ORIENTED, PETSC_FALSE)); 2276 2277 PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &Be)); 2278 PetscCall(MatSetSizes(Be, PETSC_DECIDE, PETSC_DECIDE, B->rmap->N, B->cmap->N)); 2279 PetscCall(MatSetType(Be, MATELEMENTAL)); 2280 PetscCall(MatSetUp(Be)); 2281 PetscCall(MatSetOption(Be, MAT_ROW_ORIENTED, PETSC_FALSE)); 2282 2283 /* compute symbolic Ce = Ae*Be */ 2284 PetscCall(MatCreate(PetscObjectComm((PetscObject)C), &Ce)); 2285 PetscCall(MatMatMultSymbolic_Elemental(Ae, Be, fill, Ce)); 2286 2287 /* setup C */ 2288 PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, PETSC_DECIDE, PETSC_DECIDE)); 2289 PetscCall(MatSetType(C, MATDENSE)); 2290 PetscCall(MatSetUp(C)); 2291 2292 /* create data structure for reuse Cdense */ 2293 PetscCall(PetscNew(&ab)); 2294 ab->Ae = Ae; 2295 ab->Be = Be; 2296 ab->Ce = Ce; 2297 2298 C->product->data = ab; 2299 C->product->destroy = MatDestroy_MatMatMult_MPIDense_MPIDense; 2300 C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense; 2301 PetscFunctionReturn(PETSC_SUCCESS); 2302 } 2303 #endif 2304 2305 #if defined(PETSC_HAVE_ELEMENTAL) 2306 static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C) 2307 { 2308 PetscFunctionBegin; 2309 C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense; 2310 C->ops->productsymbolic = MatProductSymbolic_AB; 2311 PetscFunctionReturn(PETSC_SUCCESS); 2312 } 2313 #endif 2314 2315 static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C) 2316 { 2317 Mat_Product *product = C->product; 2318 Mat A = product->A, B = product->B; 2319 2320 PetscFunctionBegin; 2321 if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) 2322 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, 
B->rmap->rend);
  C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense;
  C->ops->productsymbolic = MatProductSymbolic_AtB;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Configure the ABt product: select "allgatherv" (default) or "cyclic" from the options database */
static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C)
{
  Mat_Product *product = C->product;
  const char *algTypes[2] = {"allgatherv", "cyclic"};
  PetscInt alg, nalg = 2;
  PetscBool flg = PETSC_FALSE;

  PetscFunctionBegin;
  /* Set default algorithm */
  alg = 0; /* default is allgatherv */
  PetscCall(PetscStrcmp(product->alg, "default", &flg));
  if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));

  /* Get runtime option: the option name depends on whether the old MatMatTransposeMult()
     API or the newer MatProduct API is driving this product */
  if (product->api_user) {
    PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatTransposeMult", "Mat");
    PetscCall(PetscOptionsEList("-matmattransmult_mpidense_mpidense_via", "Algorithmic approach", "MatMatTransposeMult", algTypes, nalg, algTypes[alg], &alg, &flg));
    PetscOptionsEnd();
  } else {
    PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_ABt", "Mat");
    PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_ABt", algTypes, nalg, algTypes[alg], &alg, &flg));
    PetscOptionsEnd();
  }
  if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));

  C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense;
  C->ops->productsymbolic = MatProductSymbolic_ABt;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Entry point for MatProductSetFromOptions() on MATMPIDENSE: route by product type;
   unsupported types fall through and leave productsymbolic unset */
PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C)
{
  Mat_Product *product = C->product;

  PetscFunctionBegin;
  switch (product->type) {
#if defined(PETSC_HAVE_ELEMENTAL)
  case MATPRODUCT_AB:
    PetscCall(MatProductSetFromOptions_MPIDense_AB(C));
    break;
#endif
  case MATPRODUCT_AtB:
    PetscCall(MatProductSetFromOptions_MPIDense_AtB(C));
    break;
  case MATPRODUCT_ABt:
    PetscCall(MatProductSetFromOptions_MPIDense_ABt(C));
    break;
  default:
    break;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}