1 2 /* 3 Basic functions for basic parallel dense matrices. 4 Portions of this code are under: 5 Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. 6 */ 7 8 #include <../src/mat/impls/dense/mpi/mpidense.h> /*I "petscmat.h" I*/ 9 #include <../src/mat/impls/aij/mpi/mpiaij.h> 10 #include <petscblaslapack.h> 11 12 /*@ 13 MatDenseGetLocalMatrix - For a `MATMPIDENSE` or `MATSEQDENSE` matrix returns the sequential 14 matrix that represents the operator. For sequential matrices it returns itself. 15 16 Input Parameter: 17 . A - the sequential or MPI `MATDENSE` matrix 18 19 Output Parameter: 20 . B - the inner matrix 21 22 Level: intermediate 23 24 .seealso: [](chapter_matrices), `Mat`, `MATDENSE`, `MATMPIDENSE`, `MATSEQDENSE` 25 @*/ 26 PetscErrorCode MatDenseGetLocalMatrix(Mat A, Mat *B) 27 { 28 Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 29 PetscBool flg; 30 31 PetscFunctionBegin; 32 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 33 PetscValidPointer(B, 2); 34 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &flg)); 35 if (flg) *B = mat->A; 36 else { 37 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQDENSE, &flg)); 38 PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)A)->type_name); 39 *B = A; 40 } 41 PetscFunctionReturn(PETSC_SUCCESS); 42 } 43 44 PetscErrorCode MatCopy_MPIDense(Mat A, Mat B, MatStructure s) 45 { 46 Mat_MPIDense *Amat = (Mat_MPIDense *)A->data; 47 Mat_MPIDense *Bmat = (Mat_MPIDense *)B->data; 48 49 PetscFunctionBegin; 50 PetscCall(MatCopy(Amat->A, Bmat->A, s)); 51 PetscFunctionReturn(PETSC_SUCCESS); 52 } 53 54 PetscErrorCode MatShift_MPIDense(Mat A, PetscScalar alpha) 55 { 56 Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 57 PetscInt j, lda, rstart = A->rmap->rstart, rend = A->rmap->rend, rend2; 58 PetscScalar *v; 59 60 PetscFunctionBegin; 61 PetscCall(MatDenseGetArray(mat->A, &v)); 62 PetscCall(MatDenseGetLDA(mat->A, &lda)); 63 rend2 = PetscMin(rend, 
A->cmap->N); 64 if (rend2 > rstart) { 65 for (j = rstart; j < rend2; j++) v[j - rstart + j * lda] += alpha; 66 PetscCall(PetscLogFlops(rend2 - rstart)); 67 } 68 PetscCall(MatDenseRestoreArray(mat->A, &v)); 69 PetscFunctionReturn(PETSC_SUCCESS); 70 } 71 72 PetscErrorCode MatGetRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 73 { 74 Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 75 PetscInt lrow, rstart = A->rmap->rstart, rend = A->rmap->rend; 76 77 PetscFunctionBegin; 78 PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows"); 79 lrow = row - rstart; 80 PetscCall(MatGetRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v)); 81 PetscFunctionReturn(PETSC_SUCCESS); 82 } 83 84 PetscErrorCode MatRestoreRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 85 { 86 Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 87 PetscInt lrow, rstart = A->rmap->rstart, rend = A->rmap->rend; 88 89 PetscFunctionBegin; 90 PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows"); 91 lrow = row - rstart; 92 PetscCall(MatRestoreRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v)); 93 PetscFunctionReturn(PETSC_SUCCESS); 94 } 95 96 PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A, Mat *a) 97 { 98 Mat_MPIDense *mdn = (Mat_MPIDense *)A->data; 99 PetscInt m = A->rmap->n, rstart = A->rmap->rstart; 100 PetscScalar *array; 101 MPI_Comm comm; 102 PetscBool flg; 103 Mat B; 104 105 PetscFunctionBegin; 106 PetscCall(MatHasCongruentLayouts(A, &flg)); 107 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only square matrices supported."); 108 PetscCall(PetscObjectQuery((PetscObject)A, "DiagonalBlock", (PetscObject *)&B)); 109 if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */ 110 #if defined(PETSC_HAVE_CUDA) 111 PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, 
MATSEQDENSECUDA, &flg)); 112 PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSECUDA); 113 #elif (PETSC_HAVE_HIP) 114 PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSEHIP, &flg)); 115 PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSEHIP); 116 #endif 117 PetscCall(PetscObjectGetComm((PetscObject)(mdn->A), &comm)); 118 PetscCall(MatCreate(comm, &B)); 119 PetscCall(MatSetSizes(B, m, m, m, m)); 120 PetscCall(MatSetType(B, ((PetscObject)mdn->A)->type_name)); 121 PetscCall(MatDenseGetArrayRead(mdn->A, (const PetscScalar **)&array)); 122 PetscCall(MatSeqDenseSetPreallocation(B, array + m * rstart)); 123 PetscCall(MatDenseRestoreArrayRead(mdn->A, (const PetscScalar **)&array)); 124 PetscCall(PetscObjectCompose((PetscObject)A, "DiagonalBlock", (PetscObject)B)); 125 *a = B; 126 PetscCall(MatDestroy(&B)); 127 } else *a = B; 128 PetscFunctionReturn(PETSC_SUCCESS); 129 } 130 131 PetscErrorCode MatSetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], const PetscScalar v[], InsertMode addv) 132 { 133 Mat_MPIDense *A = (Mat_MPIDense *)mat->data; 134 PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row; 135 PetscBool roworiented = A->roworiented; 136 137 PetscFunctionBegin; 138 for (i = 0; i < m; i++) { 139 if (idxm[i] < 0) continue; 140 PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large"); 141 if (idxm[i] >= rstart && idxm[i] < rend) { 142 row = idxm[i] - rstart; 143 if (roworiented) { 144 PetscCall(MatSetValues(A->A, 1, &row, n, idxn, v + i * n, addv)); 145 } else { 146 for (j = 0; j < n; j++) { 147 if (idxn[j] < 0) continue; 148 PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large"); 149 PetscCall(MatSetValues(A->A, 1, &row, 1, 
&idxn[j], v + i + j * m, addv)); 150 } 151 } 152 } else if (!A->donotstash) { 153 mat->assembled = PETSC_FALSE; 154 if (roworiented) { 155 PetscCall(MatStashValuesRow_Private(&mat->stash, idxm[i], n, idxn, v + i * n, PETSC_FALSE)); 156 } else { 157 PetscCall(MatStashValuesCol_Private(&mat->stash, idxm[i], n, idxn, v + i, m, PETSC_FALSE)); 158 } 159 } 160 } 161 PetscFunctionReturn(PETSC_SUCCESS); 162 } 163 164 PetscErrorCode MatGetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[]) 165 { 166 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 167 PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row; 168 169 PetscFunctionBegin; 170 for (i = 0; i < m; i++) { 171 if (idxm[i] < 0) continue; /* negative row */ 172 PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large"); 173 if (idxm[i] >= rstart && idxm[i] < rend) { 174 row = idxm[i] - rstart; 175 for (j = 0; j < n; j++) { 176 if (idxn[j] < 0) continue; /* negative column */ 177 PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large"); 178 PetscCall(MatGetValues(mdn->A, 1, &row, 1, &idxn[j], v + i * n + j)); 179 } 180 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported"); 181 } 182 PetscFunctionReturn(PETSC_SUCCESS); 183 } 184 185 static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A, PetscInt *lda) 186 { 187 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 188 189 PetscFunctionBegin; 190 PetscCall(MatDenseGetLDA(a->A, lda)); 191 PetscFunctionReturn(PETSC_SUCCESS); 192 } 193 194 static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A, PetscInt lda) 195 { 196 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 197 MatType mtype = MATSEQDENSE; 198 199 PetscFunctionBegin; 200 if (!a->A) { 201 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 202 PetscCall(PetscLayoutSetUp(A->rmap)); 203 
PetscCall(PetscLayoutSetUp(A->cmap)); 204 PetscCall(MatCreate(PETSC_COMM_SELF, &a->A)); 205 PetscCall(MatSetSizes(a->A, A->rmap->n, A->cmap->N, A->rmap->n, A->cmap->N)); 206 #if defined(PETSC_HAVE_CUDA) 207 PetscBool iscuda; 208 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSECUDA, &iscuda)); 209 if (iscuda) mtype = MATSEQDENSECUDA; 210 #elif (PETSC_HAVE_HIP) 211 PetscBool iship; 212 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSEHIP, &iship)); 213 if (iship) mtype = MATSEQDENSEHIP; 214 #endif 215 PetscCall(MatSetType(a->A, mtype)); 216 } 217 PetscCall(MatDenseSetLDA(a->A, lda)); 218 PetscFunctionReturn(PETSC_SUCCESS); 219 } 220 221 static PetscErrorCode MatDenseGetArray_MPIDense(Mat A, PetscScalar **array) 222 { 223 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 224 225 PetscFunctionBegin; 226 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 227 PetscCall(MatDenseGetArray(a->A, array)); 228 PetscFunctionReturn(PETSC_SUCCESS); 229 } 230 231 static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A, const PetscScalar **array) 232 { 233 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 234 235 PetscFunctionBegin; 236 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 237 PetscCall(MatDenseGetArrayRead(a->A, array)); 238 PetscFunctionReturn(PETSC_SUCCESS); 239 } 240 241 static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A, PetscScalar **array) 242 { 243 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 244 245 PetscFunctionBegin; 246 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 247 PetscCall(MatDenseGetArrayWrite(a->A, array)); 248 PetscFunctionReturn(PETSC_SUCCESS); 249 } 250 251 static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A, const PetscScalar *array) 252 { 253 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 254 255 
PetscFunctionBegin; 256 PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 257 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 258 PetscCall(MatDensePlaceArray(a->A, array)); 259 PetscFunctionReturn(PETSC_SUCCESS); 260 } 261 262 static PetscErrorCode MatDenseResetArray_MPIDense(Mat A) 263 { 264 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 265 266 PetscFunctionBegin; 267 PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 268 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 269 PetscCall(MatDenseResetArray(a->A)); 270 PetscFunctionReturn(PETSC_SUCCESS); 271 } 272 273 static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A, const PetscScalar *array) 274 { 275 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 276 277 PetscFunctionBegin; 278 PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 279 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 280 PetscCall(MatDenseReplaceArray(a->A, array)); 281 PetscFunctionReturn(PETSC_SUCCESS); 282 } 283 284 static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A, IS isrow, IS iscol, MatReuse scall, Mat *B) 285 { 286 Mat_MPIDense *mat = (Mat_MPIDense *)A->data, *newmatd; 287 PetscInt lda, i, j, rstart, rend, nrows, ncols, Ncols, nlrows, nlcols; 288 const PetscInt *irow, *icol; 289 const PetscScalar *v; 290 PetscScalar *bv; 291 Mat newmat; 292 IS iscol_local; 293 MPI_Comm comm_is, comm_mat; 294 295 PetscFunctionBegin; 296 PetscCall(PetscObjectGetComm((PetscObject)A, &comm_mat)); 297 PetscCall(PetscObjectGetComm((PetscObject)iscol, &comm_is)); 298 PetscCheck(comm_mat == comm_is, PETSC_COMM_SELF, 
PETSC_ERR_ARG_NOTSAMECOMM, "IS communicator must match matrix communicator"); 299 300 PetscCall(ISAllGather(iscol, &iscol_local)); 301 PetscCall(ISGetIndices(isrow, &irow)); 302 PetscCall(ISGetIndices(iscol_local, &icol)); 303 PetscCall(ISGetLocalSize(isrow, &nrows)); 304 PetscCall(ISGetLocalSize(iscol, &ncols)); 305 PetscCall(ISGetSize(iscol, &Ncols)); /* global number of columns, size of iscol_local */ 306 307 /* No parallel redistribution currently supported! Should really check each index set 308 to confirm that it is OK. ... Currently supports only submatrix same partitioning as 309 original matrix! */ 310 311 PetscCall(MatGetLocalSize(A, &nlrows, &nlcols)); 312 PetscCall(MatGetOwnershipRange(A, &rstart, &rend)); 313 314 /* Check submatrix call */ 315 if (scall == MAT_REUSE_MATRIX) { 316 /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */ 317 /* Really need to test rows and column sizes! */ 318 newmat = *B; 319 } else { 320 /* Create and fill new matrix */ 321 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &newmat)); 322 PetscCall(MatSetSizes(newmat, nrows, ncols, PETSC_DECIDE, Ncols)); 323 PetscCall(MatSetType(newmat, ((PetscObject)A)->type_name)); 324 PetscCall(MatMPIDenseSetPreallocation(newmat, NULL)); 325 } 326 327 /* Now extract the data pointers and do the copy, column at a time */ 328 newmatd = (Mat_MPIDense *)newmat->data; 329 PetscCall(MatDenseGetArray(newmatd->A, &bv)); 330 PetscCall(MatDenseGetArrayRead(mat->A, &v)); 331 PetscCall(MatDenseGetLDA(mat->A, &lda)); 332 for (i = 0; i < Ncols; i++) { 333 const PetscScalar *av = v + lda * icol[i]; 334 for (j = 0; j < nrows; j++) *bv++ = av[irow[j] - rstart]; 335 } 336 PetscCall(MatDenseRestoreArrayRead(mat->A, &v)); 337 PetscCall(MatDenseRestoreArray(newmatd->A, &bv)); 338 339 /* Assemble the matrices so that the correct flags are set */ 340 PetscCall(MatAssemblyBegin(newmat, MAT_FINAL_ASSEMBLY)); 341 PetscCall(MatAssemblyEnd(newmat, MAT_FINAL_ASSEMBLY)); 342 343 /* Free 
work space */ 344 PetscCall(ISRestoreIndices(isrow, &irow)); 345 PetscCall(ISRestoreIndices(iscol_local, &icol)); 346 PetscCall(ISDestroy(&iscol_local)); 347 *B = newmat; 348 PetscFunctionReturn(PETSC_SUCCESS); 349 } 350 351 PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A, PetscScalar **array) 352 { 353 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 354 355 PetscFunctionBegin; 356 PetscCall(MatDenseRestoreArray(a->A, array)); 357 PetscFunctionReturn(PETSC_SUCCESS); 358 } 359 360 PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A, const PetscScalar **array) 361 { 362 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 363 364 PetscFunctionBegin; 365 PetscCall(MatDenseRestoreArrayRead(a->A, array)); 366 PetscFunctionReturn(PETSC_SUCCESS); 367 } 368 369 PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A, PetscScalar **array) 370 { 371 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 372 373 PetscFunctionBegin; 374 PetscCall(MatDenseRestoreArrayWrite(a->A, array)); 375 PetscFunctionReturn(PETSC_SUCCESS); 376 } 377 378 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat, MatAssemblyType mode) 379 { 380 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 381 PetscInt nstash, reallocs; 382 383 PetscFunctionBegin; 384 if (mdn->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS); 385 386 PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range)); 387 PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs)); 388 PetscCall(PetscInfo(mdn->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs)); 389 PetscFunctionReturn(PETSC_SUCCESS); 390 } 391 392 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat, MatAssemblyType mode) 393 { 394 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 395 PetscInt i, *row, *col, flg, j, rstart, ncols; 396 PetscMPIInt n; 397 PetscScalar *val; 398 399 PetscFunctionBegin; 400 if (!mdn->donotstash && !mat->nooffprocentries) { 401 /* wait on receives */ 402 while (1) { 403 
PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg)); 404 if (!flg) break; 405 406 for (i = 0; i < n;) { 407 /* Now identify the consecutive vals belonging to the same row */ 408 for (j = i, rstart = row[j]; j < n; j++) { 409 if (row[j] != rstart) break; 410 } 411 if (j < n) ncols = j - i; 412 else ncols = n - i; 413 /* Now assemble all these values with a single function call */ 414 PetscCall(MatSetValues_MPIDense(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode)); 415 i = j; 416 } 417 } 418 PetscCall(MatStashScatterEnd_Private(&mat->stash)); 419 } 420 421 PetscCall(MatAssemblyBegin(mdn->A, mode)); 422 PetscCall(MatAssemblyEnd(mdn->A, mode)); 423 PetscFunctionReturn(PETSC_SUCCESS); 424 } 425 426 PetscErrorCode MatZeroEntries_MPIDense(Mat A) 427 { 428 Mat_MPIDense *l = (Mat_MPIDense *)A->data; 429 430 PetscFunctionBegin; 431 PetscCall(MatZeroEntries(l->A)); 432 PetscFunctionReturn(PETSC_SUCCESS); 433 } 434 435 PetscErrorCode MatZeroRows_MPIDense(Mat A, PetscInt n, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) 436 { 437 Mat_MPIDense *l = (Mat_MPIDense *)A->data; 438 PetscInt i, len, *lrows; 439 440 PetscFunctionBegin; 441 /* get locally owned rows */ 442 PetscCall(PetscLayoutMapLocal(A->rmap, n, rows, &len, &lrows, NULL)); 443 /* fix right hand side if needed */ 444 if (x && b) { 445 const PetscScalar *xx; 446 PetscScalar *bb; 447 448 PetscCall(VecGetArrayRead(x, &xx)); 449 PetscCall(VecGetArrayWrite(b, &bb)); 450 for (i = 0; i < len; ++i) bb[lrows[i]] = diag * xx[lrows[i]]; 451 PetscCall(VecRestoreArrayRead(x, &xx)); 452 PetscCall(VecRestoreArrayWrite(b, &bb)); 453 } 454 PetscCall(MatZeroRows(l->A, len, lrows, 0.0, NULL, NULL)); 455 if (diag != 0.0) { 456 Vec d; 457 458 PetscCall(MatCreateVecs(A, NULL, &d)); 459 PetscCall(VecSet(d, diag)); 460 PetscCall(MatDiagonalSet(A, d, INSERT_VALUES)); 461 PetscCall(VecDestroy(&d)); 462 } 463 PetscCall(PetscFree(lrows)); 464 PetscFunctionReturn(PETSC_SUCCESS); 465 } 466 467 
/* Sequential-dense kernels used by the parallel multiply routines below */
PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat, Vec, Vec);
PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat, Vec, Vec, Vec);
PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat, Vec, Vec);
PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat, Vec, Vec, Vec);

/* y = A*x: broadcast the full x into lvec via the SF, then local dense multiply */
PetscErrorCode MatMult_MPIDense(Mat mat, Vec xx, Vec yy)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat)); /* lazy setup of the scatter */
  PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
  PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
  PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
  PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
  PetscCall((*mdn->A->ops->mult)(mdn->A, mdn->lvec, yy));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* z = y + A*x: same broadcast pattern as MatMult, then local multadd */
PetscErrorCode MatMultAdd_MPIDense(Mat mat, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
  PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
  PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
  PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
  PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
  PetscCall((*mdn->A->ops->multadd)(mdn->A, mdn->lvec, yy, zz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* y = A^T*x: local transpose multiply into lvec, then sum-reduce into y via the SF */
PetscErrorCode MatMultTranspose_MPIDense(Mat A, Vec xx, Vec yy)
{
  Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
  PetscCall(VecSet(yy, 0.0)); /* reduction accumulates, so start from zero */
  PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec));
  PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(yy, &ay, &aymtype));
  PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
  PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
  PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
  PetscCall(VecRestoreArrayAndMemType(yy, &ay));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* z = y + A^T*x: copy y into z, then sum-reduce the local transpose product into z */
PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
  PetscCall(VecCopy(yy, zz));
  PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec));
  PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(zz, &ay, &aymtype));
  PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
  PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
  PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
  PetscCall(VecRestoreArrayAndMemType(zz, &ay));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Extract the locally-owned part of the diagonal into v */
PetscErrorCode MatGetDiagonal_MPIDense(Mat A, Vec v)
{
  Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
  PetscInt           lda, len, i, n, m = A->rmap->n, radd;
  PetscScalar       *x, zero = 0.0;
  const PetscScalar *av;

  PetscFunctionBegin;
  PetscCall(VecSet(v, zero));
  PetscCall(VecGetArray(v, &x));
  PetscCall(VecGetSize(v, &n));
  PetscCheck(n == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming mat and vec");
  len  = PetscMin(a->A->rmap->n, a->A->cmap->n);
  /* NOTE(review): radd uses m as the column stride while the loop below uses lda;
     this looks wrong when lda != m (user-set LDA) — presumably radd should be
     A->rmap->rstart * lda. Confirm before changing. */
  radd = A->rmap->rstart * m;
  PetscCall(MatDenseGetArrayRead(a->A, &av));
  PetscCall(MatDenseGetLDA(a->A, &lda));
  for (i = 0; i < len; i++) x[i] = av[radd + i * lda + i];
  PetscCall(MatDenseRestoreArrayRead(a->A, &av));
  PetscCall(VecRestoreArray(v, &x));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Destroy the matrix: free inner objects and unregister every composed method */
PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
#endif
  PetscCall(MatStashDestroy_Private(&mat->stash));
  PetscCheck(!mdn->vecinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!mdn->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDestroy(&mdn->A));
  PetscCall(VecDestroy(&mdn->lvec));
  PetscCall(PetscSFDestroy(&mdn->Mvctx));
  PetscCall(VecDestroy(&mdn->cvec));
  PetscCall(MatDestroy(&mdn->cmat));

  PetscCall(PetscFree(mat->data));
  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));

  /* Unregister all composed method implementations set at creation time */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", NULL));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", NULL));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", NULL));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensecuda_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAPlaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAResetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAReplaceArray_C", NULL));
#endif
#if defined(PETSC_HAVE_HIP)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensehip_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensehip_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidensehip_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaijhipsparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPPlaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPResetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPReplaceArray_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", NULL));

  /* drop the cached diagonal block created by MatGetDiagonalBlock_MPIDense */
  PetscCall(PetscObjectCompose((PetscObject)mat, "DiagonalBlock", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PETSC_INTERN PetscErrorCode MatView_SeqDense(Mat, PetscViewer);

#include <petscdraw.h>
/* View helper: handles ASCII info formats directly; otherwise gathers the whole
   matrix onto rank 0 and views it there (inefficient but simple) */
static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
{
  Mat_MPIDense     *mdn = (Mat_MPIDense *)mat->data;
  PetscMPIInt       rank;
  PetscViewerType   vtype;
  PetscBool         iascii, isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  if (iascii) {
    PetscCall(PetscViewerGetType(viewer, &vtype));
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "  [%d] local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %" PetscInt_FMT " \n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                   (PetscInt)info.memory));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      if (mdn->Mvctx) PetscCall(PetscSFView(mdn->Mvctx, viewer));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(PETSC_SUCCESS);
    }
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;

    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawIsNull(draw, &isnull));
    if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat          A;
    PetscInt     M = mat->rmap->N, N = mat->cmap->N, m, row, i, nz;
    PetscInt    *cols;
    PetscScalar *vals;

    PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
    if (rank == 0) {
      PetscCall(MatSetSizes(A, M, N, M, N));
    } else {
      PetscCall(MatSetSizes(A, 0, 0, M, N));
    }
    /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
    PetscCall(MatSetType(A, MATMPIDENSE));
    PetscCall(MatMPIDenseSetPreallocation(A, NULL));

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    A->insertmode = INSERT_VALUES;

    row = mat->rmap->rstart;
    m   = mdn->A->rmap->n;
    for (i = 0; i < m; i++) {
      PetscCall(MatGetRow_MPIDense(mat, row, &nz, &cols, &vals));
      PetscCall(MatSetValues_MPIDense(A, 1, &row, nz, cols, vals, INSERT_VALUES));
      PetscCall(MatRestoreRow_MPIDense(mat, row, &nz, &cols, &vals));
      row++;
    }

    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    if (rank == 0) {
      PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIDense *)(A->data))->A, ((PetscObject)mat)->name));
      PetscCall(MatView_SeqDense(((Mat_MPIDense *)(A->data))->A, sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(PetscViewerFlush(viewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Dispatch viewing to the appropriate backend based on viewer type */
PetscErrorCode MatView_MPIDense(Mat mat, PetscViewer viewer)
{
  PetscBool iascii, isbinary, isdraw, issocket;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));

  if (iascii || issocket || isdraw) {
    PetscCall(MatView_MPIDense_ASCIIorDraworSocket(mat, viewer));
  } else if (isbinary) PetscCall(MatView_Dense_Binary(mat, viewer));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Gather matrix statistics; MAT_GLOBAL_MAX/SUM reduce the local numbers over the comm */
PetscErrorCode MatGetInfo_MPIDense(Mat A, MatInfoType flag, MatInfo *info)
{
  Mat_MPIDense  *mat = (Mat_MPIDense *)A->data;
  Mat            mdn = mat->A;
  PetscLogDouble isend[5], irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;

  PetscCall(MatGetInfo(mdn, MAT_LOCAL, info));

  isend[0] = info->nz_used;
  isend[1] = info->nz_allocated;
  isend[2] = info->nz_unneeded;
  isend[3] = info->memory;
  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)A)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)A)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Forward supported options to the inner matrix; silently ignore the rest */
PetscErrorCode MatSetOption_MPIDense(Mat A, MatOption op, PetscBool flg)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A, 1);
    a->roworiented = flg;
    PetscCall(MatSetOption(a->A, op, flg));
    break;
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_USE_HASH_TABLE:
  case MAT_SORTED_FULL:
    PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
  case MAT_SPD:
  case MAT_IGNORE_LOWER_TRIANGULAR:
  case MAT_IGNORE_ZERO_ENTRIES:
  case MAT_SPD_ETERNAL:
    /* if the diagonal matrix is square it inherits some of the properties above */
    PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
    break;
  default:
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %s", MatOptions[op]);
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Scale rows by ll and columns by rr (function continues past this excerpt) */
PetscErrorCode MatDiagonalScale_MPIDense(Mat A, Vec ll, Vec rr)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)A->data;
  const PetscScalar *l;
  PetscScalar        x, *v, *vv, *r;
  PetscInt           i, j, s2a, s3a, s2, s3, m = mdn->A->rmap->n, n = mdn->A->cmap->n, lda;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArray(mdn->A, &vv));
  PetscCall(MatDenseGetLDA(mdn->A, &lda));
  PetscCall(MatGetLocalSize(A, &s2, &s3));
  if (ll) {
    PetscCall(VecGetLocalSize(ll, &s2a));
    PetscCheck(s2a == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left scaling vector non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT, s2a, s2);
    PetscCall(VecGetArrayRead(ll, &l));
    for (i = 0; i < m; i++) {
      x = l[i];
      v = vv + i;
      for (j = 0; j < n; j++) {
        (*v) *= x;
        v += 
lda; 878 } 879 } 880 PetscCall(VecRestoreArrayRead(ll, &l)); 881 PetscCall(PetscLogFlops(1.0 * n * m)); 882 } 883 if (rr) { 884 const PetscScalar *ar; 885 886 PetscCall(VecGetLocalSize(rr, &s3a)); 887 PetscCheck(s3a == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Right scaling vec non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT ".", s3a, s3); 888 PetscCall(VecGetArrayRead(rr, &ar)); 889 if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); 890 PetscCall(VecGetArray(mdn->lvec, &r)); 891 PetscCall(PetscSFBcastBegin(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE)); 892 PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE)); 893 PetscCall(VecRestoreArrayRead(rr, &ar)); 894 for (i = 0; i < n; i++) { 895 x = r[i]; 896 v = vv + i * lda; 897 for (j = 0; j < m; j++) (*v++) *= x; 898 } 899 PetscCall(VecRestoreArray(mdn->lvec, &r)); 900 PetscCall(PetscLogFlops(1.0 * n * m)); 901 } 902 PetscCall(MatDenseRestoreArray(mdn->A, &vv)); 903 PetscFunctionReturn(PETSC_SUCCESS); 904 } 905 906 PetscErrorCode MatNorm_MPIDense(Mat A, NormType type, PetscReal *nrm) 907 { 908 Mat_MPIDense *mdn = (Mat_MPIDense *)A->data; 909 PetscInt i, j; 910 PetscMPIInt size; 911 PetscReal sum = 0.0; 912 const PetscScalar *av, *v; 913 914 PetscFunctionBegin; 915 PetscCall(MatDenseGetArrayRead(mdn->A, &av)); 916 v = av; 917 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 918 if (size == 1) { 919 PetscCall(MatNorm(mdn->A, type, nrm)); 920 } else { 921 if (type == NORM_FROBENIUS) { 922 for (i = 0; i < mdn->A->cmap->n * mdn->A->rmap->n; i++) { 923 sum += PetscRealPart(PetscConj(*v) * (*v)); 924 v++; 925 } 926 PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A))); 927 *nrm = PetscSqrtReal(*nrm); 928 PetscCall(PetscLogFlops(2.0 * mdn->A->cmap->n * mdn->A->rmap->n)); 929 } else if (type == NORM_1) { 930 PetscReal *tmp, *tmp2; 931 PetscCall(PetscCalloc2(A->cmap->N, &tmp, A->cmap->N, &tmp2)); 932 *nrm = 0.0; 933 v = av; 934 
for (j = 0; j < mdn->A->cmap->n; j++) { 935 for (i = 0; i < mdn->A->rmap->n; i++) { 936 tmp[j] += PetscAbsScalar(*v); 937 v++; 938 } 939 } 940 PetscCall(MPIU_Allreduce(tmp, tmp2, A->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A))); 941 for (j = 0; j < A->cmap->N; j++) { 942 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 943 } 944 PetscCall(PetscFree2(tmp, tmp2)); 945 PetscCall(PetscLogFlops(A->cmap->n * A->rmap->n)); 946 } else if (type == NORM_INFINITY) { /* max row norm */ 947 PetscReal ntemp; 948 PetscCall(MatNorm(mdn->A, type, &ntemp)); 949 PetscCall(MPIU_Allreduce(&ntemp, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A))); 950 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for two norm"); 951 } 952 PetscCall(MatDenseRestoreArrayRead(mdn->A, &av)); 953 PetscFunctionReturn(PETSC_SUCCESS); 954 } 955 956 PetscErrorCode MatTranspose_MPIDense(Mat A, MatReuse reuse, Mat *matout) 957 { 958 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 959 Mat B; 960 PetscInt M = A->rmap->N, N = A->cmap->N, m, n, *rwork, rstart = A->rmap->rstart; 961 PetscInt j, i, lda; 962 PetscScalar *v; 963 964 PetscFunctionBegin; 965 if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout)); 966 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 967 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 968 PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M)); 969 PetscCall(MatSetType(B, ((PetscObject)A)->type_name)); 970 PetscCall(MatMPIDenseSetPreallocation(B, NULL)); 971 } else B = *matout; 972 973 m = a->A->rmap->n; 974 n = a->A->cmap->n; 975 PetscCall(MatDenseGetArrayRead(a->A, (const PetscScalar **)&v)); 976 PetscCall(MatDenseGetLDA(a->A, &lda)); 977 PetscCall(PetscMalloc1(m, &rwork)); 978 for (i = 0; i < m; i++) rwork[i] = rstart + i; 979 for (j = 0; j < n; j++) { 980 PetscCall(MatSetValues(B, 1, &j, m, rwork, v, INSERT_VALUES)); 981 v += lda; 982 } 983 PetscCall(MatDenseRestoreArrayRead(a->A, 
(const PetscScalar **)&v)); 984 PetscCall(PetscFree(rwork)); 985 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 986 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 987 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 988 *matout = B; 989 } else { 990 PetscCall(MatHeaderMerge(A, &B)); 991 } 992 PetscFunctionReturn(PETSC_SUCCESS); 993 } 994 995 static PetscErrorCode MatDuplicate_MPIDense(Mat, MatDuplicateOption, Mat *); 996 PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat, PetscScalar); 997 998 PetscErrorCode MatSetUp_MPIDense(Mat A) 999 { 1000 PetscFunctionBegin; 1001 PetscCall(PetscLayoutSetUp(A->rmap)); 1002 PetscCall(PetscLayoutSetUp(A->cmap)); 1003 if (!A->preallocated) PetscCall(MatMPIDenseSetPreallocation(A, NULL)); 1004 PetscFunctionReturn(PETSC_SUCCESS); 1005 } 1006 1007 PetscErrorCode MatAXPY_MPIDense(Mat Y, PetscScalar alpha, Mat X, MatStructure str) 1008 { 1009 Mat_MPIDense *A = (Mat_MPIDense *)Y->data, *B = (Mat_MPIDense *)X->data; 1010 1011 PetscFunctionBegin; 1012 PetscCall(MatAXPY(A->A, alpha, B->A, str)); 1013 PetscFunctionReturn(PETSC_SUCCESS); 1014 } 1015 1016 PetscErrorCode MatConjugate_MPIDense(Mat mat) 1017 { 1018 Mat_MPIDense *a = (Mat_MPIDense *)mat->data; 1019 1020 PetscFunctionBegin; 1021 PetscCall(MatConjugate(a->A)); 1022 PetscFunctionReturn(PETSC_SUCCESS); 1023 } 1024 1025 PetscErrorCode MatRealPart_MPIDense(Mat A) 1026 { 1027 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1028 1029 PetscFunctionBegin; 1030 PetscCall(MatRealPart(a->A)); 1031 PetscFunctionReturn(PETSC_SUCCESS); 1032 } 1033 1034 PetscErrorCode MatImaginaryPart_MPIDense(Mat A) 1035 { 1036 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1037 1038 PetscFunctionBegin; 1039 PetscCall(MatImaginaryPart(a->A)); 1040 PetscFunctionReturn(PETSC_SUCCESS); 1041 } 1042 1043 static PetscErrorCode MatGetColumnVector_MPIDense(Mat A, Vec v, PetscInt col) 1044 { 1045 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1046 1047 PetscFunctionBegin; 1048 PetscCheck(a->A, PETSC_COMM_SELF, 
PETSC_ERR_ARG_WRONGSTATE, "Missing local matrix"); 1049 PetscCheck(a->A->ops->getcolumnvector, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing get column operation"); 1050 PetscCall((*a->A->ops->getcolumnvector)(a->A, v, col)); 1051 PetscFunctionReturn(PETSC_SUCCESS); 1052 } 1053 1054 PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat, PetscInt, PetscReal *); 1055 1056 PetscErrorCode MatGetColumnReductions_MPIDense(Mat A, PetscInt type, PetscReal *reductions) 1057 { 1058 PetscInt i, m, n; 1059 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1060 PetscReal *work; 1061 1062 PetscFunctionBegin; 1063 PetscCall(MatGetSize(A, &m, &n)); 1064 PetscCall(PetscMalloc1(n, &work)); 1065 if (type == REDUCTION_MEAN_REALPART) { 1066 PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_REALPART, work)); 1067 } else if (type == REDUCTION_MEAN_IMAGINARYPART) { 1068 PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_IMAGINARYPART, work)); 1069 } else { 1070 PetscCall(MatGetColumnReductions_SeqDense(a->A, type, work)); 1071 } 1072 if (type == NORM_2) { 1073 for (i = 0; i < n; i++) work[i] *= work[i]; 1074 } 1075 if (type == NORM_INFINITY) { 1076 PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, A->hdr.comm)); 1077 } else { 1078 PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, A->hdr.comm)); 1079 } 1080 PetscCall(PetscFree(work)); 1081 if (type == NORM_2) { 1082 for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]); 1083 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 1084 for (i = 0; i < n; i++) reductions[i] /= m; 1085 } 1086 PetscFunctionReturn(PETSC_SUCCESS); 1087 } 1088 1089 static PetscErrorCode MatSetRandom_MPIDense(Mat x, PetscRandom rctx) 1090 { 1091 Mat_MPIDense *d = (Mat_MPIDense *)x->data; 1092 1093 PetscFunctionBegin; 1094 PetscCall(MatSetRandom(d->A, rctx)); 1095 #if defined(PETSC_HAVE_DEVICE) 1096 x->offloadmask = d->A->offloadmask; 
/* A dense matrix stores every entry explicitly, so a diagonal entry can never
   be missing; d is therefore never written. */
static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A, PetscBool *missing, PetscInt *d)
{
  PetscFunctionBegin;
  *missing = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}
NULL, 1171 NULL, 1172 NULL, 1173 NULL, 1174 /* 59*/ MatCreateSubMatrix_MPIDense, 1175 MatDestroy_MPIDense, 1176 MatView_MPIDense, 1177 NULL, 1178 NULL, 1179 /* 64*/ NULL, 1180 NULL, 1181 NULL, 1182 NULL, 1183 NULL, 1184 /* 69*/ NULL, 1185 NULL, 1186 NULL, 1187 NULL, 1188 NULL, 1189 /* 74*/ NULL, 1190 NULL, 1191 NULL, 1192 NULL, 1193 NULL, 1194 /* 79*/ NULL, 1195 NULL, 1196 NULL, 1197 NULL, 1198 /* 83*/ MatLoad_MPIDense, 1199 NULL, 1200 NULL, 1201 NULL, 1202 NULL, 1203 NULL, 1204 /* 89*/ NULL, 1205 NULL, 1206 NULL, 1207 NULL, 1208 NULL, 1209 /* 94*/ NULL, 1210 NULL, 1211 MatMatTransposeMultSymbolic_MPIDense_MPIDense, 1212 MatMatTransposeMultNumeric_MPIDense_MPIDense, 1213 NULL, 1214 /* 99*/ MatProductSetFromOptions_MPIDense, 1215 NULL, 1216 NULL, 1217 MatConjugate_MPIDense, 1218 NULL, 1219 /*104*/ NULL, 1220 MatRealPart_MPIDense, 1221 MatImaginaryPart_MPIDense, 1222 NULL, 1223 NULL, 1224 /*109*/ NULL, 1225 NULL, 1226 NULL, 1227 MatGetColumnVector_MPIDense, 1228 MatMissingDiagonal_MPIDense, 1229 /*114*/ NULL, 1230 NULL, 1231 NULL, 1232 NULL, 1233 NULL, 1234 /*119*/ NULL, 1235 NULL, 1236 NULL, 1237 NULL, 1238 NULL, 1239 /*124*/ NULL, 1240 MatGetColumnReductions_MPIDense, 1241 NULL, 1242 NULL, 1243 NULL, 1244 /*129*/ NULL, 1245 NULL, 1246 MatTransposeMatMultSymbolic_MPIDense_MPIDense, 1247 MatTransposeMatMultNumeric_MPIDense_MPIDense, 1248 NULL, 1249 /*134*/ NULL, 1250 NULL, 1251 NULL, 1252 NULL, 1253 NULL, 1254 /*139*/ NULL, 1255 NULL, 1256 NULL, 1257 NULL, 1258 NULL, 1259 MatCreateMPIMatConcatenateSeqMat_MPIDense, 1260 /*145*/ NULL, 1261 NULL, 1262 NULL, 1263 NULL, 1264 NULL, 1265 /*150*/ NULL, 1266 NULL}; 1267 1268 PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat, PetscScalar *data) 1269 { 1270 Mat_MPIDense *a = (Mat_MPIDense *)mat->data; 1271 MatType mtype = MATSEQDENSE; 1272 1273 PetscFunctionBegin; 1274 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1275 
PetscCall(PetscLayoutSetUp(mat->rmap)); 1276 PetscCall(PetscLayoutSetUp(mat->cmap)); 1277 if (!a->A) { 1278 PetscCall(MatCreate(PETSC_COMM_SELF, &a->A)); 1279 PetscCall(MatSetSizes(a->A, mat->rmap->n, mat->cmap->N, mat->rmap->n, mat->cmap->N)); 1280 } 1281 #if defined(PETSC_HAVE_CUDA) 1282 PetscBool iscuda; 1283 PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSECUDA, &iscuda)); 1284 if (iscuda) mtype = MATSEQDENSECUDA; 1285 #endif 1286 #if defined(PETSC_HAVE_HIP) 1287 PetscBool iship; 1288 PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSEHIP, &iship)); 1289 if (iship) mtype = MATSEQDENSEHIP; 1290 #endif 1291 PetscCall(MatSetType(a->A, mtype)); 1292 PetscCall(MatSeqDenseSetPreallocation(a->A, data)); 1293 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 1294 mat->offloadmask = a->A->offloadmask; 1295 #endif 1296 mat->preallocated = PETSC_TRUE; 1297 mat->assembled = PETSC_TRUE; 1298 PetscFunctionReturn(PETSC_SUCCESS); 1299 } 1300 1301 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIDense(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 1302 { 1303 Mat B, C; 1304 1305 PetscFunctionBegin; 1306 PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &C)); 1307 PetscCall(MatConvert_SeqAIJ_SeqDense(C, MATSEQDENSE, MAT_INITIAL_MATRIX, &B)); 1308 PetscCall(MatDestroy(&C)); 1309 if (reuse == MAT_REUSE_MATRIX) { 1310 C = *newmat; 1311 } else C = NULL; 1312 PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? 
MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C)); 1313 PetscCall(MatDestroy(&B)); 1314 if (reuse == MAT_INPLACE_MATRIX) { 1315 PetscCall(MatHeaderReplace(A, &C)); 1316 } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C; 1317 PetscFunctionReturn(PETSC_SUCCESS); 1318 } 1319 1320 PetscErrorCode MatConvert_MPIDense_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 1321 { 1322 Mat B, C; 1323 1324 PetscFunctionBegin; 1325 PetscCall(MatDenseGetLocalMatrix(A, &C)); 1326 PetscCall(MatConvert_SeqDense_SeqAIJ(C, MATSEQAIJ, MAT_INITIAL_MATRIX, &B)); 1327 if (reuse == MAT_REUSE_MATRIX) { 1328 C = *newmat; 1329 } else C = NULL; 1330 PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C)); 1331 PetscCall(MatDestroy(&B)); 1332 if (reuse == MAT_INPLACE_MATRIX) { 1333 PetscCall(MatHeaderReplace(A, &C)); 1334 } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C; 1335 PetscFunctionReturn(PETSC_SUCCESS); 1336 } 1337 1338 #if defined(PETSC_HAVE_ELEMENTAL) 1339 PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 1340 { 1341 Mat mat_elemental; 1342 PetscScalar *v; 1343 PetscInt m = A->rmap->n, N = A->cmap->N, rstart = A->rmap->rstart, i, *rows, *cols; 1344 1345 PetscFunctionBegin; 1346 if (reuse == MAT_REUSE_MATRIX) { 1347 mat_elemental = *newmat; 1348 PetscCall(MatZeroEntries(*newmat)); 1349 } else { 1350 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental)); 1351 PetscCall(MatSetSizes(mat_elemental, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N)); 1352 PetscCall(MatSetType(mat_elemental, MATELEMENTAL)); 1353 PetscCall(MatSetUp(mat_elemental)); 1354 PetscCall(MatSetOption(mat_elemental, MAT_ROW_ORIENTED, PETSC_FALSE)); 1355 } 1356 1357 PetscCall(PetscMalloc2(m, &rows, N, &cols)); 1358 for (i = 0; i < N; i++) cols[i] = i; 1359 for (i = 0; i < m; i++) rows[i] = rstart + i; 1360 1361 /* PETSc-Elemental interface uses axpy 
for setting off-processor entries, only ADD_VALUES is allowed */ 1362 PetscCall(MatDenseGetArray(A, &v)); 1363 PetscCall(MatSetValues(mat_elemental, m, rows, N, cols, v, ADD_VALUES)); 1364 PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY)); 1365 PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY)); 1366 PetscCall(MatDenseRestoreArray(A, &v)); 1367 PetscCall(PetscFree2(rows, cols)); 1368 1369 if (reuse == MAT_INPLACE_MATRIX) { 1370 PetscCall(MatHeaderReplace(A, &mat_elemental)); 1371 } else { 1372 *newmat = mat_elemental; 1373 } 1374 PetscFunctionReturn(PETSC_SUCCESS); 1375 } 1376 #endif 1377 1378 static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A, PetscInt col, PetscScalar **vals) 1379 { 1380 Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 1381 1382 PetscFunctionBegin; 1383 PetscCall(MatDenseGetColumn(mat->A, col, vals)); 1384 PetscFunctionReturn(PETSC_SUCCESS); 1385 } 1386 1387 static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A, PetscScalar **vals) 1388 { 1389 Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 1390 1391 PetscFunctionBegin; 1392 PetscCall(MatDenseRestoreColumn(mat->A, vals)); 1393 PetscFunctionReturn(PETSC_SUCCESS); 1394 } 1395 1396 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 1397 { 1398 Mat_MPIDense *mat; 1399 PetscInt m, nloc, N; 1400 1401 PetscFunctionBegin; 1402 PetscCall(MatGetSize(inmat, &m, &N)); 1403 PetscCall(MatGetLocalSize(inmat, NULL, &nloc)); 1404 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 1405 PetscInt sum; 1406 1407 if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N)); 1408 /* Check sum(n) = N */ 1409 PetscCall(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm)); 1410 PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N); 1411 1412 PetscCall(MatCreateDense(comm, m, n, PETSC_DETERMINE, N, NULL, outmat)); 1413 
PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 1414 } 1415 1416 /* numeric phase */ 1417 mat = (Mat_MPIDense *)(*outmat)->data; 1418 PetscCall(MatCopy(inmat, mat->A, SAME_NONZERO_PATTERN)); 1419 PetscFunctionReturn(PETSC_SUCCESS); 1420 } 1421 1422 PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A, PetscInt col, Vec *v) 1423 { 1424 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1425 PetscInt lda; 1426 1427 PetscFunctionBegin; 1428 PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1429 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1430 if (!a->cvec) PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec)); 1431 a->vecinuse = col + 1; 1432 PetscCall(MatDenseGetLDA(a->A, &lda)); 1433 PetscCall(MatDenseGetArray(a->A, (PetscScalar **)&a->ptrinuse)); 1434 PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda)); 1435 *v = a->cvec; 1436 PetscFunctionReturn(PETSC_SUCCESS); 1437 } 1438 1439 PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A, PetscInt col, Vec *v) 1440 { 1441 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1442 1443 PetscFunctionBegin; 1444 PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first"); 1445 PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector"); 1446 a->vecinuse = 0; 1447 PetscCall(MatDenseRestoreArray(a->A, (PetscScalar **)&a->ptrinuse)); 1448 PetscCall(VecResetArray(a->cvec)); 1449 if (v) *v = NULL; 1450 PetscFunctionReturn(PETSC_SUCCESS); 1451 } 1452 1453 PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v) 1454 { 1455 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1456 PetscInt lda; 1457 1458 PetscFunctionBegin; 1459 PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need 
to call MatDenseRestoreColumnVec() first"); 1460 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1461 if (!a->cvec) PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec)); 1462 a->vecinuse = col + 1; 1463 PetscCall(MatDenseGetLDA(a->A, &lda)); 1464 PetscCall(MatDenseGetArrayRead(a->A, &a->ptrinuse)); 1465 PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda)); 1466 PetscCall(VecLockReadPush(a->cvec)); 1467 *v = a->cvec; 1468 PetscFunctionReturn(PETSC_SUCCESS); 1469 } 1470 1471 PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v) 1472 { 1473 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1474 1475 PetscFunctionBegin; 1476 PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first"); 1477 PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector"); 1478 a->vecinuse = 0; 1479 PetscCall(MatDenseRestoreArrayRead(a->A, &a->ptrinuse)); 1480 PetscCall(VecLockReadPop(a->cvec)); 1481 PetscCall(VecResetArray(a->cvec)); 1482 if (v) *v = NULL; 1483 PetscFunctionReturn(PETSC_SUCCESS); 1484 } 1485 1486 PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v) 1487 { 1488 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1489 PetscInt lda; 1490 1491 PetscFunctionBegin; 1492 PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1493 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1494 if (!a->cvec) PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec)); 1495 a->vecinuse = col + 1; 1496 PetscCall(MatDenseGetLDA(a->A, &lda)); 1497 PetscCall(MatDenseGetArrayWrite(a->A, (PetscScalar 
**)&a->ptrinuse)); 1498 PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda)); 1499 *v = a->cvec; 1500 PetscFunctionReturn(PETSC_SUCCESS); 1501 } 1502 1503 PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v) 1504 { 1505 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1506 1507 PetscFunctionBegin; 1508 PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first"); 1509 PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector"); 1510 a->vecinuse = 0; 1511 PetscCall(MatDenseRestoreArrayWrite(a->A, (PetscScalar **)&a->ptrinuse)); 1512 PetscCall(VecResetArray(a->cvec)); 1513 if (v) *v = NULL; 1514 PetscFunctionReturn(PETSC_SUCCESS); 1515 } 1516 1517 PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A, PetscInt rbegin, PetscInt rend, PetscInt cbegin, PetscInt cend, Mat *v) 1518 { 1519 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1520 Mat_MPIDense *c; 1521 MPI_Comm comm; 1522 PetscInt pbegin, pend; 1523 1524 PetscFunctionBegin; 1525 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 1526 PetscCheck(!a->vecinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1527 PetscCheck(!a->matinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1528 pbegin = PetscMax(0, PetscMin(A->rmap->rend, rbegin) - A->rmap->rstart); 1529 pend = PetscMin(A->rmap->n, PetscMax(0, rend - A->rmap->rstart)); 1530 if (!a->cmat) { 1531 PetscCall(MatCreate(comm, &a->cmat)); 1532 PetscCall(MatSetType(a->cmat, ((PetscObject)A)->type_name)); 1533 if (rend - rbegin == A->rmap->N) PetscCall(PetscLayoutReference(A->rmap, &a->cmat->rmap)); 1534 else { 1535 PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin)); 1536 PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin)); 1537 PetscCall(PetscLayoutSetUp(a->cmat->rmap)); 1538 } 1539 PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin)); 1540 
PetscCall(PetscLayoutSetUp(a->cmat->cmap)); 1541 } else { 1542 PetscBool same = (PetscBool)(rend - rbegin == a->cmat->rmap->N); 1543 if (same && a->cmat->rmap->N != A->rmap->N) { 1544 same = (PetscBool)(pend - pbegin == a->cmat->rmap->n); 1545 PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &same, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A))); 1546 } 1547 if (!same) { 1548 PetscCall(PetscLayoutDestroy(&a->cmat->rmap)); 1549 PetscCall(PetscLayoutCreate(comm, &a->cmat->rmap)); 1550 PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin)); 1551 PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin)); 1552 PetscCall(PetscLayoutSetUp(a->cmat->rmap)); 1553 } 1554 if (cend - cbegin != a->cmat->cmap->N) { 1555 PetscCall(PetscLayoutDestroy(&a->cmat->cmap)); 1556 PetscCall(PetscLayoutCreate(comm, &a->cmat->cmap)); 1557 PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin)); 1558 PetscCall(PetscLayoutSetUp(a->cmat->cmap)); 1559 } 1560 } 1561 c = (Mat_MPIDense *)a->cmat->data; 1562 PetscCheck(!c->A, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1563 PetscCall(MatDenseGetSubMatrix(a->A, pbegin, pend, cbegin, cend, &c->A)); 1564 1565 a->cmat->preallocated = PETSC_TRUE; 1566 a->cmat->assembled = PETSC_TRUE; 1567 #if defined(PETSC_HAVE_DEVICE) 1568 a->cmat->offloadmask = c->A->offloadmask; 1569 #endif 1570 a->matinuse = cbegin + 1; 1571 *v = a->cmat; 1572 PetscFunctionReturn(PETSC_SUCCESS); 1573 } 1574 1575 PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A, Mat *v) 1576 { 1577 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1578 Mat_MPIDense *c; 1579 1580 PetscFunctionBegin; 1581 PetscCheck(a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetSubMatrix() first"); 1582 PetscCheck(a->cmat, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal matrix"); 1583 PetscCheck(*v == a->cmat, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Not the matrix obtained from MatDenseGetSubMatrix()"); 1584 
  a->matinuse = 0;
  c           = (Mat_MPIDense *)a->cmat->data;
  PetscCall(MatDenseRestoreSubMatrix(a->A, &c->A));
  if (v) *v = NULL;
#if defined(PETSC_HAVE_DEVICE)
  /* keep the parallel matrix's offload state in sync with its local block */
  A->offloadmask = a->A->offloadmask;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*MC
   MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.

   Options Database Key:
. -mat_type mpidense - sets the matrix type to `MATMPIDENSE` during a call to `MatSetFromOptions()`

  Level: beginner

.seealso: [](chapter_matrices), `Mat`, `MatCreateDense()`, `MATSEQDENSE`, `MATDENSE`
M*/

/* Type constructor for MATMPIDENSE: allocates the per-type data structure and
   registers all method implementations (composed functions) for this type. */
PetscErrorCode MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense *a;

  PetscFunctionBegin;
  PetscCall(PetscNew(&a));
  mat->data = (void *)a;
  PetscCall(PetscMemcpy(mat->ops, &MatOps_Values, sizeof(struct _MatOps)));

  mat->insertmode = NOT_SET_VALUES;

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;

  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)mat), 1, &mat->stash));

  /* stuff used for matrix vector multiply */
  a->lvec        = NULL;
  a->Mvctx       = NULL;
  a->roworiented = PETSC_TRUE;

  /* register the type-specific implementations queried via PetscUseMethod()/PetscTryMethod() */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", MatDenseGetLDA_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", MatDenseSetLDA_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", MatDenseGetArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", MatDenseRestoreArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", MatDenseGetArrayRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", MatDenseRestoreArrayRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", MatDenseGetArrayWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", MatDenseRestoreArrayWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", MatDensePlaceArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", MatDenseResetArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", MatDenseReplaceArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", MatDenseGetSubMatrix_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", MatDenseRestoreSubMatrix_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", MatConvert_MPIDense_MPIAIJ));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", MatConvert_MPIDense_Elemental));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", MatConvert_Dense_ScaLAPACK));
#endif
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", MatConvert_MPIDense_MPIDenseCUDA));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", MatMPIDenseSetPreallocation_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
#endif
#if defined(PETSC_HAVE_HIP)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", MatConvert_MPIDense_MPIDenseHIP));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", MatDenseGetColumn_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", MatDenseRestoreColumn_MPIDense));
  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, MATMPIDENSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*MC
   MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.

   This matrix type is identical to `MATSEQDENSE` when constructed with a single process communicator,
   and `MATMPIDENSE` otherwise.

   Options Database Key:
. -mat_type dense - sets the matrix type to `MATDENSE` during a call to `MatSetFromOptions()`

  Level: beginner

.seealso: [](chapter_matrices), `Mat`, `MATSEQDENSE`, `MATMPIDENSE`, `MATDENSECUDA`, `MATDENSEHIP`
M*/

/*@C
   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

   Collective

   Input Parameters:
+  B - the matrix
-  data - optional location of matrix data. Set to `NULL` for PETSc
   to control all matrix memory allocation.

   Level: intermediate

   Notes:
   The dense format is fully compatible with standard Fortran
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space. Most users should
   set `data` to `NULL`.

.seealso: [](chapter_matrices), `Mat`, `MATMPIDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
@*/
PetscErrorCode MatMPIDenseSetPreallocation(Mat B, PetscScalar *data)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  /* PetscTryMethod: silently a no-op for matrix types that do not compose this method */
  PetscTryMethod(B, "MatMPIDenseSetPreallocation_C", (Mat, PetscScalar *), (B, data));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
   MatDensePlaceArray - Allows one to replace the array in a `MATDENSE` matrix with an
   array provided by the user.
   This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Level: developer

   Note:
   You can return to the original array with a call to `MatDenseResetArray()`. The user is responsible for freeing this array; it will not be
   freed when the matrix is destroyed.

.seealso: [](chapter_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDenseResetArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`,
          `MatDenseReplaceArray()`
@*/
PetscErrorCode MatDensePlaceArray(Mat mat, const PetscScalar *array)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  PetscUseMethod(mat, "MatDensePlaceArray_C", (Mat, const PetscScalar *), (mat, array));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
  /* the user-supplied array lives on the host, so the valid copy is now the CPU one */
  mat->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
   MatDenseResetArray - Resets the matrix array to that it previously had before the call to `MatDensePlaceArray()`

   Not Collective

   Input Parameter:
.  mat - the matrix

   Level: developer

   Note:
   You can only call this after a call to `MatDensePlaceArray()`

.seealso: [](chapter_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDensePlaceArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`
@*/
PetscErrorCode MatDenseResetArray(Mat mat)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  PetscUseMethod(mat, "MatDenseResetArray_C", (Mat), (mat));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
   MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Level: developer

   Note:
   The memory passed in MUST be obtained with `PetscMalloc()` and CANNOT be
   freed by the user. It will be freed when the matrix is destroyed.

.seealso: [](chapter_matrices), `Mat`, `MatDensePlaceArray()`, `MatDenseGetArray()`, `VecReplaceArray()`
@*/
PetscErrorCode MatDenseReplaceArray(Mat mat, const PetscScalar *array)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  PetscUseMethod(mat, "MatDenseReplaceArray_C", (Mat, const PetscScalar *), (mat, array));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
  /* the replacement array lives on the host, so the valid copy is now the CPU one */
  mat->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
   MatCreateDense - Creates a matrix in `MATDENSE` format.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or `PETSC_DECIDE` to have calculated if `M` is given)
.  n - number of local columns (or `PETSC_DECIDE` to have calculated if `N` is given)
.  M - number of global rows (or `PETSC_DECIDE` to have calculated if `m` is given)
.  N - number of global columns (or `PETSC_DECIDE` to have calculated if `n` is given)
-  data - optional location of matrix data. Set data to `NULL` (`PETSC_NULL_SCALAR` for Fortran users) for PETSc
   to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Level: intermediate

   Notes:
   The dense format is fully compatible with standard Fortran
   storage by columns.

   Although local portions of the matrix are stored in column-major
   order, the matrix is partitioned across MPI ranks by row.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space. Most users should
   set `data` to `NULL` (`PETSC_NULL_SCALAR` for Fortran users).

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

.seealso: [](chapter_matrices), `Mat`, `MATDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
@*/
PetscErrorCode MatCreateDense(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar *data, Mat *A)
{
  PetscFunctionBegin;
  PetscCall(MatCreate(comm, A));
  PetscCall(MatSetSizes(*A, m, n, M, N));
  PetscCall(MatSetType(*A, MATDENSE));
  /* exactly one of the two preallocations takes effect, depending on whether
     MATDENSE resolved to the sequential or the MPI type (the other is a no-op) */
  PetscCall(MatSeqDenseSetPreallocation(*A, data));
  PetscCall(MatMPIDenseSetPreallocation(*A, data));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Duplicate an MPI dense matrix; layouts are shared by reference, the local
   dense block is duplicated (optionally copying values per cpvalues). */
static PetscErrorCode MatDuplicate_MPIDense(Mat A, MatDuplicateOption cpvalues, Mat *newmat)
{
  Mat           mat;
  Mat_MPIDense *a, *oldmat = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  *newmat = NULL;
  PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat));
  PetscCall(MatSetSizes(mat, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
  PetscCall(MatSetType(mat, ((PetscObject)A)->type_name));
  a = (Mat_MPIDense *)mat->data;

  mat->factortype   = A->factortype;
  mat->assembled    = PETSC_TRUE;
  mat->preallocated = PETSC_TRUE;

  mat->insertmode = NOT_SET_VALUES;
  a->donotstash   = oldmat->donotstash;

  PetscCall(PetscLayoutReference(A->rmap, &mat->rmap));
  PetscCall(PetscLayoutReference(A->cmap, &mat->cmap));

  PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));

  *newmat = mat;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Load an MPI dense matrix from a binary or (if available) HDF5 viewer. */
PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
{
  PetscBool isbinary;
#if defined(PETSC_HAVE_HDF5)
  PetscBool ishdf5;
#endif

  PetscFunctionBegin;
  PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
  PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
  /* force binary viewer to load .info file if it has not yet done so */
  PetscCall(PetscViewerSetUp(viewer));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
#if defined(PETSC_HAVE_HDF5)
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
#endif
  if (isbinary) {
    PetscCall(MatLoad_Dense_Binary(newMat, viewer));
#if defined(PETSC_HAVE_HDF5)
  } else if (ishdf5) {
    PetscCall(MatLoad_Dense_HDF5(newMat, viewer));
#endif
  } else SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Equality test: compare the local blocks, then AND the result across the communicator. */
static PetscErrorCode MatEqual_MPIDense(Mat A, Mat B, PetscBool *flag)
{
  Mat_MPIDense *matB = (Mat_MPIDense *)B->data, *matA = (Mat_MPIDense *)A->data;
  Mat           a, b;

  PetscFunctionBegin;
  a = matA->A;
  b = matB->A;
  PetscCall(MatEqual(a, b, flag));
  PetscCall(MPIU_Allreduce(MPI_IN_PLACE, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Destroy callback for the A^T*B product data attached to C. */
PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data)
{
  Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data;

  PetscFunctionBegin;
  PetscCall(PetscFree2(atb->sendbuf, atb->recvcounts));
  PetscCall(MatDestroy(&atb->atb));
  PetscCall(PetscFree(atb));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Destroy callback for the A*B^T product data attached to C. */
PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data)
{
  Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data;

  PetscFunctionBegin;
  PetscCall(PetscFree2(abt->buf[0], abt->buf[1]));
  PetscCall(PetscFree2(abt->recvcounts, abt->recvdispls));
  PetscCall(PetscFree(abt));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Numeric phase of C = A^T * B for MPI dense matrices. */
static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c =
(Mat_MPIDense *)C->data;
  Mat_TransMatMultDense *atb;
  MPI_Comm               comm;
  PetscMPIInt            size, *recvcounts;
  PetscScalar           *carray, *sendbuf;
  const PetscScalar     *atbarray;
  PetscInt               i, cN = C->cmap->N, proc, k, j, lda;
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C, 3);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
  atb        = (Mat_TransMatMultDense *)C->product->data;
  recvcounts = atb->recvcounts;
  sendbuf    = atb->sendbuf;

  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));

  /* compute atbarray = aseq^T * bseq */
  PetscCall(MatTransposeMatMult(a->A, b->A, atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX, PETSC_DEFAULT, &atb->atb));

  PetscCall(MatGetOwnershipRanges(C, &ranges));

  /* arrange atbarray into sendbuf: one contiguous (rows-of-proc x cN) slab per destination rank */
  PetscCall(MatDenseGetArrayRead(atb->atb, &atbarray));
  PetscCall(MatDenseGetLDA(atb->atb, &lda));
  for (proc = 0, k = 0; proc < size; proc++) {
    for (j = 0; j < cN; j++) {
      for (i = ranges[proc]; i < ranges[proc + 1]; i++) sendbuf[k++] = atbarray[i + j * lda];
    }
  }
  PetscCall(MatDenseRestoreArrayRead(atb->atb, &atbarray));

  /* sum all atbarray to local values of C */
  PetscCall(MatDenseGetArrayWrite(c->A, &carray));
  PetscCallMPI(MPI_Reduce_scatter(sendbuf, carray, recvcounts, MPIU_SCALAR, MPIU_SUM, comm));
  PetscCall(MatDenseRestoreArrayWrite(c->A, &carray));
  PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Symbolic phase of C = A^T * B: sets sizes/type of C and allocates the
   reduce-scatter buffers reused by every numeric call. */
static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
{
  MPI_Comm               comm;
  PetscMPIInt            size;
  PetscInt               cm = A->cmap->n, cM, cN = B->cmap->N;
  Mat_TransMatMultDense *atb;
  PetscBool              cisdense = PETSC_FALSE;
  PetscInt               i;
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C, 4);
  PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
    SETERRQ(comm, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
  }

  /* create matrix product C */
  PetscCall(MatSetSizes(C, cm, B->cmap->n, A->cmap->N, B->cmap->N));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, ""));
#endif
#if defined(PETSC_HAVE_HIP)
  /* NOTE(review): when both CUDA and HIP are enabled this second check overwrites
     the CUDA result — confirm a MATMPIDENSECUDA C is still recognized in that build */
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSEHIP, ""));
#endif
  if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
  PetscCall(MatSetUp(C));

  /* create data structure for reuse C */
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCall(PetscNew(&atb));
  cM = C->rmap->N;
  PetscCall(PetscMalloc2(cM * cN, &atb->sendbuf, size, &atb->recvcounts));
  PetscCall(MatGetOwnershipRanges(C, &ranges));
  for (i = 0; i < size; i++) atb->recvcounts[i] = (ranges[i + 1] - ranges[i]) * cN;

  C->product->data    = atb;
  C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Symbolic phase of C = A * B^T: chooses the algorithm ("allgatherv" or
   "cyclic") and allocates the communication buffers reused by numeric calls. */
static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
{
  MPI_Comm               comm;
  PetscMPIInt            i, size;
  PetscInt               maxRows, bufsiz;
  PetscMPIInt            tag;
  PetscInt               alg;
  Mat_MatTransMultDense *abt;
  Mat_Product           *product = C->product;
  PetscBool              flg;

  PetscFunctionBegin;
  MatCheckProduct(C, 4);
  PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
  /* check local size of A and B */
  PetscCheck(A->cmap->n == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local column dimensions are incompatible, A (%" PetscInt_FMT ") != B (%" PetscInt_FMT ")", A->cmap->n, B->cmap->n);

  /* alg 0 = "allgatherv", anything else = "cyclic" */
  PetscCall(PetscStrcmp(product->alg, "allgatherv", &flg));
  alg = flg ? 0 : 1;

  /* setup matrix product C */
  PetscCall(MatSetSizes(C, A->rmap->n, B->rmap->n, A->rmap->N, B->rmap->N));
  PetscCall(MatSetType(C, MATMPIDENSE));
  PetscCall(MatSetUp(C));
  PetscCall(PetscObjectGetNewTag((PetscObject)C, &tag));

  /* create data structure for reuse C */
  PetscCall(PetscObjectGetComm((PetscObject)C, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCall(PetscNew(&abt));
  abt->tag = tag;
  abt->alg = alg;
  switch (alg) {
  case 1: /* alg: "cyclic" — two ping-pong buffers sized for the widest row block of B */
    for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i]));
    bufsiz = A->cmap->N * maxRows;
    PetscCall(PetscMalloc2(bufsiz, &(abt->buf[0]), bufsiz, &(abt->buf[1])));
    break;
  default: /* alg: "allgatherv" — send buffer for local B^T, receive buffer for all of B^T */
    PetscCall(PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1])));
    PetscCall(PetscMalloc2(size, &(abt->recvcounts), size + 1, &(abt->recvdispls)));
    for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N;
    for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i];
    break;
  }

  C->product->data                = abt;
  C->product->destroy             = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
  C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Numeric C = A * B^T using a cyclic pipeline: each rank's packed B^T block is
   passed around the ring while the local GEMM for the current block overlaps
   with the next Isend/Irecv pair. */
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
{
  Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
  Mat_MatTransMultDense *abt;
  MPI_Comm               comm;
  PetscMPIInt            rank, size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom;
  PetscScalar           *sendbuf, *recvbuf = NULL, *cv;
  PetscInt               i, cK = A->cmap->N, k, j, bn;
  PetscScalar            _DOne = 1.0, _DZero = 0.0;
  const PetscScalar     *av, *bv;
  PetscBLASInt           cm, cn, ck, alda, blda = 0, clda;
  MPI_Request            reqs[2];
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C, 3);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
  abt = (Mat_MatTransMultDense *)C->product->data;
  PetscCall(PetscObjectGetComm((PetscObject)C, &comm));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCall(MatDenseGetArrayRead(a->A, &av));
  PetscCall(MatDenseGetArrayRead(b->A, &bv));
  PetscCall(MatDenseGetArrayWrite(c->A, &cv));
  PetscCall(MatDenseGetLDA(a->A, &i));
  PetscCall(PetscBLASIntCast(i, &alda));
  PetscCall(MatDenseGetLDA(b->A, &i));
  PetscCall(PetscBLASIntCast(i, &blda));
  PetscCall(MatDenseGetLDA(c->A, &i));
  PetscCall(PetscBLASIntCast(i, &clda));
  PetscCall(MatGetOwnershipRanges(B, &ranges));
  bn = B->rmap->n;
  if (blda == bn) {
    /* local B block is contiguous: send it directly without packing */
    sendbuf = (PetscScalar *)bv;
  } else {
    /* strip the leading-dimension padding into buf[0] before sending */
    sendbuf = abt->buf[0];
    for (k = 0, i = 0; i < cK; i++) {
      for (j = 0; j < bn; j++, k++) sendbuf[k] = bv[i * blda + j];
    }
  }
  if (size > 1) {
    sendto   = (rank + size - 1) % size;
    recvfrom = (rank + size + 1) % size;
  } else {
    sendto = recvfrom = 0;
  }
  PetscCall(PetscBLASIntCast(cK, &ck));
  PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm));
  recvisfrom = rank;
  for (i = 0; i < size; i++) {
    /* we have finished receiving in sending, bufs can be read/modified */
    PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */
    PetscInt nextbn         = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];

    if (nextrecvisfrom != rank) {
      /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */
      sendsiz = cK * bn;
      recvsiz = cK * nextbn;
      recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1];
      PetscCallMPI(MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]));
      PetscCallMPI(MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]));
    }

    /* local aseq * sendbuf^T */
    PetscCall(PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn));
    if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &cm, &cn, &ck, &_DOne, av, &alda, sendbuf, &cn, &_DZero, cv + clda * ranges[recvisfrom], &clda));

    if (nextrecvisfrom != rank) {
      /* wait for the sends and receives to complete, swap sendbuf and recvbuf */
      PetscCallMPI(MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE));
    }
    bn         = nextbn;
    recvisfrom = nextrecvisfrom;
    sendbuf    = recvbuf;
  }
  PetscCall(MatDenseRestoreArrayRead(a->A, &av));
  PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
  PetscCall(MatDenseRestoreArrayWrite(c->A, &cv));
  PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Numeric C = A * B^T by gathering all of B^T on every rank, then one local GEMM. */
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C)
{
  Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
  Mat_MatTransMultDense *abt;
  MPI_Comm               comm;
  PetscMPIInt            size;
  PetscScalar           *cv, *sendbuf, *recvbuf;
  const PetscScalar     *av, *bv;
  PetscInt               blda, i, cK = A->cmap->N, k, j, bn;
  PetscScalar
_DOne = 1.0, _DZero = 0.0; 2171 PetscBLASInt cm, cn, ck, alda, clda; 2172 2173 PetscFunctionBegin; 2174 MatCheckProduct(C, 3); 2175 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 2176 abt = (Mat_MatTransMultDense *)C->product->data; 2177 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 2178 PetscCallMPI(MPI_Comm_size(comm, &size)); 2179 PetscCall(MatDenseGetArrayRead(a->A, &av)); 2180 PetscCall(MatDenseGetArrayRead(b->A, &bv)); 2181 PetscCall(MatDenseGetArrayWrite(c->A, &cv)); 2182 PetscCall(MatDenseGetLDA(a->A, &i)); 2183 PetscCall(PetscBLASIntCast(i, &alda)); 2184 PetscCall(MatDenseGetLDA(b->A, &blda)); 2185 PetscCall(MatDenseGetLDA(c->A, &i)); 2186 PetscCall(PetscBLASIntCast(i, &clda)); 2187 /* copy transpose of B into buf[0] */ 2188 bn = B->rmap->n; 2189 sendbuf = abt->buf[0]; 2190 recvbuf = abt->buf[1]; 2191 for (k = 0, j = 0; j < bn; j++) { 2192 for (i = 0; i < cK; i++, k++) sendbuf[k] = bv[i * blda + j]; 2193 } 2194 PetscCall(MatDenseRestoreArrayRead(b->A, &bv)); 2195 PetscCallMPI(MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm)); 2196 PetscCall(PetscBLASIntCast(cK, &ck)); 2197 PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm)); 2198 PetscCall(PetscBLASIntCast(c->A->cmap->n, &cn)); 2199 if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &cm, &cn, &ck, &_DOne, av, &alda, recvbuf, &ck, &_DZero, cv, &clda)); 2200 PetscCall(MatDenseRestoreArrayRead(a->A, &av)); 2201 PetscCall(MatDenseRestoreArrayRead(b->A, &bv)); 2202 PetscCall(MatDenseRestoreArrayWrite(c->A, &cv)); 2203 PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 2204 PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 2205 PetscFunctionReturn(PETSC_SUCCESS); 2206 } 2207 2208 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) 2209 { 2210 Mat_MatTransMultDense *abt; 2211 2212 PetscFunctionBegin; 2213 MatCheckProduct(C, 3); 2214 
PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 2215 abt = (Mat_MatTransMultDense *)C->product->data; 2216 switch (abt->alg) { 2217 case 1: 2218 PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C)); 2219 break; 2220 default: 2221 PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C)); 2222 break; 2223 } 2224 PetscFunctionReturn(PETSC_SUCCESS); 2225 } 2226 2227 PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data) 2228 { 2229 Mat_MatMultDense *ab = (Mat_MatMultDense *)data; 2230 2231 PetscFunctionBegin; 2232 PetscCall(MatDestroy(&ab->Ce)); 2233 PetscCall(MatDestroy(&ab->Ae)); 2234 PetscCall(MatDestroy(&ab->Be)); 2235 PetscCall(PetscFree(ab)); 2236 PetscFunctionReturn(PETSC_SUCCESS); 2237 } 2238 2239 #if defined(PETSC_HAVE_ELEMENTAL) 2240 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) 2241 { 2242 Mat_MatMultDense *ab; 2243 2244 PetscFunctionBegin; 2245 MatCheckProduct(C, 3); 2246 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Missing product data"); 2247 ab = (Mat_MatMultDense *)C->product->data; 2248 PetscCall(MatConvert_MPIDense_Elemental(A, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Ae)); 2249 PetscCall(MatConvert_MPIDense_Elemental(B, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Be)); 2250 PetscCall(MatMatMultNumeric_Elemental(ab->Ae, ab->Be, ab->Ce)); 2251 PetscCall(MatConvert(ab->Ce, MATMPIDENSE, MAT_REUSE_MATRIX, &C)); 2252 PetscFunctionReturn(PETSC_SUCCESS); 2253 } 2254 2255 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C) 2256 { 2257 Mat Ae, Be, Ce; 2258 Mat_MatMultDense *ab; 2259 2260 PetscFunctionBegin; 2261 MatCheckProduct(C, 4); 2262 PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 2263 /* check local size of A and B */ 2264 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 2265 
    /* NOTE(review): the condition compares A's column layout with B's row layout,
       but the message prints A->rmap bounds — confirm the intended fields */
    SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
  }

  /* create elemental matrices Ae and Be */
  PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &Ae));
  PetscCall(MatSetSizes(Ae, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N));
  PetscCall(MatSetType(Ae, MATELEMENTAL));
  PetscCall(MatSetUp(Ae));
  PetscCall(MatSetOption(Ae, MAT_ROW_ORIENTED, PETSC_FALSE));

  PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &Be));
  PetscCall(MatSetSizes(Be, PETSC_DECIDE, PETSC_DECIDE, B->rmap->N, B->cmap->N));
  PetscCall(MatSetType(Be, MATELEMENTAL));
  PetscCall(MatSetUp(Be));
  PetscCall(MatSetOption(Be, MAT_ROW_ORIENTED, PETSC_FALSE));

  /* compute symbolic Ce = Ae*Be */
  PetscCall(MatCreate(PetscObjectComm((PetscObject)C), &Ce));
  PetscCall(MatMatMultSymbolic_Elemental(Ae, Be, fill, Ce));

  /* setup C */
  PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, PETSC_DECIDE, PETSC_DECIDE));
  PetscCall(MatSetType(C, MATDENSE));
  PetscCall(MatSetUp(C));

  /* create data structure for reuse Cdense */
  PetscCall(PetscNew(&ab));
  ab->Ae = Ae;
  ab->Be = Be;
  ab->Ce = Ce;

  C->product->data       = ab;
  C->product->destroy    = MatDestroy_MatMatMult_MPIDense_MPIDense;
  C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif

#if defined(PETSC_HAVE_ELEMENTAL)
/* Product AB: wire up the Elemental-backed symbolic routines on C. */
static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C)
{
  PetscFunctionBegin;
  C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense;
  C->ops->productsymbolic = MatProductSymbolic_AB;
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif

/* Product AtB: validate row-layout compatibility, then wire up symbolic routines. */
static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C)
{
  Mat_Product *product = C->product;
  Mat          A = product->A, B = product->B;

  PetscFunctionBegin;
  if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend)
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
  C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense;
  C->ops->productsymbolic          = MatProductSymbolic_AtB;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Product ABt: select the communication algorithm (allgatherv/cyclic) from the
   options database, then wire up the symbolic routines. */
static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C)
{
  Mat_Product *product     = C->product;
  const char  *algTypes[2] = {"allgatherv", "cyclic"};
  PetscInt     alg, nalg = 2;
  PetscBool    flg = PETSC_FALSE;

  PetscFunctionBegin;
  /* Set default algorithm */
  alg = 0; /* default is allgatherv */
  PetscCall(PetscStrcmp(product->alg, "default", &flg));
  if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));

  /* Get runtime option */
  if (product->api_user) {
    PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatTransposeMult", "Mat");
    PetscCall(PetscOptionsEList("-matmattransmult_mpidense_mpidense_via", "Algorithmic approach", "MatMatTransposeMult", algTypes, nalg, algTypes[alg], &alg, &flg));
    PetscOptionsEnd();
  } else {
    PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_ABt", "Mat");
    PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_ABt", algTypes, nalg, algTypes[alg], &alg, &flg));
    PetscOptionsEnd();
  }
  if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));

  C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense;
  C->ops->productsymbolic          = MatProductSymbolic_ABt;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Entry point for MatProductSetFromOptions on MPI dense matrices: dispatch on
   the requested product type; unsupported types fall through untouched. */
PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C)
{
  Mat_Product *product = C->product;

  PetscFunctionBegin;
  switch (product->type) {
#if defined(PETSC_HAVE_ELEMENTAL)
  case MATPRODUCT_AB:
    PetscCall(MatProductSetFromOptions_MPIDense_AB(C));
    break;
#endif
  case MATPRODUCT_AtB:
    PetscCall(MatProductSetFromOptions_MPIDense_AtB(C));
    break;
  case MATPRODUCT_ABt:
    PetscCall(MatProductSetFromOptions_MPIDense_ABt(C));
    break;
  default:
    break;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}