/*
   Basic functions for basic parallel dense matrices.
   Portions of this code are under:
   Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
*/

#include <../src/mat/impls/dense/mpi/mpidense.h> /*I "petscmat.h" I*/
#include <../src/mat/impls/aij/mpi/mpiaij.h>
#include <petscblaslapack.h>

/*@
  MatDenseGetLocalMatrix - For a `MATMPIDENSE` or `MATSEQDENSE` matrix returns the sequential
  matrix that represents the operator. For sequential matrices it returns itself.

  Input Parameter:
. A - the sequential or MPI `MATDENSE` matrix

  Output Parameter:
. B - the inner matrix

  Level: intermediate

.seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATMPIDENSE`, `MATSEQDENSE`
@*/
PetscErrorCode MatDenseGetLocalMatrix(Mat A, Mat *B)
{
  Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
  PetscBool     flg;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscAssertPointer(B, 2);
  /* MPI case: hand back the per-process sequential block stored in the implementation data */
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &flg));
  if (flg) *B = mat->A;
  else {
    /* Sequential case: the matrix is its own "local" matrix; any other base type is unsupported */
    PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQDENSE, &flg));
    PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)A)->type_name);
    *B = A;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Copy A into B by delegating to the per-process sequential blocks (same layout assumed) */
static PetscErrorCode MatCopy_MPIDense(Mat A, Mat B, MatStructure s)
{
  Mat_MPIDense *Amat = (Mat_MPIDense *)A->data;
  Mat_MPIDense *Bmat = (Mat_MPIDense *)B->data;

  PetscFunctionBegin;
  PetscCall(MatCopy(Amat->A, Bmat->A, s));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Add alpha to each locally owned diagonal entry A(j,j), rstart <= j < min(rend, N) */
PetscErrorCode MatShift_MPIDense(Mat A, PetscScalar alpha)
{
  Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
  PetscInt      j, lda, rstart = A->rmap->rstart, rend = A->rmap->rend, rend2;
  PetscScalar  *v;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArray(mat->A, &v));
  PetscCall(MatDenseGetLDA(mat->A, &lda));
  /* clamp to the global column count so rectangular (tall) matrices do not index past the diagonal */
  rend2 = PetscMin(rend, A->cmap->N);
  if (rend2 > rstart) {
    /* local row j-rstart, global column j, column-major with leading dimension lda */
    for (j = rstart; j < rend2; j++) v[j - rstart + j * lda] += alpha;
    PetscCall(PetscLogFlops(rend2 - rstart));
  }
  PetscCall(MatDenseRestoreArray(mat->A, &v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Get a row of the matrix; only rows owned by this process are supported */
static PetscErrorCode MatGetRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
  PetscInt      lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;

  PetscFunctionBegin;
  PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows");
  lrow = row - rstart; /* translate to the inner sequential matrix's row numbering */
  PetscCall(MatGetRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Release resources obtained with MatGetRow_MPIDense(); must be passed the same row */
static PetscErrorCode MatRestoreRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
  PetscInt      lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;

  PetscFunctionBegin;
  PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows");
  lrow = row - rstart;
  PetscCall(MatRestoreRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Return (and cache on A as composed object "DiagonalBlock") the square on-diagonal block
   owned by this process, wrapping the corresponding columns of the inner matrix's storage */
static PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A, Mat *a)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)A->data;
  PetscInt      m = A->rmap->n, rstart = A->rmap->rstart;
  PetscScalar  *array;
  MPI_Comm      comm;
  PetscBool     flg;
  Mat           B;

  PetscFunctionBegin;
  PetscCall(MatHasCongruentLayouts(A, &flg));
  PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only square matrices supported.");
  PetscCall(PetscObjectQuery((PetscObject)A, "DiagonalBlock", (PetscObject *)&B));
  if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */
#if PetscDefined(HAVE_CUDA)
    /* device-resident inner matrices are not handled by the host-array wrapping below */
    PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSECUDA, &flg));
    PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSECUDA);
#elif PetscDefined(HAVE_HIP)
    PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSEHIP, &flg));
    PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSEHIP);
#endif
    PetscCall(PetscObjectGetComm((PetscObject)(mdn->A), &comm));
    PetscCall(MatCreate(comm, &B));
    PetscCall(MatSetSizes(B, m, m, m, m));
    PetscCall(MatSetType(B, ((PetscObject)mdn->A)->type_name));
    PetscCall(MatDenseGetArrayRead(mdn->A, (const PetscScalar **)&array));
    /* NOTE(review): the offset m*rstart (and B's implied leading dimension m) assumes the
       inner matrix has lda == m; looks wrong if MatDenseSetLDA() was used -- TODO confirm */
    PetscCall(MatSeqDenseSetPreallocation(B, array + m * rstart));
    PetscCall(MatDenseRestoreArrayRead(mdn->A, (const PetscScalar **)&array));
    /* composing transfers the reference; the MatDestroy below does not free B */
    PetscCall(PetscObjectCompose((PetscObject)A, "DiagonalBlock", (PetscObject)B));
    *a = B;
    PetscCall(MatDestroy(&B));
  } else *a = B;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Insert/add values: locally owned rows go straight into the inner matrix,
   off-process rows are stashed for communication at assembly time */
static PetscErrorCode MatSetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIDense *A = (Mat_MPIDense *)mat->data;
  PetscInt      i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;
  PetscBool     roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative rows are silently ignored by convention */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        PetscCall(MatSetValues(A->A, 1, &row, n, idxn, v + i * n, addv));
      } else {
        /* column-oriented input: v is m x n column-major, so element (i,j) is v[i + j*m] */
        for (j = 0; j < n; j++) {
          if (idxn[j] < 0) continue;
          PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE,
"Column too large");
          PetscCall(MatSetValues(A->A, 1, &row, 1, &idxn[j], v + i + j * m, addv));
        }
      }
    } else if (!A->donotstash) {
      /* off-process entry: mark unassembled and stash for AssemblyBegin/End communication */
      mat->assembled = PETSC_FALSE;
      if (roworiented) {
        PetscCall(MatStashValuesRow_Private(&mat->stash, idxm[i], n, idxn, v + i * n, PETSC_FALSE));
      } else {
        PetscCall(MatStashValuesCol_Private(&mat->stash, idxm[i], n, idxn, v + i, m, PETSC_FALSE));
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Retrieve values; only entries in locally owned rows can be requested */
static PetscErrorCode MatGetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
  PetscInt      i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j = 0; j < n; j++) {
        if (idxn[j] < 0) continue; /* negative column */
        PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large");
        /* output v is row-major m x n here, matching the (i,j) request ordering */
        PetscCall(MatGetValues(mdn->A, 1, &row, 1, &idxn[j], v + i * n + j));
      }
    } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Leading dimension of the inner (local) dense storage */
static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A, PetscInt *lda)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseGetLDA(a->A, lda));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Set the leading dimension; lazily creates the inner sequential matrix (host or
   CUDA/HIP flavor to match A's type) if preallocation has not happened yet */
static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A, PetscInt lda)
{
  Mat_MPIDense *a     = (Mat_MPIDense *)A->data;
  MatType       mtype = MATSEQDENSE;

  PetscFunctionBegin;
  if (!a->A) {
    PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
    PetscCall(PetscLayoutSetUp(A->rmap));
    PetscCall(PetscLayoutSetUp(A->cmap));
    PetscCall(MatCreate(PETSC_COMM_SELF, &a->A));
    /* inner matrix stores all N global columns for the locally owned rows */
    PetscCall(MatSetSizes(a->A, A->rmap->n, A->cmap->N, A->rmap->n, A->cmap->N));
#if PetscDefined(HAVE_CUDA)
    PetscBool iscuda;
    PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSECUDA, &iscuda));
    if (iscuda) mtype = MATSEQDENSECUDA;
#elif PetscDefined(HAVE_HIP)
    PetscBool iship;
    PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSEHIP, &iship));
    if (iship) mtype = MATSEQDENSEHIP;
#endif
    PetscCall(MatSetType(a->A, mtype));
  }
  PetscCall(MatDenseSetLDA(a->A, lda));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Read-write access to the local dense array; invalid while a submatrix view is outstanding */
static PetscErrorCode MatDenseGetArray_MPIDense(Mat A, PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseGetArray(a->A, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Read-only access to the local dense array */
static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A, const PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseGetArrayRead(a->A, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Write-only access to the local dense array (previous contents need not be preserved) */
static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A, PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseGetArrayWrite(a->A, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Temporarily replace the local storage with a user-provided array (see MatDensePlaceArray) */
static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A, const
PetscScalar *array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  /* refuse while column-vector or submatrix views of the storage are outstanding */
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDensePlaceArray(a->A, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Undo MatDensePlaceArray(): restore the matrix's original storage */
static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseResetArray(a->A));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Permanently replace the local storage with a user-provided array */
static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A, const PetscScalar *array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseReplaceArray(a->A, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Extract the submatrix A(isrow, iscol); the row distribution of the submatrix must
   match that of A (no parallel redistribution), iscol is gathered onto every process */
static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A, IS isrow, IS iscol, MatReuse scall, Mat *B)
{
  Mat_MPIDense      *mat = (Mat_MPIDense *)A->data, *newmatd;
  PetscInt           lda, i, j, rstart, rend, nrows, ncols, Ncols, nlrows, nlcols;
  const PetscInt    *irow, *icol;
  const PetscScalar *v;
  PetscScalar       *bv;
  Mat                newmat;
  IS                 iscol_local;
  MPI_Comm           comm_is, comm_mat;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm_mat));
  PetscCall(PetscObjectGetComm((PetscObject)iscol, &comm_is));
  PetscCheck(comm_mat == comm_is, PETSC_COMM_SELF, PETSC_ERR_ARG_NOTSAMECOMM, "IS communicator must match matrix communicator");

  PetscCall(ISAllGather(iscol, &iscol_local));
  PetscCall(ISGetIndices(isrow, &irow));
  PetscCall(ISGetIndices(iscol_local, &icol));
  PetscCall(ISGetLocalSize(isrow, &nrows));
  PetscCall(ISGetLocalSize(iscol, &ncols));
  PetscCall(ISGetSize(iscol, &Ncols)); /* global number of columns, size of iscol_local */

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK. ... Currently supports only submatrix same partitioning as
     original matrix! */

  PetscCall(MatGetLocalSize(A, &nlrows, &nlcols));
  PetscCall(MatGetOwnershipRange(A, &rstart, &rend));

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &newmat));
    PetscCall(MatSetSizes(newmat, nrows, ncols, PETSC_DECIDE, Ncols));
    PetscCall(MatSetType(newmat, ((PetscObject)A)->type_name));
    PetscCall(MatMPIDenseSetPreallocation(newmat, NULL));
  }

  /* Now extract the data pointers and do the copy, column at a time */
  /* NOTE(review): the bare bv++ walk assumes the new inner matrix is stored contiguously
     (lda == nrows); true for the fresh preallocation above, unverified for MAT_REUSE_MATRIX */
  newmatd = (Mat_MPIDense *)newmat->data;
  PetscCall(MatDenseGetArray(newmatd->A, &bv));
  PetscCall(MatDenseGetArrayRead(mat->A, &v));
  PetscCall(MatDenseGetLDA(mat->A, &lda));
  for (i = 0; i < Ncols; i++) {
    const PetscScalar *av = v + lda * icol[i]; /* source column icol[i] */
    for (j = 0; j < nrows; j++) *bv++ = av[irow[j] - rstart];
  }
  PetscCall(MatDenseRestoreArrayRead(mat->A, &v));
  PetscCall(MatDenseRestoreArray(newmatd->A, &bv));

  /* Assemble the matrices so that the correct flags are set */
  PetscCall(MatAssemblyBegin(newmat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(newmat, MAT_FINAL_ASSEMBLY));

  /* Free work space */
  PetscCall(ISRestoreIndices(isrow, &irow));
  PetscCall(ISRestoreIndices(iscol_local, &icol));
  PetscCall(ISDestroy(&iscol_local));
  *B = newmat;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Restore array obtained with MatDenseGetArray_MPIDense() */
static PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A, PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseRestoreArray(a->A, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Restore array obtained with MatDenseGetArrayRead_MPIDense() */
static PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A, const PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseRestoreArrayRead(a->A, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Restore array obtained with MatDenseGetArrayWrite_MPIDense() */
static PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A, PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseRestoreArrayWrite(a->A, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Start communication of stashed off-process entries (no-op if stashing is disabled) */
static PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat, MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
  PetscInt      nstash, reallocs;

  PetscFunctionBegin;
  if (mdn->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
  PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
  PetscCall(PetscInfo(mdn->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Receive stashed entries, insert them row by row, then assemble the inner matrix */
static PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat, MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
  PetscInt      i, *row, *col, flg, j, rstart, ncols;
  PetscMPIInt   n;
  PetscScalar  *val;

  PetscFunctionBegin;
  if (!mdn->donotstash && !mat->nooffprocentries) {
    /* wait on receives */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break; /* no more incoming messages */

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIDense(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
  }

  PetscCall(MatAssemblyBegin(mdn->A, mode));
  PetscCall(MatAssemblyEnd(mdn->A, mode));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Zero all locally stored entries */
static PetscErrorCode MatZeroEntries_MPIDense(Mat A)
{
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatZeroEntries(l->A));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Zero the given global rows; optionally place diag on the diagonal and fix up b = diag*x
   for the zeroed rows. Each process handles only the rows it owns. */
static PetscErrorCode MatZeroRows_MPIDense(Mat A, PetscInt n, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;
  PetscInt      i, len, *lrows;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(PetscLayoutMapLocal(A->rmap, n, rows, &len, &lrows, NULL));
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArrayWrite(b, &bb));
    for (i = 0; i < len; ++i) bb[lrows[i]] = diag * xx[lrows[i]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArrayWrite(b, &bb));
  }
  /* zero the rows first, then set the diagonal collectively via MatDiagonalSet */
  PetscCall(MatZeroRows(l->A, len, lrows, 0.0, NULL, NULL));
  if (diag != 0.0) {
    Vec d;

    PetscCall(MatCreateVecs(A, NULL, &d));
    PetscCall(VecSet(d, diag));
    PetscCall(MatDiagonalSet(A, d, INSERT_VALUES));
    PetscCall(VecDestroy(&d));
  }
  PetscCall(PetscFree(lrows));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat, Vec, Vec);
PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat, Vec, Vec, Vec);
PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat, Vec, Vec);
PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat, Vec, Vec, Vec);

/* y = A*x: broadcast the full x into the local work vector via the SF, then local mult */
static PetscErrorCode MatMult_MPIDense(Mat mat, Vec xx, Vec yy)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat)); /* lazily build the scatter */
  PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
  PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
  PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
  PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
  PetscCall((*mdn->A->ops->mult)(mdn->A, mdn->lvec, yy));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* z = y + A*x: same communication pattern as MatMult, local multadd at the end */
static PetscErrorCode MatMultAdd_MPIDense(Mat mat, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
  PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
  PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
  PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
  PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
  PetscCall((*mdn->A->ops->multadd)(mdn->A, mdn->lvec, yy, zz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* y = A^T*x: local transpose-mult into lvec, then sum contributions across ranks via SF reduce */
static PetscErrorCode MatMultTranspose_MPIDense(Mat A, Vec xx, Vec yy)
{
  Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
  PetscCall(VecSet(yy, 0.0)); /* reduce accumulates into yy, so it must start at zero */
  PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec));
  PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(yy, &ay, &aymtype));
  PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
  PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
  PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
  PetscCall(VecRestoreArrayAndMemType(yy, &ay));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* z = y + A^T*x */
static PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A, Vec
xx, Vec yy, Vec zz) 530 { 531 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 532 const PetscScalar *ax; 533 PetscScalar *ay; 534 PetscMemType axmtype, aymtype; 535 536 PetscFunctionBegin; 537 if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); 538 PetscCall(VecCopy(yy, zz)); 539 PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec)); 540 PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype)); 541 PetscCall(VecGetArrayAndMemType(zz, &ay, &aymtype)); 542 PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM)); 543 PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM)); 544 PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax)); 545 PetscCall(VecRestoreArrayAndMemType(zz, &ay)); 546 PetscFunctionReturn(PETSC_SUCCESS); 547 } 548 549 PetscErrorCode MatGetDiagonal_MPIDense(Mat A, Vec v) 550 { 551 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 552 PetscInt lda, len, i, nl, ng, m = A->rmap->n, radd; 553 PetscScalar *x; 554 const PetscScalar *av; 555 556 PetscFunctionBegin; 557 PetscCall(VecGetArray(v, &x)); 558 PetscCall(VecGetSize(v, &ng)); 559 PetscCheck(ng == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming mat and vec"); 560 PetscCall(VecGetLocalSize(v, &nl)); 561 len = PetscMin(a->A->rmap->n, a->A->cmap->n); 562 radd = A->rmap->rstart * m; 563 PetscCall(MatDenseGetArrayRead(a->A, &av)); 564 PetscCall(MatDenseGetLDA(a->A, &lda)); 565 for (i = 0; i < len; i++) x[i] = av[radd + i * lda + i]; 566 PetscCall(MatDenseRestoreArrayRead(a->A, &av)); 567 PetscCall(PetscArrayzero(x + i, nl - i)); 568 PetscCall(VecRestoreArray(v, &x)); 569 PetscFunctionReturn(PETSC_SUCCESS); 570 } 571 572 static PetscErrorCode MatDestroy_MPIDense(Mat mat) 573 { 574 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 575 576 PetscFunctionBegin; 577 PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N)); 578 PetscCall(MatStashDestroy_Private(&mat->stash)); 579 
  /* outstanding views of the storage indicate a usage-order error by the caller */
  PetscCheck(!mdn->vecinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!mdn->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDestroy(&mdn->A));
  PetscCall(VecDestroy(&mdn->lvec));
  PetscCall(PetscSFDestroy(&mdn->Mvctx));
  PetscCall(VecDestroy(&mdn->cvec));
  PetscCall(MatDestroy(&mdn->cmat));

  PetscCall(PetscFree(mat->data));
  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));

  /* detach every method composed on this type (array access, conversions, products) */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", NULL));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", NULL));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", NULL));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensecuda_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAPlaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAResetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAReplaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDASetPreallocation_C", NULL));
#endif
#if defined(PETSC_HAVE_HIP)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensehip_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensehip_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidensehip_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaijhipsparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPPlaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPResetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPReplaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPSetPreallocation_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", NULL));

  /* drop the cached diagonal block created by MatGetDiagonalBlock_MPIDense() */
  PetscCall(PetscObjectCompose((PetscObject)mat, "DiagonalBlock", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PETSC_INTERN PetscErrorCode MatView_SeqDense(Mat, PetscViewer);

#include <petscdraw.h>
/* View helper for ASCII/draw/socket viewers: info formats are handled per process;
   otherwise the whole matrix is gathered to rank 0 and viewed sequentially there */
static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
{
  Mat_MPIDense     *mdn = (Mat_MPIDense *)mat->data;
  PetscMPIInt       rank;
  PetscViewerType   vtype;
  PetscBool         iascii, isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  if (iascii) {
    PetscCall(PetscViewerGetType(viewer, &vtype)); /* vtype is fetched but unused here */
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %" PetscInt_FMT " \n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                   (PetscInt)info.memory));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      if (mdn->Mvctx) PetscCall(PetscSFView(mdn->Mvctx, viewer));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(PETSC_SUCCESS);
    }
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;

    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawIsNull(draw, &isnull));
    if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat          A;
    PetscInt     M = mat->rmap->N, N = mat->cmap->N, m, row, i, nz;
    PetscInt    *cols;
    PetscScalar *vals;

    /* rank 0 owns all rows of the gathered copy; other ranks own none */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
    if (rank == 0) {
      PetscCall(MatSetSizes(A, M, N, M, N));
    } else {
      PetscCall(MatSetSizes(A, 0, 0, M, N));
    }
    /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
    PetscCall(MatSetType(A, MATMPIDENSE));
    PetscCall(MatMPIDenseSetPreallocation(A, NULL));

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    A->insertmode = INSERT_VALUES;

    row = mat->rmap->rstart;
    m   = mdn->A->rmap->n;
    for (i = 0; i < m; i++) {
      PetscCall(MatGetRow_MPIDense(mat, row, &nz, &cols, &vals));
      PetscCall(MatSetValues_MPIDense(A, 1, &row, nz, cols, vals, INSERT_VALUES));
      PetscCall(MatRestoreRow_MPIDense(mat, row, &nz, &cols, &vals));
      row++;
    }

    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    if (rank == 0) {
      /* view rank 0's inner sequential matrix, which now holds the whole operator */
      PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIDense *)(A->data))->A, ((PetscObject)mat)->name));
      PetscCall(MatView_SeqDense(((Mat_MPIDense *)(A->data))->A, sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(PetscViewerFlush(viewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Dispatch viewing by viewer type; binary has a dedicated dense writer */
static PetscErrorCode MatView_MPIDense(Mat mat, PetscViewer viewer)
{
  PetscBool iascii, isbinary, isdraw, issocket;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));

  if (iascii || issocket || isdraw) {
    PetscCall(MatView_MPIDense_ASCIIorDraworSocket(mat, viewer));
  } else if (isbinary) PetscCall(MatView_Dense_Binary(mat, viewer));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Gather matrix statistics: local stats from the inner matrix, reduced with
   MPI_MAX or MPI_SUM across the communicator for the global flavors */
static PetscErrorCode MatGetInfo_MPIDense(Mat A, MatInfoType flag, MatInfo *info)
{
  Mat_MPIDense  *mat = (Mat_MPIDense *)A->data;
  Mat            mdn = mat->A;
  PetscLogDouble isend[5], irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;

  PetscCall(MatGetInfo(mdn, MAT_LOCAL, info));

  isend[0] = info->nz_used;
  isend[1] = info->nz_allocated;
  isend[2] = info->nz_unneeded;
  isend[3] = info->memory;
  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)A)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)A)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Forward supported options to the inner matrix; ROW_ORIENTED also updates the local flag.
   (Function continues beyond the end of this chunk.) */
static PetscErrorCode MatSetOption_MPIDense(Mat A, MatOption op, PetscBool flg)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A, 1);
    a->roworiented = flg;
    PetscCall(MatSetOption(a->A, op, flg));
    break;
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_USE_HASH_TABLE:
  case MAT_SORTED_FULL:
    PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
    break;
  case
MAT_IGNORE_OFF_PROC_ENTRIES: 837 a->donotstash = flg; 838 break; 839 case MAT_SYMMETRIC: 840 case MAT_STRUCTURALLY_SYMMETRIC: 841 case MAT_HERMITIAN: 842 case MAT_SYMMETRY_ETERNAL: 843 case MAT_STRUCTURAL_SYMMETRY_ETERNAL: 844 case MAT_SPD: 845 case MAT_IGNORE_LOWER_TRIANGULAR: 846 case MAT_IGNORE_ZERO_ENTRIES: 847 case MAT_SPD_ETERNAL: 848 /* if the diagonal matrix is square it inherits some of the properties above */ 849 PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); 850 break; 851 default: 852 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %s", MatOptions[op]); 853 } 854 PetscFunctionReturn(PETSC_SUCCESS); 855 } 856 857 static PetscErrorCode MatDiagonalScale_MPIDense(Mat A, Vec ll, Vec rr) 858 { 859 Mat_MPIDense *mdn = (Mat_MPIDense *)A->data; 860 const PetscScalar *l; 861 PetscScalar x, *v, *vv, *r; 862 PetscInt i, j, s2a, s3a, s2, s3, m = mdn->A->rmap->n, n = mdn->A->cmap->n, lda; 863 864 PetscFunctionBegin; 865 PetscCall(MatDenseGetArray(mdn->A, &vv)); 866 PetscCall(MatDenseGetLDA(mdn->A, &lda)); 867 PetscCall(MatGetLocalSize(A, &s2, &s3)); 868 if (ll) { 869 PetscCall(VecGetLocalSize(ll, &s2a)); 870 PetscCheck(s2a == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left scaling vector non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT, s2a, s2); 871 PetscCall(VecGetArrayRead(ll, &l)); 872 for (i = 0; i < m; i++) { 873 x = l[i]; 874 v = vv + i; 875 for (j = 0; j < n; j++) { 876 (*v) *= x; 877 v += lda; 878 } 879 } 880 PetscCall(VecRestoreArrayRead(ll, &l)); 881 PetscCall(PetscLogFlops(1.0 * n * m)); 882 } 883 if (rr) { 884 const PetscScalar *ar; 885 886 PetscCall(VecGetLocalSize(rr, &s3a)); 887 PetscCheck(s3a == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Right scaling vec non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT ".", s3a, s3); 888 PetscCall(VecGetArrayRead(rr, &ar)); 889 if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); 890 PetscCall(VecGetArray(mdn->lvec, &r)); 891 
PetscCall(PetscSFBcastBegin(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE)); 892 PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE)); 893 PetscCall(VecRestoreArrayRead(rr, &ar)); 894 for (i = 0; i < n; i++) { 895 x = r[i]; 896 v = vv + i * lda; 897 for (j = 0; j < m; j++) (*v++) *= x; 898 } 899 PetscCall(VecRestoreArray(mdn->lvec, &r)); 900 PetscCall(PetscLogFlops(1.0 * n * m)); 901 } 902 PetscCall(MatDenseRestoreArray(mdn->A, &vv)); 903 PetscFunctionReturn(PETSC_SUCCESS); 904 } 905 906 static PetscErrorCode MatNorm_MPIDense(Mat A, NormType type, PetscReal *nrm) 907 { 908 Mat_MPIDense *mdn = (Mat_MPIDense *)A->data; 909 PetscInt i, j; 910 PetscMPIInt size; 911 PetscReal sum = 0.0; 912 const PetscScalar *av, *v; 913 914 PetscFunctionBegin; 915 PetscCall(MatDenseGetArrayRead(mdn->A, &av)); 916 v = av; 917 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 918 if (size == 1) { 919 PetscCall(MatNorm(mdn->A, type, nrm)); 920 } else { 921 if (type == NORM_FROBENIUS) { 922 for (i = 0; i < mdn->A->cmap->n * mdn->A->rmap->n; i++) { 923 sum += PetscRealPart(PetscConj(*v) * (*v)); 924 v++; 925 } 926 PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A))); 927 *nrm = PetscSqrtReal(*nrm); 928 PetscCall(PetscLogFlops(2.0 * mdn->A->cmap->n * mdn->A->rmap->n)); 929 } else if (type == NORM_1) { 930 PetscReal *tmp, *tmp2; 931 PetscCall(PetscCalloc2(A->cmap->N, &tmp, A->cmap->N, &tmp2)); 932 *nrm = 0.0; 933 v = av; 934 for (j = 0; j < mdn->A->cmap->n; j++) { 935 for (i = 0; i < mdn->A->rmap->n; i++) { 936 tmp[j] += PetscAbsScalar(*v); 937 v++; 938 } 939 } 940 PetscCall(MPIU_Allreduce(tmp, tmp2, A->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A))); 941 for (j = 0; j < A->cmap->N; j++) { 942 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 943 } 944 PetscCall(PetscFree2(tmp, tmp2)); 945 PetscCall(PetscLogFlops(A->cmap->n * A->rmap->n)); 946 } else if (type == NORM_INFINITY) { /* max row norm */ 947 PetscReal 
ntemp; 948 PetscCall(MatNorm(mdn->A, type, &ntemp)); 949 PetscCall(MPIU_Allreduce(&ntemp, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A))); 950 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for two norm"); 951 } 952 PetscCall(MatDenseRestoreArrayRead(mdn->A, &av)); 953 PetscFunctionReturn(PETSC_SUCCESS); 954 } 955 956 static PetscErrorCode MatTranspose_MPIDense(Mat A, MatReuse reuse, Mat *matout) 957 { 958 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 959 Mat B; 960 PetscInt M = A->rmap->N, N = A->cmap->N, m, n, *rwork, rstart = A->rmap->rstart; 961 PetscInt j, i, lda; 962 PetscScalar *v; 963 964 PetscFunctionBegin; 965 if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout)); 966 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 967 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 968 PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M)); 969 PetscCall(MatSetType(B, ((PetscObject)A)->type_name)); 970 PetscCall(MatMPIDenseSetPreallocation(B, NULL)); 971 } else B = *matout; 972 973 m = a->A->rmap->n; 974 n = a->A->cmap->n; 975 PetscCall(MatDenseGetArrayRead(a->A, (const PetscScalar **)&v)); 976 PetscCall(MatDenseGetLDA(a->A, &lda)); 977 PetscCall(PetscMalloc1(m, &rwork)); 978 for (i = 0; i < m; i++) rwork[i] = rstart + i; 979 for (j = 0; j < n; j++) { 980 PetscCall(MatSetValues(B, 1, &j, m, rwork, v, INSERT_VALUES)); 981 v += lda; 982 } 983 PetscCall(MatDenseRestoreArrayRead(a->A, (const PetscScalar **)&v)); 984 PetscCall(PetscFree(rwork)); 985 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 986 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 987 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 988 *matout = B; 989 } else { 990 PetscCall(MatHeaderMerge(A, &B)); 991 } 992 PetscFunctionReturn(PETSC_SUCCESS); 993 } 994 995 static PetscErrorCode MatDuplicate_MPIDense(Mat, MatDuplicateOption, Mat *); 996 PETSC_INTERN PetscErrorCode 
MatScale_MPIDense(Mat, PetscScalar); 997 998 static PetscErrorCode MatSetUp_MPIDense(Mat A) 999 { 1000 PetscFunctionBegin; 1001 PetscCall(PetscLayoutSetUp(A->rmap)); 1002 PetscCall(PetscLayoutSetUp(A->cmap)); 1003 if (!A->preallocated) PetscCall(MatMPIDenseSetPreallocation(A, NULL)); 1004 PetscFunctionReturn(PETSC_SUCCESS); 1005 } 1006 1007 static PetscErrorCode MatAXPY_MPIDense(Mat Y, PetscScalar alpha, Mat X, MatStructure str) 1008 { 1009 Mat_MPIDense *A = (Mat_MPIDense *)Y->data, *B = (Mat_MPIDense *)X->data; 1010 1011 PetscFunctionBegin; 1012 PetscCall(MatAXPY(A->A, alpha, B->A, str)); 1013 PetscFunctionReturn(PETSC_SUCCESS); 1014 } 1015 1016 static PetscErrorCode MatConjugate_MPIDense(Mat mat) 1017 { 1018 Mat_MPIDense *a = (Mat_MPIDense *)mat->data; 1019 1020 PetscFunctionBegin; 1021 PetscCall(MatConjugate(a->A)); 1022 PetscFunctionReturn(PETSC_SUCCESS); 1023 } 1024 1025 static PetscErrorCode MatRealPart_MPIDense(Mat A) 1026 { 1027 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1028 1029 PetscFunctionBegin; 1030 PetscCall(MatRealPart(a->A)); 1031 PetscFunctionReturn(PETSC_SUCCESS); 1032 } 1033 1034 static PetscErrorCode MatImaginaryPart_MPIDense(Mat A) 1035 { 1036 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1037 1038 PetscFunctionBegin; 1039 PetscCall(MatImaginaryPart(a->A)); 1040 PetscFunctionReturn(PETSC_SUCCESS); 1041 } 1042 1043 static PetscErrorCode MatGetColumnVector_MPIDense(Mat A, Vec v, PetscInt col) 1044 { 1045 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1046 1047 PetscFunctionBegin; 1048 PetscCheck(a->A, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing local matrix"); 1049 PetscCheck(a->A->ops->getcolumnvector, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing get column operation"); 1050 PetscCall((*a->A->ops->getcolumnvector)(a->A, v, col)); 1051 PetscFunctionReturn(PETSC_SUCCESS); 1052 } 1053 1054 PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat, PetscInt, PetscReal *); 1055 1056 static PetscErrorCode 
MatGetColumnReductions_MPIDense(Mat A, PetscInt type, PetscReal *reductions) 1057 { 1058 PetscInt i, m, n; 1059 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1060 PetscReal *work; 1061 1062 PetscFunctionBegin; 1063 PetscCall(MatGetSize(A, &m, &n)); 1064 PetscCall(PetscMalloc1(n, &work)); 1065 if (type == REDUCTION_MEAN_REALPART) { 1066 PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_REALPART, work)); 1067 } else if (type == REDUCTION_MEAN_IMAGINARYPART) { 1068 PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_IMAGINARYPART, work)); 1069 } else { 1070 PetscCall(MatGetColumnReductions_SeqDense(a->A, type, work)); 1071 } 1072 if (type == NORM_2) { 1073 for (i = 0; i < n; i++) work[i] *= work[i]; 1074 } 1075 if (type == NORM_INFINITY) { 1076 PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, A->hdr.comm)); 1077 } else { 1078 PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, A->hdr.comm)); 1079 } 1080 PetscCall(PetscFree(work)); 1081 if (type == NORM_2) { 1082 for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]); 1083 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 1084 for (i = 0; i < n; i++) reductions[i] /= m; 1085 } 1086 PetscFunctionReturn(PETSC_SUCCESS); 1087 } 1088 1089 static PetscErrorCode MatSetRandom_MPIDense(Mat x, PetscRandom rctx) 1090 { 1091 Mat_MPIDense *d = (Mat_MPIDense *)x->data; 1092 1093 PetscFunctionBegin; 1094 PetscCall(MatSetRandom(d->A, rctx)); 1095 #if defined(PETSC_HAVE_DEVICE) 1096 x->offloadmask = d->A->offloadmask; 1097 #endif 1098 PetscFunctionReturn(PETSC_SUCCESS); 1099 } 1100 1101 static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A, PetscBool *missing, PetscInt *d) 1102 { 1103 PetscFunctionBegin; 1104 *missing = PETSC_FALSE; 1105 PetscFunctionReturn(PETSC_SUCCESS); 1106 } 1107 1108 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat); 1109 static PetscErrorCode 
MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat); 1110 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat); 1111 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat); 1112 static PetscErrorCode MatEqual_MPIDense(Mat, Mat, PetscBool *); 1113 static PetscErrorCode MatLoad_MPIDense(Mat, PetscViewer); 1114 1115 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense, 1116 MatGetRow_MPIDense, 1117 MatRestoreRow_MPIDense, 1118 MatMult_MPIDense, 1119 /* 4*/ MatMultAdd_MPIDense, 1120 MatMultTranspose_MPIDense, 1121 MatMultTransposeAdd_MPIDense, 1122 NULL, 1123 NULL, 1124 NULL, 1125 /* 10*/ NULL, 1126 NULL, 1127 NULL, 1128 NULL, 1129 MatTranspose_MPIDense, 1130 /* 15*/ MatGetInfo_MPIDense, 1131 MatEqual_MPIDense, 1132 MatGetDiagonal_MPIDense, 1133 MatDiagonalScale_MPIDense, 1134 MatNorm_MPIDense, 1135 /* 20*/ MatAssemblyBegin_MPIDense, 1136 MatAssemblyEnd_MPIDense, 1137 MatSetOption_MPIDense, 1138 MatZeroEntries_MPIDense, 1139 /* 24*/ MatZeroRows_MPIDense, 1140 NULL, 1141 NULL, 1142 NULL, 1143 NULL, 1144 /* 29*/ MatSetUp_MPIDense, 1145 NULL, 1146 NULL, 1147 MatGetDiagonalBlock_MPIDense, 1148 NULL, 1149 /* 34*/ MatDuplicate_MPIDense, 1150 NULL, 1151 NULL, 1152 NULL, 1153 NULL, 1154 /* 39*/ MatAXPY_MPIDense, 1155 MatCreateSubMatrices_MPIDense, 1156 NULL, 1157 MatGetValues_MPIDense, 1158 MatCopy_MPIDense, 1159 /* 44*/ NULL, 1160 MatScale_MPIDense, 1161 MatShift_MPIDense, 1162 NULL, 1163 NULL, 1164 /* 49*/ MatSetRandom_MPIDense, 1165 NULL, 1166 NULL, 1167 NULL, 1168 NULL, 1169 /* 54*/ NULL, 1170 NULL, 1171 NULL, 1172 NULL, 1173 NULL, 1174 /* 59*/ MatCreateSubMatrix_MPIDense, 1175 MatDestroy_MPIDense, 1176 MatView_MPIDense, 1177 NULL, 1178 NULL, 1179 /* 64*/ NULL, 1180 NULL, 1181 NULL, 1182 NULL, 1183 NULL, 1184 /* 69*/ NULL, 1185 NULL, 1186 NULL, 1187 NULL, 1188 NULL, 1189 /* 74*/ NULL, 1190 NULL, 1191 NULL, 1192 NULL, 1193 NULL, 1194 /* 79*/ NULL, 1195 NULL, 1196 NULL, 1197 NULL, 1198 /* 
83*/ MatLoad_MPIDense, 1199 NULL, 1200 NULL, 1201 NULL, 1202 NULL, 1203 NULL, 1204 /* 89*/ NULL, 1205 NULL, 1206 NULL, 1207 NULL, 1208 NULL, 1209 /* 94*/ NULL, 1210 NULL, 1211 MatMatTransposeMultSymbolic_MPIDense_MPIDense, 1212 MatMatTransposeMultNumeric_MPIDense_MPIDense, 1213 NULL, 1214 /* 99*/ MatProductSetFromOptions_MPIDense, 1215 NULL, 1216 NULL, 1217 MatConjugate_MPIDense, 1218 NULL, 1219 /*104*/ NULL, 1220 MatRealPart_MPIDense, 1221 MatImaginaryPart_MPIDense, 1222 NULL, 1223 NULL, 1224 /*109*/ NULL, 1225 NULL, 1226 NULL, 1227 MatGetColumnVector_MPIDense, 1228 MatMissingDiagonal_MPIDense, 1229 /*114*/ NULL, 1230 NULL, 1231 NULL, 1232 NULL, 1233 NULL, 1234 /*119*/ NULL, 1235 NULL, 1236 NULL, 1237 NULL, 1238 NULL, 1239 /*124*/ NULL, 1240 MatGetColumnReductions_MPIDense, 1241 NULL, 1242 NULL, 1243 NULL, 1244 /*129*/ NULL, 1245 NULL, 1246 MatTransposeMatMultSymbolic_MPIDense_MPIDense, 1247 MatTransposeMatMultNumeric_MPIDense_MPIDense, 1248 NULL, 1249 /*134*/ NULL, 1250 NULL, 1251 NULL, 1252 NULL, 1253 NULL, 1254 /*139*/ NULL, 1255 NULL, 1256 NULL, 1257 NULL, 1258 NULL, 1259 MatCreateMPIMatConcatenateSeqMat_MPIDense, 1260 /*145*/ NULL, 1261 NULL, 1262 NULL, 1263 NULL, 1264 NULL, 1265 /*150*/ NULL, 1266 NULL}; 1267 1268 static PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat, PetscScalar *data) 1269 { 1270 Mat_MPIDense *a = (Mat_MPIDense *)mat->data; 1271 MatType mtype = MATSEQDENSE; 1272 1273 PetscFunctionBegin; 1274 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1275 PetscCall(PetscLayoutSetUp(mat->rmap)); 1276 PetscCall(PetscLayoutSetUp(mat->cmap)); 1277 if (!a->A) { 1278 PetscCall(MatCreate(PETSC_COMM_SELF, &a->A)); 1279 PetscCall(MatSetSizes(a->A, mat->rmap->n, mat->cmap->N, mat->rmap->n, mat->cmap->N)); 1280 } 1281 #if defined(PETSC_HAVE_CUDA) 1282 PetscBool iscuda; 1283 PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSECUDA, &iscuda)); 1284 if (iscuda) 
mtype = MATSEQDENSECUDA; 1285 #endif 1286 #if defined(PETSC_HAVE_HIP) 1287 PetscBool iship; 1288 PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSEHIP, &iship)); 1289 if (iship) mtype = MATSEQDENSEHIP; 1290 #endif 1291 PetscCall(MatSetType(a->A, mtype)); 1292 PetscCall(MatSeqDenseSetPreallocation(a->A, data)); 1293 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 1294 mat->offloadmask = a->A->offloadmask; 1295 #endif 1296 mat->preallocated = PETSC_TRUE; 1297 mat->assembled = PETSC_TRUE; 1298 PetscFunctionReturn(PETSC_SUCCESS); 1299 } 1300 1301 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIDense(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 1302 { 1303 Mat B, C; 1304 1305 PetscFunctionBegin; 1306 PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &C)); 1307 PetscCall(MatConvert_SeqAIJ_SeqDense(C, MATSEQDENSE, MAT_INITIAL_MATRIX, &B)); 1308 PetscCall(MatDestroy(&C)); 1309 if (reuse == MAT_REUSE_MATRIX) { 1310 C = *newmat; 1311 } else C = NULL; 1312 PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C)); 1313 PetscCall(MatDestroy(&B)); 1314 if (reuse == MAT_INPLACE_MATRIX) { 1315 PetscCall(MatHeaderReplace(A, &C)); 1316 } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C; 1317 PetscFunctionReturn(PETSC_SUCCESS); 1318 } 1319 1320 static PetscErrorCode MatConvert_MPIDense_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 1321 { 1322 Mat B, C; 1323 1324 PetscFunctionBegin; 1325 PetscCall(MatDenseGetLocalMatrix(A, &C)); 1326 PetscCall(MatConvert_SeqDense_SeqAIJ(C, MATSEQAIJ, MAT_INITIAL_MATRIX, &B)); 1327 if (reuse == MAT_REUSE_MATRIX) { 1328 C = *newmat; 1329 } else C = NULL; 1330 PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? 
MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C)); 1331 PetscCall(MatDestroy(&B)); 1332 if (reuse == MAT_INPLACE_MATRIX) { 1333 PetscCall(MatHeaderReplace(A, &C)); 1334 } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C; 1335 PetscFunctionReturn(PETSC_SUCCESS); 1336 } 1337 1338 #if defined(PETSC_HAVE_ELEMENTAL) 1339 PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 1340 { 1341 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1342 Mat mat_elemental; 1343 PetscScalar *v; 1344 PetscInt m = A->rmap->n, N = A->cmap->N, rstart = A->rmap->rstart, i, *rows, *cols, lda; 1345 1346 PetscFunctionBegin; 1347 if (reuse == MAT_REUSE_MATRIX) { 1348 mat_elemental = *newmat; 1349 PetscCall(MatZeroEntries(*newmat)); 1350 } else { 1351 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental)); 1352 PetscCall(MatSetSizes(mat_elemental, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N)); 1353 PetscCall(MatSetType(mat_elemental, MATELEMENTAL)); 1354 PetscCall(MatSetUp(mat_elemental)); 1355 PetscCall(MatSetOption(mat_elemental, MAT_ROW_ORIENTED, PETSC_FALSE)); 1356 } 1357 1358 PetscCall(PetscMalloc2(m, &rows, N, &cols)); 1359 for (i = 0; i < N; i++) cols[i] = i; 1360 for (i = 0; i < m; i++) rows[i] = rstart + i; 1361 1362 /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */ 1363 PetscCall(MatDenseGetArray(A, &v)); 1364 PetscCall(MatDenseGetLDA(a->A, &lda)); 1365 if (lda == m) PetscCall(MatSetValues(mat_elemental, m, rows, N, cols, v, ADD_VALUES)); 1366 else { 1367 for (i = 0; i < N; i++) PetscCall(MatSetValues(mat_elemental, m, rows, 1, &i, v + lda * i, ADD_VALUES)); 1368 } 1369 PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY)); 1370 PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY)); 1371 PetscCall(MatDenseRestoreArray(A, &v)); 1372 PetscCall(PetscFree2(rows, cols)); 1373 1374 if (reuse == MAT_INPLACE_MATRIX) { 1375 PetscCall(MatHeaderReplace(A, 
&mat_elemental)); 1376 } else { 1377 *newmat = mat_elemental; 1378 } 1379 PetscFunctionReturn(PETSC_SUCCESS); 1380 } 1381 #endif 1382 1383 static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A, PetscInt col, PetscScalar **vals) 1384 { 1385 Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 1386 1387 PetscFunctionBegin; 1388 PetscCall(MatDenseGetColumn(mat->A, col, vals)); 1389 PetscFunctionReturn(PETSC_SUCCESS); 1390 } 1391 1392 static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A, PetscScalar **vals) 1393 { 1394 Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 1395 1396 PetscFunctionBegin; 1397 PetscCall(MatDenseRestoreColumn(mat->A, vals)); 1398 PetscFunctionReturn(PETSC_SUCCESS); 1399 } 1400 1401 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 1402 { 1403 Mat_MPIDense *mat; 1404 PetscInt m, nloc, N; 1405 1406 PetscFunctionBegin; 1407 PetscCall(MatGetSize(inmat, &m, &N)); 1408 PetscCall(MatGetLocalSize(inmat, NULL, &nloc)); 1409 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 1410 PetscInt sum; 1411 1412 if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N)); 1413 /* Check sum(n) = N */ 1414 PetscCall(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm)); 1415 PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N); 1416 1417 PetscCall(MatCreateDense(comm, m, n, PETSC_DETERMINE, N, NULL, outmat)); 1418 PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 1419 } 1420 1421 /* numeric phase */ 1422 mat = (Mat_MPIDense *)(*outmat)->data; 1423 PetscCall(MatCopy(inmat, mat->A, SAME_NONZERO_PATTERN)); 1424 PetscFunctionReturn(PETSC_SUCCESS); 1425 } 1426 1427 PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A, PetscInt col, Vec *v) 1428 { 1429 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1430 PetscInt lda; 1431 1432 PetscFunctionBegin; 1433 PetscCheck(!a->vecinuse, 
PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1434 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1435 if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec)); 1436 a->vecinuse = col + 1; 1437 PetscCall(MatDenseGetLDA(a->A, &lda)); 1438 PetscCall(MatDenseGetArray(a->A, (PetscScalar **)&a->ptrinuse)); 1439 PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda)); 1440 *v = a->cvec; 1441 PetscFunctionReturn(PETSC_SUCCESS); 1442 } 1443 1444 PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A, PetscInt col, Vec *v) 1445 { 1446 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1447 1448 PetscFunctionBegin; 1449 PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first"); 1450 PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector"); 1451 a->vecinuse = 0; 1452 PetscCall(MatDenseRestoreArray(a->A, (PetscScalar **)&a->ptrinuse)); 1453 PetscCall(VecResetArray(a->cvec)); 1454 if (v) *v = NULL; 1455 PetscFunctionReturn(PETSC_SUCCESS); 1456 } 1457 1458 PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v) 1459 { 1460 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1461 PetscInt lda; 1462 1463 PetscFunctionBegin; 1464 PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1465 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1466 if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec)); 1467 a->vecinuse = col + 1; 1468 PetscCall(MatDenseGetLDA(a->A, &lda)); 1469 PetscCall(MatDenseGetArrayRead(a->A, &a->ptrinuse)); 1470 PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda)); 1471 PetscCall(VecLockReadPush(a->cvec)); 1472 *v = a->cvec; 1473 
PetscFunctionReturn(PETSC_SUCCESS); 1474 } 1475 1476 PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v) 1477 { 1478 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1479 1480 PetscFunctionBegin; 1481 PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first"); 1482 PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector"); 1483 a->vecinuse = 0; 1484 PetscCall(MatDenseRestoreArrayRead(a->A, &a->ptrinuse)); 1485 PetscCall(VecLockReadPop(a->cvec)); 1486 PetscCall(VecResetArray(a->cvec)); 1487 if (v) *v = NULL; 1488 PetscFunctionReturn(PETSC_SUCCESS); 1489 } 1490 1491 PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v) 1492 { 1493 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1494 PetscInt lda; 1495 1496 PetscFunctionBegin; 1497 PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1498 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1499 if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec)); 1500 a->vecinuse = col + 1; 1501 PetscCall(MatDenseGetLDA(a->A, &lda)); 1502 PetscCall(MatDenseGetArrayWrite(a->A, (PetscScalar **)&a->ptrinuse)); 1503 PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda)); 1504 *v = a->cvec; 1505 PetscFunctionReturn(PETSC_SUCCESS); 1506 } 1507 1508 PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v) 1509 { 1510 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1511 1512 PetscFunctionBegin; 1513 PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first"); 1514 PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector"); 1515 a->vecinuse = 0; 1516 PetscCall(MatDenseRestoreArrayWrite(a->A, 
(PetscScalar **)&a->ptrinuse)); 1517 PetscCall(VecResetArray(a->cvec)); 1518 if (v) *v = NULL; 1519 PetscFunctionReturn(PETSC_SUCCESS); 1520 } 1521 1522 static PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A, PetscInt rbegin, PetscInt rend, PetscInt cbegin, PetscInt cend, Mat *v) 1523 { 1524 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1525 Mat_MPIDense *c; 1526 MPI_Comm comm; 1527 PetscInt pbegin, pend; 1528 1529 PetscFunctionBegin; 1530 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 1531 PetscCheck(!a->vecinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1532 PetscCheck(!a->matinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1533 pbegin = PetscMax(0, PetscMin(A->rmap->rend, rbegin) - A->rmap->rstart); 1534 pend = PetscMin(A->rmap->n, PetscMax(0, rend - A->rmap->rstart)); 1535 if (!a->cmat) { 1536 PetscCall(MatCreate(comm, &a->cmat)); 1537 PetscCall(MatSetType(a->cmat, ((PetscObject)A)->type_name)); 1538 if (rend - rbegin == A->rmap->N) PetscCall(PetscLayoutReference(A->rmap, &a->cmat->rmap)); 1539 else { 1540 PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin)); 1541 PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin)); 1542 PetscCall(PetscLayoutSetUp(a->cmat->rmap)); 1543 } 1544 PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin)); 1545 PetscCall(PetscLayoutSetUp(a->cmat->cmap)); 1546 } else { 1547 PetscBool same = (PetscBool)(rend - rbegin == a->cmat->rmap->N); 1548 if (same && a->cmat->rmap->N != A->rmap->N) { 1549 same = (PetscBool)(pend - pbegin == a->cmat->rmap->n); 1550 PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &same, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A))); 1551 } 1552 if (!same) { 1553 PetscCall(PetscLayoutDestroy(&a->cmat->rmap)); 1554 PetscCall(PetscLayoutCreate(comm, &a->cmat->rmap)); 1555 PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin)); 1556 PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin)); 1557 
PetscCall(PetscLayoutSetUp(a->cmat->rmap)); 1558 } 1559 if (cend - cbegin != a->cmat->cmap->N) { 1560 PetscCall(PetscLayoutDestroy(&a->cmat->cmap)); 1561 PetscCall(PetscLayoutCreate(comm, &a->cmat->cmap)); 1562 PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin)); 1563 PetscCall(PetscLayoutSetUp(a->cmat->cmap)); 1564 } 1565 } 1566 c = (Mat_MPIDense *)a->cmat->data; 1567 PetscCheck(!c->A, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1568 PetscCall(MatDenseGetSubMatrix(a->A, pbegin, pend, cbegin, cend, &c->A)); 1569 1570 a->cmat->preallocated = PETSC_TRUE; 1571 a->cmat->assembled = PETSC_TRUE; 1572 #if defined(PETSC_HAVE_DEVICE) 1573 a->cmat->offloadmask = c->A->offloadmask; 1574 #endif 1575 a->matinuse = cbegin + 1; 1576 *v = a->cmat; 1577 PetscFunctionReturn(PETSC_SUCCESS); 1578 } 1579 1580 static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A, Mat *v) 1581 { 1582 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1583 Mat_MPIDense *c; 1584 1585 PetscFunctionBegin; 1586 PetscCheck(a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetSubMatrix() first"); 1587 PetscCheck(a->cmat, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal matrix"); 1588 PetscCheck(*v == a->cmat, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Not the matrix obtained from MatDenseGetSubMatrix()"); 1589 a->matinuse = 0; 1590 c = (Mat_MPIDense *)a->cmat->data; 1591 PetscCall(MatDenseRestoreSubMatrix(a->A, &c->A)); 1592 if (v) *v = NULL; 1593 #if defined(PETSC_HAVE_DEVICE) 1594 A->offloadmask = a->A->offloadmask; 1595 #endif 1596 PetscFunctionReturn(PETSC_SUCCESS); 1597 } 1598 1599 /*MC 1600 MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices. 1601 1602 Options Database Key: 1603 . 
-mat_type mpidense - sets the matrix type to `MATMPIDENSE` during a call to `MatSetFromOptions()`

   Level: beginner

.seealso: [](ch_matrices), `Mat`, `MatCreateDense()`, `MATSEQDENSE`, `MATDENSE`
M*/
/* Type constructor for MATMPIDENSE: allocates the per-process data structure,
   installs the operations table, and composes the type-specific methods that
   the generic MatDenseXXX()/MatConvert()/MatProduct() front ends dispatch to. */
PetscErrorCode MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense *a;

  PetscFunctionBegin;
  PetscCall(PetscNew(&a));
  mat->data = (void *)a;
  mat->ops[0] = MatOps_Values;

  mat->insertmode = NOT_SET_VALUES;

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;

  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)mat), 1, &mat->stash));

  /* stuff used for matrix vector multiply */
  a->lvec = NULL;
  a->Mvctx = NULL;
  a->roworiented = PETSC_TRUE;

  /* array/LDA accessors */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", MatDenseGetLDA_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", MatDenseSetLDA_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", MatDenseGetArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", MatDenseRestoreArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", MatDenseGetArrayRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", MatDenseRestoreArrayRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", MatDenseGetArrayWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", MatDenseRestoreArrayWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", MatDensePlaceArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", MatDenseResetArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", MatDenseReplaceArray_MPIDense));
  /* column-vector and submatrix views */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", MatDenseGetSubMatrix_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", MatDenseRestoreSubMatrix_MPIDense));
  /* conversions */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", MatConvert_MPIDense_MPIAIJ));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", MatConvert_MPIDense_Elemental));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", MatConvert_Dense_ScaLAPACK));
#endif
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", MatConvert_MPIDense_MPIDenseCUDA));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", MatMPIDenseSetPreallocation_MPIDense));
  /* matrix products with (device) AIJ matrices */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
#endif
#if defined(PETSC_HAVE_HIP)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", MatConvert_MPIDense_MPIDenseHIP));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", MatDenseGetColumn_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", MatDenseRestoreColumn_MPIDense));
  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, MATMPIDENSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*MC
   MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.

   This matrix type is identical to `MATSEQDENSE` when constructed with a single process communicator,
   and `MATMPIDENSE` otherwise.

   Options Database Key:
. 
-mat_type dense - sets the matrix type to `MATDENSE` during a call to `MatSetFromOptions()`

   Level: beginner

.seealso: [](ch_matrices), `Mat`, `MATSEQDENSE`, `MATMPIDENSE`, `MATDENSECUDA`, `MATDENSEHIP`
M*/

/*@C
  MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

  Collective

  Input Parameters:
+ B    - the matrix
- data - optional location of matrix data. Set to `NULL` for PETSc
         to control all matrix memory allocation.

  Level: intermediate

  Notes:
  The dense format is fully compatible with standard Fortran
  storage by columns.

  The data input variable is intended primarily for Fortran programmers
  who wish to allocate their own matrix memory space. Most users should
  set `data` to `NULL`.

.seealso: [](ch_matrices), `Mat`, `MATMPIDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
@*/
PetscErrorCode MatMPIDenseSetPreallocation(Mat B, PetscScalar *data)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  /* PetscTryMethod: silently a no-op for matrix types that did not compose this method */
  PetscTryMethod(B, "MatMPIDenseSetPreallocation_C", (Mat, PetscScalar *), (B, data));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatDensePlaceArray - Allows one to replace the array in a `MATDENSE` matrix with an
  array provided by the user. This is useful to avoid copying an array
  into a matrix

  Not Collective

  Input Parameters:
+ mat   - the matrix
- array - the array in column major order

  Level: developer

  Note:
  You can return to the original array with a call to `MatDenseResetArray()`. The user is responsible for freeing this array; it will not be
  freed when the matrix is destroyed.

.seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDenseResetArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`,
          `MatDenseReplaceArray()`
@*/
PetscErrorCode MatDensePlaceArray(Mat mat, const PetscScalar *array)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  PetscUseMethod(mat, "MatDensePlaceArray_C", (Mat, const PetscScalar *), (mat, array));
  /* entries changed: bump the state so cached norms/factorizations are invalidated */
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
  /* the user-supplied array is host memory, so the host copy is now the valid one */
  mat->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatDenseResetArray - Resets the matrix array to that it previously had before the call to `MatDensePlaceArray()`

  Not Collective

  Input Parameter:
. mat - the matrix

  Level: developer

  Note:
  You can only call this after a call to `MatDensePlaceArray()`

.seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDensePlaceArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`
@*/
PetscErrorCode MatDenseResetArray(Mat mat)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  PetscUseMethod(mat, "MatDenseResetArray_C", (Mat), (mat));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an
  array provided by the user. This is useful to avoid copying an array
  into a matrix

  Not Collective

  Input Parameters:
+ mat   - the matrix
- array - the array in column major order

  Level: developer

  Note:
  The memory passed in MUST be obtained with `PetscMalloc()` and CANNOT be
  freed by the user. It will be freed when the matrix is destroyed.

.seealso: [](ch_matrices), `Mat`, `MatDensePlaceArray()`, `MatDenseGetArray()`, `VecReplaceArray()`
@*/
PetscErrorCode MatDenseReplaceArray(Mat mat, const PetscScalar *array)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  PetscUseMethod(mat, "MatDenseReplaceArray_C", (Mat, const PetscScalar *), (mat, array));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
  /* replacement array is host memory, so the host copy is now the valid one */
  mat->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatCreateDense - Creates a matrix in `MATDENSE` format.

  Collective

  Input Parameters:
+ comm - MPI communicator
. m    - number of local rows (or `PETSC_DECIDE` to have calculated if `M` is given)
. n    - number of local columns (or `PETSC_DECIDE` to have calculated if `N` is given)
. M    - number of global rows (or `PETSC_DECIDE` to have calculated if `m` is given)
. N    - number of global columns (or `PETSC_DECIDE` to have calculated if `n` is given)
- data - optional location of matrix data. Set data to `NULL` (`PETSC_NULL_SCALAR` for Fortran users) for PETSc
         to control all matrix memory allocation.

  Output Parameter:
. A - the matrix

  Level: intermediate

  Notes:
  The dense format is fully compatible with standard Fortran
  storage by columns.

  Although local portions of the matrix are stored in column-major
  order, the matrix is partitioned across MPI ranks by row.

  The data input variable is intended primarily for Fortran programmers
  who wish to allocate their own matrix memory space. Most users should
  set `data` to `NULL` (`PETSC_NULL_SCALAR` for Fortran users).

  The user MUST specify either the local or global matrix dimensions
  (possibly both).

.seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
@*/
PetscErrorCode MatCreateDense(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar *data, Mat *A)
{
  PetscFunctionBegin;
  PetscCall(MatCreate(comm, A));
  PetscCall(MatSetSizes(*A, m, n, M, N));
  PetscCall(MatSetType(*A, MATDENSE));
  /* only the preallocation matching the actual type (seq or mpi) takes effect; the other is a no-op */
  PetscCall(MatSeqDenseSetPreallocation(*A, data));
  PetscCall(MatMPIDenseSetPreallocation(*A, data));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Duplicates an MPI dense matrix: mirrors the layouts and flags of A, then
   duplicates the local sequential matrix (copying values iff cpvalues says so). */
static PetscErrorCode MatDuplicate_MPIDense(Mat A, MatDuplicateOption cpvalues, Mat *newmat)
{
  Mat mat;
  Mat_MPIDense *a, *oldmat = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  *newmat = NULL;
  PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat));
  PetscCall(MatSetSizes(mat, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
  PetscCall(MatSetType(mat, ((PetscObject)A)->type_name));
  a = (Mat_MPIDense *)mat->data;

  mat->factortype = A->factortype;
  mat->assembled = PETSC_TRUE;
  mat->preallocated = PETSC_TRUE;

  mat->insertmode = NOT_SET_VALUES;
  a->donotstash = oldmat->donotstash;

  /* share A's layouts by reference instead of the ones created by MatSetSizes() */
  PetscCall(PetscLayoutReference(A->rmap, &mat->rmap));
  PetscCall(PetscLayoutReference(A->cmap, &mat->cmap));

  PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));

  *newmat = mat;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Loads a dense matrix from a viewer; binary and (when available) HDF5 viewers are supported */
static PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
{
  PetscBool isbinary;
#if defined(PETSC_HAVE_HDF5)
  PetscBool ishdf5;
#endif

  PetscFunctionBegin;
  PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
  PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
  /* force binary viewer to load .info file if it has not yet done so */
  PetscCall(PetscViewerSetUp(viewer));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
#if defined(PETSC_HAVE_HDF5)
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
#endif
  if (isbinary) {
    PetscCall(MatLoad_Dense_Binary(newMat, viewer));
#if defined(PETSC_HAVE_HDF5)
  } else if (ishdf5) {
    PetscCall(MatLoad_Dense_HDF5(newMat, viewer));
#endif
  } else SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Equality test: compare the local sequential blocks, then combine the
   per-process results with a logical AND across the communicator. */
static PetscErrorCode MatEqual_MPIDense(Mat A, Mat B, PetscBool *flag)
{
  Mat_MPIDense *matB = (Mat_MPIDense *)B->data, *matA = (Mat_MPIDense *)A->data;
  Mat a, b;

  PetscFunctionBegin;
  a = matA->A;
  b = matB->A;
  PetscCall(MatEqual(a, b, flag));
  PetscCall(MPIU_Allreduce(MPI_IN_PLACE, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Frees the reuse data attached to a C = A^T*B product by
   MatTransposeMatMultSymbolic_MPIDense_MPIDense(); sendbuf/recvcounts were
   allocated together with PetscMalloc2 and must be freed together. */
static PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data)
{
  Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data;

  PetscFunctionBegin;
  PetscCall(PetscFree2(atb->sendbuf, atb->recvcounts));
  PetscCall(MatDestroy(&atb->atb));
  PetscCall(PetscFree(atb));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Frees the reuse data attached to a C = A*B^T product by
   MatMatTransposeMultSymbolic_MPIDense_MPIDense(). */
static PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data)
{
  Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data;

  PetscFunctionBegin;
  PetscCall(PetscFree2(abt->buf[0], 
abt->buf[1]));
  PetscCall(PetscFree2(abt->recvcounts, abt->recvdispls));
  PetscCall(PetscFree(abt));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Numeric phase of C = A^T * B for MPI dense matrices: each process computes
   the product of its local blocks, then the row-distributed sum is formed with
   a single MPI_Reduce_scatter using the counts precomputed in the symbolic phase. */
static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
  Mat_TransMatMultDense *atb;
  MPI_Comm comm;
  PetscMPIInt size, *recvcounts;
  PetscScalar *carray, *sendbuf;
  const PetscScalar *atbarray;
  PetscInt i, cN = C->cmap->N, proc, k, j, lda;
  const PetscInt *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C, 3);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
  atb = (Mat_TransMatMultDense *)C->product->data;
  recvcounts = atb->recvcounts;
  sendbuf = atb->sendbuf;

  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));

  /* compute atbarray = aseq^T * bseq */
  PetscCall(MatTransposeMatMult(a->A, b->A, atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX, PETSC_DEFAULT, &atb->atb));

  PetscCall(MatGetOwnershipRanges(C, &ranges));

  /* arrange atbarray into sendbuf: for each destination process, its row block
     is packed column by column so Reduce_scatter can deliver contiguous chunks */
  PetscCall(MatDenseGetArrayRead(atb->atb, &atbarray));
  PetscCall(MatDenseGetLDA(atb->atb, &lda));
  for (proc = 0, k = 0; proc < size; proc++) {
    for (j = 0; j < cN; j++) {
      for (i = ranges[proc]; i < ranges[proc + 1]; i++) sendbuf[k++] = atbarray[i + j * lda];
    }
  }
  PetscCall(MatDenseRestoreArrayRead(atb->atb, &atbarray));

  /* sum all atbarray to local values of C */
  PetscCall(MatDenseGetArrayWrite(c->A, &carray));
  PetscCallMPI(MPI_Reduce_scatter(sendbuf, carray, recvcounts, MPIU_SCALAR, MPIU_SUM, comm));
  PetscCall(MatDenseRestoreArrayWrite(c->A, &carray));
  PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Symbolic phase of C = A^T * B: sets up C's type and sizes and allocates the
   send buffer and receive counts reused by every numeric application. */
static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
{
  MPI_Comm comm;
  PetscMPIInt size;
  PetscInt cm = A->cmap->n, cM, cN = B->cmap->N;
  Mat_TransMatMultDense *atb;
  PetscBool cisdense = PETSC_FALSE;
  PetscInt i;
  const PetscInt *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C, 4);
  PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  /* A and B must share the same row distribution for the local products to line up */
  if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
    SETERRQ(comm, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
  }

  /* create matrix product C */
  PetscCall(MatSetSizes(C, cm, B->cmap->n, A->cmap->N, B->cmap->N));
#if defined(PETSC_HAVE_CUDA)
PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, ""));
#endif
#if defined(PETSC_HAVE_HIP)
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSEHIP, ""));
#endif
  if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
  PetscCall(MatSetUp(C));

  /* create data structure for reuse C */
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCall(PetscNew(&atb));
  cM = C->rmap->N;
  PetscCall(PetscMalloc2(cM * cN, &atb->sendbuf, size, &atb->recvcounts));
  PetscCall(MatGetOwnershipRanges(C, &ranges));
  /* process i receives its own row block of C: (local rows) x (global cols) entries */
  for (i = 0; i < size; i++) atb->recvcounts[i] = (ranges[i + 1] - ranges[i]) * cN;

  C->product->data = atb;
  C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Symbolic phase of C = A * B^T: selects the algorithm ("allgatherv" or
   "cyclic"), sets up C, and allocates the communication buffers reused by the
   numeric phase. */
static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
{
  MPI_Comm comm;
  PetscMPIInt i, size;
  PetscInt maxRows, bufsiz;
  PetscMPIInt tag;
  PetscInt alg;
  Mat_MatTransMultDense *abt;
  Mat_Product *product = C->product;
  PetscBool flg;

  PetscFunctionBegin;
  MatCheckProduct(C, 4);
  PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
  /* check local size of A and B */
  PetscCheck(A->cmap->n == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local column dimensions are incompatible, A (%" PetscInt_FMT ") != B (%" PetscInt_FMT ")", A->cmap->n, B->cmap->n);

  /* alg 0 = "allgatherv", anything else is treated as "cyclic" (see the switch below) */
  PetscCall(PetscStrcmp(product->alg, "allgatherv", &flg));
  alg = flg ? 0 : 1;

  /* setup matrix product C */
  PetscCall(MatSetSizes(C, A->rmap->n, B->rmap->n, A->rmap->N, B->rmap->N));
  PetscCall(MatSetType(C, MATMPIDENSE));
  PetscCall(MatSetUp(C));
  /* private tag for the point-to-point traffic of the cyclic algorithm */
  PetscCall(PetscObjectGetNewTag((PetscObject)C, &tag));

  /* create data structure for reuse C */
  PetscCall(PetscObjectGetComm((PetscObject)C, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCall(PetscNew(&abt));
  abt->tag = tag;
  abt->alg = alg;
  switch (alg) {
  case 1: /* alg: "cyclic" */
    /* double buffering sized for the largest row block of B on any process */
    for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i]));
    bufsiz = A->cmap->N * maxRows;
    PetscCall(PetscMalloc2(bufsiz, &(abt->buf[0]), bufsiz, &(abt->buf[1])));
    break;
  default: /* alg: "allgatherv" */
    /* buf[0] holds the local B^T, buf[1] the gathered global B^T */
    PetscCall(PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1])));
    PetscCall(PetscMalloc2(size, &(abt->recvcounts), size + 1, &(abt->recvdispls)));
    for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N;
    for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i];
    break;
  }

  C->product->data = abt;
  C->product->destroy = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
  C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Numeric phase of C = A * B^T, cyclic variant: the packed row block of B
   circulates around the ring of processes; each step overlaps the transfer of
   the next block with a local GEMM against the current one. */
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
  Mat_MatTransMultDense *abt;
  MPI_Comm comm;
  PetscMPIInt rank, size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom;
  PetscScalar *sendbuf, *recvbuf = NULL, *cv;
  PetscInt i, cK = A->cmap->N, k, j, bn;
  PetscScalar _DOne = 1.0, _DZero = 0.0;
  const PetscScalar *av, *bv;
  PetscBLASInt cm, cn, ck, alda, blda = 0, clda;
  MPI_Request reqs[2];
  const PetscInt *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C, 3);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
  abt = (Mat_MatTransMultDense *)C->product->data;
  PetscCall(PetscObjectGetComm((PetscObject)C, &comm));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCall(MatDenseGetArrayRead(a->A, &av));
  PetscCall(MatDenseGetArrayRead(b->A, &bv));
  PetscCall(MatDenseGetArrayWrite(c->A, &cv));
  PetscCall(MatDenseGetLDA(a->A, &i));
  PetscCall(PetscBLASIntCast(i, &alda));
  PetscCall(MatDenseGetLDA(b->A, &i));
  PetscCall(PetscBLASIntCast(i, &blda));
  PetscCall(MatDenseGetLDA(c->A, &i));
  PetscCall(PetscBLASIntCast(i, &clda));
  PetscCall(MatGetOwnershipRanges(B, &ranges));
  bn = B->rmap->n;
  if (blda == bn) {
    /* local B is already contiguous: send it directly without packing */
    sendbuf = (PetscScalar *)bv;
  } else {
    sendbuf = abt->buf[0];
    for (k = 0, i = 0; i < cK; i++) {
      for (j = 0; j < bn; j++, k++) sendbuf[k] = bv[i * blda + j];
    }
  }
  if (size > 1) {
    sendto = (rank + size - 1) % size;
    recvfrom = (rank + size + 1) % size;
  } else {
    sendto = recvfrom = 0;
  }
  PetscCall(PetscBLASIntCast(cK, &ck));
  PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm));
  recvisfrom = rank;
  for (i = 0; i < size; i++) {
    /* we have finished receiving in sending, bufs can be read/modified */
    PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */
    PetscInt nextbn = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];

    if (nextrecvisfrom != rank) {
      /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */
      sendsiz = cK * bn;
      recvsiz = cK * nextbn;
      recvbuf = (i & 1) ? 
abt->buf[0] : abt->buf[1]; 2142 PetscCallMPI(MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0])); 2143 PetscCallMPI(MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1])); 2144 } 2145 2146 /* local aseq * sendbuf^T */ 2147 PetscCall(PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn)); 2148 if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &cm, &cn, &ck, &_DOne, av, &alda, sendbuf, &cn, &_DZero, cv + clda * ranges[recvisfrom], &clda)); 2149 2150 if (nextrecvisfrom != rank) { 2151 /* wait for the sends and receives to complete, swap sendbuf and recvbuf */ 2152 PetscCallMPI(MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE)); 2153 } 2154 bn = nextbn; 2155 recvisfrom = nextrecvisfrom; 2156 sendbuf = recvbuf; 2157 } 2158 PetscCall(MatDenseRestoreArrayRead(a->A, &av)); 2159 PetscCall(MatDenseRestoreArrayRead(b->A, &bv)); 2160 PetscCall(MatDenseRestoreArrayWrite(c->A, &cv)); 2161 PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 2162 PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 2163 PetscFunctionReturn(PETSC_SUCCESS); 2164 } 2165 2166 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C) 2167 { 2168 Mat_MPIDense *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data; 2169 Mat_MatTransMultDense *abt; 2170 MPI_Comm comm; 2171 PetscMPIInt size; 2172 PetscScalar *cv, *sendbuf, *recvbuf; 2173 const PetscScalar *av, *bv; 2174 PetscInt blda, i, cK = A->cmap->N, k, j, bn; 2175 PetscScalar _DOne = 1.0, _DZero = 0.0; 2176 PetscBLASInt cm, cn, ck, alda, clda; 2177 2178 PetscFunctionBegin; 2179 MatCheckProduct(C, 3); 2180 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 2181 abt = (Mat_MatTransMultDense *)C->product->data; 2182 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 2183 PetscCallMPI(MPI_Comm_size(comm, &size)); 2184 PetscCall(MatDenseGetArrayRead(a->A, &av)); 2185 
PetscCall(MatDenseGetArrayRead(b->A, &bv)); 2186 PetscCall(MatDenseGetArrayWrite(c->A, &cv)); 2187 PetscCall(MatDenseGetLDA(a->A, &i)); 2188 PetscCall(PetscBLASIntCast(i, &alda)); 2189 PetscCall(MatDenseGetLDA(b->A, &blda)); 2190 PetscCall(MatDenseGetLDA(c->A, &i)); 2191 PetscCall(PetscBLASIntCast(i, &clda)); 2192 /* copy transpose of B into buf[0] */ 2193 bn = B->rmap->n; 2194 sendbuf = abt->buf[0]; 2195 recvbuf = abt->buf[1]; 2196 for (k = 0, j = 0; j < bn; j++) { 2197 for (i = 0; i < cK; i++, k++) sendbuf[k] = bv[i * blda + j]; 2198 } 2199 PetscCall(MatDenseRestoreArrayRead(b->A, &bv)); 2200 PetscCallMPI(MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm)); 2201 PetscCall(PetscBLASIntCast(cK, &ck)); 2202 PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm)); 2203 PetscCall(PetscBLASIntCast(c->A->cmap->n, &cn)); 2204 if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &cm, &cn, &ck, &_DOne, av, &alda, recvbuf, &ck, &_DZero, cv, &clda)); 2205 PetscCall(MatDenseRestoreArrayRead(a->A, &av)); 2206 PetscCall(MatDenseRestoreArrayRead(b->A, &bv)); 2207 PetscCall(MatDenseRestoreArrayWrite(c->A, &cv)); 2208 PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 2209 PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 2210 PetscFunctionReturn(PETSC_SUCCESS); 2211 } 2212 2213 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) 2214 { 2215 Mat_MatTransMultDense *abt; 2216 2217 PetscFunctionBegin; 2218 MatCheckProduct(C, 3); 2219 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 2220 abt = (Mat_MatTransMultDense *)C->product->data; 2221 switch (abt->alg) { 2222 case 1: 2223 PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C)); 2224 break; 2225 default: 2226 PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C)); 2227 break; 2228 } 2229 PetscFunctionReturn(PETSC_SUCCESS); 2230 } 2231 2232 #if 
defined(PETSC_HAVE_ELEMENTAL)
/* Frees the Elemental mirror matrices cached for C = A*B reuse */
static PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data)
{
  Mat_MatMultDense *ab = (Mat_MatMultDense *)data;

  PetscFunctionBegin;
  PetscCall(MatDestroy(&ab->Ce));
  PetscCall(MatDestroy(&ab->Ae));
  PetscCall(MatDestroy(&ab->Be));
  PetscCall(PetscFree(ab));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Numeric phase of C = A*B via Elemental: refresh the Elemental copies of A and B,
   multiply there, and convert the result back into the MPI dense matrix C. */
PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
{
  Mat_MatMultDense *ab;

  PetscFunctionBegin;
  MatCheckProduct(C, 3);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Missing product data");
  ab = (Mat_MatMultDense *)C->product->data;
  PetscCall(MatConvert_MPIDense_Elemental(A, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Ae));
  PetscCall(MatConvert_MPIDense_Elemental(B, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Be));
  PetscCall(MatMatMultNumeric_Elemental(ab->Ae, ab->Be, ab->Ce));
  PetscCall(MatConvert(ab->Ce, MATMPIDENSE, MAT_REUSE_MATRIX, &C));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Symbolic phase of C = A*B via Elemental: build the Elemental mirrors Ae/Be/Ce
   once and cache them in the product data for repeated numeric applications. */
static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
{
  Mat Ae, Be, Ce;
  Mat_MatMultDense *ab;

  PetscFunctionBegin;
  MatCheckProduct(C, 4);
  PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
  /* check local size of A and B */
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
    SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
  }

  /* create elemental matrices Ae and Be */
  PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &Ae));
  PetscCall(MatSetSizes(Ae, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N));
  PetscCall(MatSetType(Ae, MATELEMENTAL));
  PetscCall(MatSetUp(Ae));
  PetscCall(MatSetOption(Ae, MAT_ROW_ORIENTED, PETSC_FALSE));

  PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &Be));
  PetscCall(MatSetSizes(Be, PETSC_DECIDE, PETSC_DECIDE, B->rmap->N, B->cmap->N));
  PetscCall(MatSetType(Be, MATELEMENTAL));
  PetscCall(MatSetUp(Be));
  PetscCall(MatSetOption(Be, MAT_ROW_ORIENTED, PETSC_FALSE));

  /* compute symbolic Ce = Ae*Be */
  PetscCall(MatCreate(PetscObjectComm((PetscObject)C), &Ce));
  PetscCall(MatMatMultSymbolic_Elemental(Ae, Be, fill, Ce));

  /* setup C */
  PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, PETSC_DECIDE, PETSC_DECIDE));
  PetscCall(MatSetType(C, MATDENSE));
  PetscCall(MatSetUp(C));

  /* create data structure for reuse Cdense */
  PetscCall(PetscNew(&ab));
  ab->Ae = Ae;
  ab->Be = Be;
  ab->Ce = Ce;

  C->product->data = ab;
  C->product->destroy = MatDestroy_MatMatMult_MPIDense_MPIDense;
  C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif

#if defined(PETSC_HAVE_ELEMENTAL)
/* Install the Elemental-backed symbolic routine for MATPRODUCT_AB */
static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C)
{
  PetscFunctionBegin;
  C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense;
  C->ops->productsymbolic = MatProductSymbolic_AB;
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif

/* Install the symbolic routine for MATPRODUCT_AtB after checking the row layouts agree */
static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C)
{
  Mat_Product *product = C->product;
  Mat A = product->A, B = product->B;

  PetscFunctionBegin;
  if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend)
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, 
B->rmap->rend);
  C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense;
  C->ops->productsymbolic = MatProductSymbolic_AtB;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Choose the C = A*B^t algorithm ("allgatherv" or "cyclic") from the options
   database and install the symbolic routine for MATPRODUCT_ABt. */
static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C)
{
  Mat_Product *product = C->product;
  const char *algTypes[2] = {"allgatherv", "cyclic"};
  PetscInt alg, nalg = 2;
  PetscBool flg = PETSC_FALSE;

  PetscFunctionBegin;
  /* Set default algorithm */
  alg = 0; /* default is allgatherv */
  PetscCall(PetscStrcmp(product->alg, "default", &flg));
  if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));

  /* Get runtime option; the option name depends on whether the user went
     through the old MatMatTransposeMult() API or the MatProduct API */
  if (product->api_user) {
    PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatTransposeMult", "Mat");
    PetscCall(PetscOptionsEList("-matmattransmult_mpidense_mpidense_via", "Algorithmic approach", "MatMatTransposeMult", algTypes, nalg, algTypes[alg], &alg, &flg));
    PetscOptionsEnd();
  } else {
    PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_ABt", "Mat");
    PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_ABt", algTypes, nalg, algTypes[alg], &alg, &flg));
    PetscOptionsEnd();
  }
  if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));

  C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense;
  C->ops->productsymbolic = MatProductSymbolic_ABt;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Entry point called by the MatProduct framework: dispatch on the product type.
   Unsupported types fall through silently so other backends can be tried. */
PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C)
{
  Mat_Product *product = C->product;

  PetscFunctionBegin;
  switch (product->type) {
#if defined(PETSC_HAVE_ELEMENTAL)
  case MATPRODUCT_AB:
    PetscCall(MatProductSetFromOptions_MPIDense_AB(C));
    break;
#endif
  case MATPRODUCT_AtB:
    PetscCall(MatProductSetFromOptions_MPIDense_AtB(C));
    break;
  case MATPRODUCT_ABt:
    PetscCall(MatProductSetFromOptions_MPIDense_ABt(C));
    break;
  default:
    break;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}