1 /* 2 Basic functions for basic parallel dense matrices. 3 Portions of this code are under: 4 Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. 5 */ 6 7 #include <../src/mat/impls/dense/mpi/mpidense.h> /*I "petscmat.h" I*/ 8 #include <../src/mat/impls/aij/mpi/mpiaij.h> 9 #include <petscblaslapack.h> 10 11 /*@ 12 MatDenseGetLocalMatrix - For a `MATMPIDENSE` or `MATSEQDENSE` matrix returns the sequential 13 matrix that represents the operator. For sequential matrices it returns itself. 14 15 Input Parameter: 16 . A - the sequential or MPI `MATDENSE` matrix 17 18 Output Parameter: 19 . B - the inner matrix 20 21 Level: intermediate 22 23 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATMPIDENSE`, `MATSEQDENSE` 24 @*/ 25 PetscErrorCode MatDenseGetLocalMatrix(Mat A, Mat *B) 26 { 27 Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 28 PetscBool flg; 29 30 PetscFunctionBegin; 31 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 32 PetscAssertPointer(B, 2); 33 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &flg)); 34 if (flg) *B = mat->A; 35 else { 36 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQDENSE, &flg)); 37 PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)A)->type_name); 38 *B = A; 39 } 40 PetscFunctionReturn(PETSC_SUCCESS); 41 } 42 43 static PetscErrorCode MatCopy_MPIDense(Mat A, Mat B, MatStructure s) 44 { 45 Mat_MPIDense *Amat = (Mat_MPIDense *)A->data; 46 Mat_MPIDense *Bmat = (Mat_MPIDense *)B->data; 47 48 PetscFunctionBegin; 49 PetscCall(MatCopy(Amat->A, Bmat->A, s)); 50 PetscFunctionReturn(PETSC_SUCCESS); 51 } 52 53 PetscErrorCode MatShift_MPIDense(Mat A, PetscScalar alpha) 54 { 55 Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 56 PetscInt j, lda, rstart = A->rmap->rstart, rend = A->rmap->rend, rend2; 57 PetscScalar *v; 58 59 PetscFunctionBegin; 60 PetscCall(MatDenseGetArray(mat->A, &v)); 61 PetscCall(MatDenseGetLDA(mat->A, &lda)); 62 rend2 = PetscMin(rend, 
A->cmap->N); 63 if (rend2 > rstart) { 64 for (j = rstart; j < rend2; j++) v[j - rstart + j * lda] += alpha; 65 PetscCall(PetscLogFlops(rend2 - rstart)); 66 } 67 PetscCall(MatDenseRestoreArray(mat->A, &v)); 68 PetscFunctionReturn(PETSC_SUCCESS); 69 } 70 71 static PetscErrorCode MatGetRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 72 { 73 Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 74 PetscInt lrow, rstart = A->rmap->rstart, rend = A->rmap->rend; 75 76 PetscFunctionBegin; 77 PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows"); 78 lrow = row - rstart; 79 PetscCall(MatGetRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v)); 80 PetscFunctionReturn(PETSC_SUCCESS); 81 } 82 83 static PetscErrorCode MatRestoreRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 84 { 85 Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 86 PetscInt lrow, rstart = A->rmap->rstart, rend = A->rmap->rend; 87 88 PetscFunctionBegin; 89 PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows"); 90 lrow = row - rstart; 91 PetscCall(MatRestoreRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v)); 92 PetscFunctionReturn(PETSC_SUCCESS); 93 } 94 95 static PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A, Mat *a) 96 { 97 Mat_MPIDense *mdn = (Mat_MPIDense *)A->data; 98 PetscInt m = A->rmap->n, rstart = A->rmap->rstart; 99 PetscScalar *array; 100 MPI_Comm comm; 101 PetscBool flg; 102 Mat B; 103 104 PetscFunctionBegin; 105 PetscCall(MatHasCongruentLayouts(A, &flg)); 106 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only square matrices supported."); 107 PetscCall(PetscObjectQuery((PetscObject)A, "DiagonalBlock", (PetscObject *)&B)); 108 if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */ 109 #if PetscDefined(HAVE_CUDA) 110 
PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSECUDA, &flg)); 111 PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSECUDA); 112 #elif PetscDefined(HAVE_HIP) 113 PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSEHIP, &flg)); 114 PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSEHIP); 115 #endif 116 PetscCall(PetscObjectGetComm((PetscObject)(mdn->A), &comm)); 117 PetscCall(MatCreate(comm, &B)); 118 PetscCall(MatSetSizes(B, m, m, m, m)); 119 PetscCall(MatSetType(B, ((PetscObject)mdn->A)->type_name)); 120 PetscCall(MatDenseGetArrayRead(mdn->A, (const PetscScalar **)&array)); 121 PetscCall(MatSeqDenseSetPreallocation(B, array + m * rstart)); 122 PetscCall(MatDenseRestoreArrayRead(mdn->A, (const PetscScalar **)&array)); 123 PetscCall(PetscObjectCompose((PetscObject)A, "DiagonalBlock", (PetscObject)B)); 124 *a = B; 125 PetscCall(MatDestroy(&B)); 126 } else *a = B; 127 PetscFunctionReturn(PETSC_SUCCESS); 128 } 129 130 static PetscErrorCode MatSetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], const PetscScalar v[], InsertMode addv) 131 { 132 Mat_MPIDense *A = (Mat_MPIDense *)mat->data; 133 PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row; 134 PetscBool roworiented = A->roworiented; 135 136 PetscFunctionBegin; 137 for (i = 0; i < m; i++) { 138 if (idxm[i] < 0) continue; 139 PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large"); 140 if (idxm[i] >= rstart && idxm[i] < rend) { 141 row = idxm[i] - rstart; 142 if (roworiented) { 143 PetscCall(MatSetValues(A->A, 1, &row, n, idxn, v + i * n, addv)); 144 } else { 145 for (j = 0; j < n; j++) { 146 if (idxn[j] < 0) continue; 147 PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, 
"Column too large"); 148 PetscCall(MatSetValues(A->A, 1, &row, 1, &idxn[j], v + i + j * m, addv)); 149 } 150 } 151 } else if (!A->donotstash) { 152 mat->assembled = PETSC_FALSE; 153 if (roworiented) { 154 PetscCall(MatStashValuesRow_Private(&mat->stash, idxm[i], n, idxn, v + i * n, PETSC_FALSE)); 155 } else { 156 PetscCall(MatStashValuesCol_Private(&mat->stash, idxm[i], n, idxn, v + i, m, PETSC_FALSE)); 157 } 158 } 159 } 160 PetscFunctionReturn(PETSC_SUCCESS); 161 } 162 163 static PetscErrorCode MatGetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[]) 164 { 165 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 166 PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row; 167 168 PetscFunctionBegin; 169 for (i = 0; i < m; i++) { 170 if (idxm[i] < 0) continue; /* negative row */ 171 PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large"); 172 if (idxm[i] >= rstart && idxm[i] < rend) { 173 row = idxm[i] - rstart; 174 for (j = 0; j < n; j++) { 175 if (idxn[j] < 0) continue; /* negative column */ 176 PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large"); 177 PetscCall(MatGetValues(mdn->A, 1, &row, 1, &idxn[j], v + i * n + j)); 178 } 179 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported"); 180 } 181 PetscFunctionReturn(PETSC_SUCCESS); 182 } 183 184 static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A, PetscInt *lda) 185 { 186 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 187 188 PetscFunctionBegin; 189 PetscCall(MatDenseGetLDA(a->A, lda)); 190 PetscFunctionReturn(PETSC_SUCCESS); 191 } 192 193 static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A, PetscInt lda) 194 { 195 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 196 MatType mtype = MATSEQDENSE; 197 198 PetscFunctionBegin; 199 if (!a->A) { 200 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call 
MatDenseRestoreSubMatrix() first"); 201 PetscCall(PetscLayoutSetUp(A->rmap)); 202 PetscCall(PetscLayoutSetUp(A->cmap)); 203 PetscCall(MatCreate(PETSC_COMM_SELF, &a->A)); 204 PetscCall(MatSetSizes(a->A, A->rmap->n, A->cmap->N, A->rmap->n, A->cmap->N)); 205 #if PetscDefined(HAVE_CUDA) 206 PetscBool iscuda; 207 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSECUDA, &iscuda)); 208 if (iscuda) mtype = MATSEQDENSECUDA; 209 #elif PetscDefined(HAVE_HIP) 210 PetscBool iship; 211 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSEHIP, &iship)); 212 if (iship) mtype = MATSEQDENSEHIP; 213 #endif 214 PetscCall(MatSetType(a->A, mtype)); 215 } 216 PetscCall(MatDenseSetLDA(a->A, lda)); 217 PetscFunctionReturn(PETSC_SUCCESS); 218 } 219 220 static PetscErrorCode MatDenseGetArray_MPIDense(Mat A, PetscScalar **array) 221 { 222 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 223 224 PetscFunctionBegin; 225 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 226 PetscCall(MatDenseGetArray(a->A, array)); 227 PetscFunctionReturn(PETSC_SUCCESS); 228 } 229 230 static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A, const PetscScalar **array) 231 { 232 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 233 234 PetscFunctionBegin; 235 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 236 PetscCall(MatDenseGetArrayRead(a->A, array)); 237 PetscFunctionReturn(PETSC_SUCCESS); 238 } 239 240 static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A, PetscScalar **array) 241 { 242 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 243 244 PetscFunctionBegin; 245 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 246 PetscCall(MatDenseGetArrayWrite(a->A, array)); 247 PetscFunctionReturn(PETSC_SUCCESS); 248 } 249 250 static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A, const 
PetscScalar *array) 251 { 252 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 253 254 PetscFunctionBegin; 255 PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 256 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 257 PetscCall(MatDensePlaceArray(a->A, array)); 258 PetscFunctionReturn(PETSC_SUCCESS); 259 } 260 261 static PetscErrorCode MatDenseResetArray_MPIDense(Mat A) 262 { 263 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 264 265 PetscFunctionBegin; 266 PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 267 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 268 PetscCall(MatDenseResetArray(a->A)); 269 PetscFunctionReturn(PETSC_SUCCESS); 270 } 271 272 static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A, const PetscScalar *array) 273 { 274 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 275 276 PetscFunctionBegin; 277 PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 278 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 279 PetscCall(MatDenseReplaceArray(a->A, array)); 280 PetscFunctionReturn(PETSC_SUCCESS); 281 } 282 283 static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A, IS isrow, IS iscol, MatReuse scall, Mat *B) 284 { 285 Mat_MPIDense *mat = (Mat_MPIDense *)A->data, *newmatd; 286 PetscInt lda, i, j, rstart, rend, nrows, ncols, Ncols, nlrows, nlcols; 287 const PetscInt *irow, *icol; 288 const PetscScalar *v; 289 PetscScalar *bv; 290 Mat newmat; 291 IS iscol_local; 292 MPI_Comm comm_is, comm_mat; 293 294 PetscFunctionBegin; 295 PetscCall(PetscObjectGetComm((PetscObject)A, &comm_mat)); 296 
PetscCall(PetscObjectGetComm((PetscObject)iscol, &comm_is)); 297 PetscCheck(comm_mat == comm_is, PETSC_COMM_SELF, PETSC_ERR_ARG_NOTSAMECOMM, "IS communicator must match matrix communicator"); 298 299 PetscCall(ISAllGather(iscol, &iscol_local)); 300 PetscCall(ISGetIndices(isrow, &irow)); 301 PetscCall(ISGetIndices(iscol_local, &icol)); 302 PetscCall(ISGetLocalSize(isrow, &nrows)); 303 PetscCall(ISGetLocalSize(iscol, &ncols)); 304 PetscCall(ISGetSize(iscol, &Ncols)); /* global number of columns, size of iscol_local */ 305 306 /* No parallel redistribution currently supported! Should really check each index set 307 to confirm that it is OK. ... Currently supports only submatrix same partitioning as 308 original matrix! */ 309 310 PetscCall(MatGetLocalSize(A, &nlrows, &nlcols)); 311 PetscCall(MatGetOwnershipRange(A, &rstart, &rend)); 312 313 /* Check submatrix call */ 314 if (scall == MAT_REUSE_MATRIX) { 315 /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */ 316 /* Really need to test rows and column sizes! 
*/ 317 newmat = *B; 318 } else { 319 /* Create and fill new matrix */ 320 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &newmat)); 321 PetscCall(MatSetSizes(newmat, nrows, ncols, PETSC_DECIDE, Ncols)); 322 PetscCall(MatSetType(newmat, ((PetscObject)A)->type_name)); 323 PetscCall(MatMPIDenseSetPreallocation(newmat, NULL)); 324 } 325 326 /* Now extract the data pointers and do the copy, column at a time */ 327 newmatd = (Mat_MPIDense *)newmat->data; 328 PetscCall(MatDenseGetArray(newmatd->A, &bv)); 329 PetscCall(MatDenseGetArrayRead(mat->A, &v)); 330 PetscCall(MatDenseGetLDA(mat->A, &lda)); 331 for (i = 0; i < Ncols; i++) { 332 const PetscScalar *av = v + lda * icol[i]; 333 for (j = 0; j < nrows; j++) *bv++ = av[irow[j] - rstart]; 334 } 335 PetscCall(MatDenseRestoreArrayRead(mat->A, &v)); 336 PetscCall(MatDenseRestoreArray(newmatd->A, &bv)); 337 338 /* Assemble the matrices so that the correct flags are set */ 339 PetscCall(MatAssemblyBegin(newmat, MAT_FINAL_ASSEMBLY)); 340 PetscCall(MatAssemblyEnd(newmat, MAT_FINAL_ASSEMBLY)); 341 342 /* Free work space */ 343 PetscCall(ISRestoreIndices(isrow, &irow)); 344 PetscCall(ISRestoreIndices(iscol_local, &icol)); 345 PetscCall(ISDestroy(&iscol_local)); 346 *B = newmat; 347 PetscFunctionReturn(PETSC_SUCCESS); 348 } 349 350 static PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A, PetscScalar **array) 351 { 352 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 353 354 PetscFunctionBegin; 355 PetscCall(MatDenseRestoreArray(a->A, array)); 356 PetscFunctionReturn(PETSC_SUCCESS); 357 } 358 359 static PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A, const PetscScalar **array) 360 { 361 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 362 363 PetscFunctionBegin; 364 PetscCall(MatDenseRestoreArrayRead(a->A, array)); 365 PetscFunctionReturn(PETSC_SUCCESS); 366 } 367 368 static PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A, PetscScalar **array) 369 { 370 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 371 372 PetscFunctionBegin; 
373 PetscCall(MatDenseRestoreArrayWrite(a->A, array)); 374 PetscFunctionReturn(PETSC_SUCCESS); 375 } 376 377 static PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat, MatAssemblyType mode) 378 { 379 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 380 PetscInt nstash, reallocs; 381 382 PetscFunctionBegin; 383 if (mdn->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS); 384 385 PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range)); 386 PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs)); 387 PetscCall(PetscInfo(mdn->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs)); 388 PetscFunctionReturn(PETSC_SUCCESS); 389 } 390 391 static PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat, MatAssemblyType mode) 392 { 393 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 394 PetscInt i, *row, *col, flg, j, rstart, ncols; 395 PetscMPIInt n; 396 PetscScalar *val; 397 398 PetscFunctionBegin; 399 if (!mdn->donotstash && !mat->nooffprocentries) { 400 /* wait on receives */ 401 while (1) { 402 PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg)); 403 if (!flg) break; 404 405 for (i = 0; i < n;) { 406 /* Now identify the consecutive vals belonging to the same row */ 407 for (j = i, rstart = row[j]; j < n; j++) { 408 if (row[j] != rstart) break; 409 } 410 if (j < n) ncols = j - i; 411 else ncols = n - i; 412 /* Now assemble all these values with a single function call */ 413 PetscCall(MatSetValues_MPIDense(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode)); 414 i = j; 415 } 416 } 417 PetscCall(MatStashScatterEnd_Private(&mat->stash)); 418 } 419 420 PetscCall(MatAssemblyBegin(mdn->A, mode)); 421 PetscCall(MatAssemblyEnd(mdn->A, mode)); 422 PetscFunctionReturn(PETSC_SUCCESS); 423 } 424 425 static PetscErrorCode MatZeroEntries_MPIDense(Mat A) 426 { 427 Mat_MPIDense *l = (Mat_MPIDense *)A->data; 428 429 PetscFunctionBegin; 430 PetscCall(MatZeroEntries(l->A)); 
431 PetscFunctionReturn(PETSC_SUCCESS); 432 } 433 434 static PetscErrorCode MatZeroRows_MPIDense(Mat A, PetscInt n, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) 435 { 436 Mat_MPIDense *l = (Mat_MPIDense *)A->data; 437 PetscInt i, len, *lrows; 438 439 PetscFunctionBegin; 440 /* get locally owned rows */ 441 PetscCall(PetscLayoutMapLocal(A->rmap, n, rows, &len, &lrows, NULL)); 442 /* fix right hand side if needed */ 443 if (x && b) { 444 const PetscScalar *xx; 445 PetscScalar *bb; 446 447 PetscCall(VecGetArrayRead(x, &xx)); 448 PetscCall(VecGetArrayWrite(b, &bb)); 449 for (i = 0; i < len; ++i) bb[lrows[i]] = diag * xx[lrows[i]]; 450 PetscCall(VecRestoreArrayRead(x, &xx)); 451 PetscCall(VecRestoreArrayWrite(b, &bb)); 452 } 453 PetscCall(MatZeroRows(l->A, len, lrows, 0.0, NULL, NULL)); 454 if (diag != 0.0) { 455 Vec d; 456 457 PetscCall(MatCreateVecs(A, NULL, &d)); 458 PetscCall(VecSet(d, diag)); 459 PetscCall(MatDiagonalSet(A, d, INSERT_VALUES)); 460 PetscCall(VecDestroy(&d)); 461 } 462 PetscCall(PetscFree(lrows)); 463 PetscFunctionReturn(PETSC_SUCCESS); 464 } 465 466 PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat, Vec, Vec); 467 PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat, Vec, Vec, Vec); 468 PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat, Vec, Vec); 469 PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat, Vec, Vec, Vec); 470 471 static PetscErrorCode MatMult_MPIDense(Mat mat, Vec xx, Vec yy) 472 { 473 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 474 const PetscScalar *ax; 475 PetscScalar *ay; 476 PetscMemType axmtype, aymtype; 477 478 PetscFunctionBegin; 479 if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat)); 480 PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype)); 481 PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype)); 482 PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE)); 483 PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE)); 
484 PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay)); 485 PetscCall(VecRestoreArrayReadAndMemType(xx, &ax)); 486 PetscCall((*mdn->A->ops->mult)(mdn->A, mdn->lvec, yy)); 487 PetscFunctionReturn(PETSC_SUCCESS); 488 } 489 490 static PetscErrorCode MatMultAdd_MPIDense(Mat mat, Vec xx, Vec yy, Vec zz) 491 { 492 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 493 const PetscScalar *ax; 494 PetscScalar *ay; 495 PetscMemType axmtype, aymtype; 496 497 PetscFunctionBegin; 498 if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat)); 499 PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype)); 500 PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype)); 501 PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE)); 502 PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE)); 503 PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay)); 504 PetscCall(VecRestoreArrayReadAndMemType(xx, &ax)); 505 PetscCall((*mdn->A->ops->multadd)(mdn->A, mdn->lvec, yy, zz)); 506 PetscFunctionReturn(PETSC_SUCCESS); 507 } 508 509 static PetscErrorCode MatMultTranspose_MPIDense(Mat A, Vec xx, Vec yy) 510 { 511 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 512 const PetscScalar *ax; 513 PetscScalar *ay; 514 PetscMemType axmtype, aymtype; 515 516 PetscFunctionBegin; 517 if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); 518 PetscCall(VecSet(yy, 0.0)); 519 PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec)); 520 PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype)); 521 PetscCall(VecGetArrayAndMemType(yy, &ay, &aymtype)); 522 PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM)); 523 PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM)); 524 PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax)); 525 PetscCall(VecRestoreArrayAndMemType(yy, &ay)); 526 PetscFunctionReturn(PETSC_SUCCESS); 527 } 528 529 static PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A, Vec 
xx, Vec yy, Vec zz) 530 { 531 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 532 const PetscScalar *ax; 533 PetscScalar *ay; 534 PetscMemType axmtype, aymtype; 535 536 PetscFunctionBegin; 537 if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); 538 PetscCall(VecCopy(yy, zz)); 539 PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec)); 540 PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype)); 541 PetscCall(VecGetArrayAndMemType(zz, &ay, &aymtype)); 542 PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM)); 543 PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM)); 544 PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax)); 545 PetscCall(VecRestoreArrayAndMemType(zz, &ay)); 546 PetscFunctionReturn(PETSC_SUCCESS); 547 } 548 549 PetscErrorCode MatGetDiagonal_MPIDense(Mat A, Vec v) 550 { 551 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 552 PetscInt lda, len, i, nl, ng, m = A->rmap->n, radd; 553 PetscScalar *x; 554 const PetscScalar *av; 555 556 PetscFunctionBegin; 557 PetscCall(VecGetArray(v, &x)); 558 PetscCall(VecGetSize(v, &ng)); 559 PetscCheck(ng == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming mat and vec"); 560 PetscCall(VecGetLocalSize(v, &nl)); 561 len = PetscMin(a->A->rmap->n, a->A->cmap->n); 562 radd = A->rmap->rstart * m; 563 PetscCall(MatDenseGetArrayRead(a->A, &av)); 564 PetscCall(MatDenseGetLDA(a->A, &lda)); 565 for (i = 0; i < len; i++) x[i] = av[radd + i * lda + i]; 566 PetscCall(MatDenseRestoreArrayRead(a->A, &av)); 567 PetscCall(PetscArrayzero(x + i, nl - i)); 568 PetscCall(VecRestoreArray(v, &x)); 569 PetscFunctionReturn(PETSC_SUCCESS); 570 } 571 572 static PetscErrorCode MatDestroy_MPIDense(Mat mat) 573 { 574 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 575 576 PetscFunctionBegin; 577 PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N)); 578 PetscCall(MatStashDestroy_Private(&mat->stash)); 579 
PetscCheck(!mdn->vecinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 580 PetscCheck(!mdn->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 581 PetscCall(MatDestroy(&mdn->A)); 582 PetscCall(VecDestroy(&mdn->lvec)); 583 PetscCall(PetscSFDestroy(&mdn->Mvctx)); 584 PetscCall(VecDestroy(&mdn->cvec)); 585 PetscCall(MatDestroy(&mdn->cmat)); 586 587 PetscCall(PetscFree(mat->data)); 588 PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL)); 589 590 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", NULL)); 591 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", NULL)); 592 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", NULL)); 593 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", NULL)); 594 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", NULL)); 595 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", NULL)); 596 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", NULL)); 597 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", NULL)); 598 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", NULL)); 599 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", NULL)); 600 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", NULL)); 601 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL)); 602 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", NULL)); 603 #if defined(PETSC_HAVE_ELEMENTAL) 604 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", NULL)); 605 #endif 606 #if defined(PETSC_HAVE_SCALAPACK) 607 
PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", NULL)); 608 #endif 609 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", NULL)); 610 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", NULL)); 611 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", NULL)); 612 #if defined(PETSC_HAVE_CUDA) 613 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", NULL)); 614 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", NULL)); 615 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", NULL)); 616 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensecuda_mpidense_C", NULL)); 617 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", NULL)); 618 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", NULL)); 619 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", NULL)); 620 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", NULL)); 621 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArray_C", NULL)); 622 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayRead_C", NULL)); 623 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayWrite_C", NULL)); 624 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArray_C", NULL)); 625 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayRead_C", NULL)); 626 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayWrite_C", NULL)); 627 
PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAPlaceArray_C", NULL)); 628 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAResetArray_C", NULL)); 629 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAReplaceArray_C", NULL)); 630 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDASetPreallocation_C", NULL)); 631 #endif 632 #if defined(PETSC_HAVE_HIP) 633 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", NULL)); 634 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", NULL)); 635 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", NULL)); 636 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensehip_mpidense_C", NULL)); 637 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensehip_C", NULL)); 638 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidensehip_C", NULL)); 639 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaij_C", NULL)); 640 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaijhipsparse_C", NULL)); 641 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArray_C", NULL)); 642 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayRead_C", NULL)); 643 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayWrite_C", NULL)); 644 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArray_C", NULL)); 645 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayRead_C", NULL)); 646 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayWrite_C", NULL)); 647 
PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPPlaceArray_C", NULL)); 648 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPResetArray_C", NULL)); 649 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPReplaceArray_C", NULL)); 650 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPSetPreallocation_C", NULL)); 651 #endif 652 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", NULL)); 653 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", NULL)); 654 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", NULL)); 655 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", NULL)); 656 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", NULL)); 657 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", NULL)); 658 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", NULL)); 659 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", NULL)); 660 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", NULL)); 661 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", NULL)); 662 663 PetscCall(PetscObjectCompose((PetscObject)mat, "DiagonalBlock", NULL)); 664 PetscFunctionReturn(PETSC_SUCCESS); 665 } 666 667 PETSC_INTERN PetscErrorCode MatView_SeqDense(Mat, PetscViewer); 668 669 #include <petscdraw.h> 670 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat, PetscViewer viewer) 671 { 672 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 673 PetscMPIInt rank; 674 PetscViewerType vtype; 675 PetscBool iascii, isdraw; 676 PetscViewer sviewer; 677 PetscViewerFormat format; 678 679 PetscFunctionBegin; 680 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 681 
PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 682 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 683 if (iascii) { 684 PetscCall(PetscViewerGetType(viewer, &vtype)); 685 PetscCall(PetscViewerGetFormat(viewer, &format)); 686 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 687 MatInfo info; 688 PetscCall(MatGetInfo(mat, MAT_LOCAL, &info)); 689 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 690 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %" PetscInt_FMT " \n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, 691 (PetscInt)info.memory)); 692 PetscCall(PetscViewerFlush(viewer)); 693 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 694 if (mdn->Mvctx) PetscCall(PetscSFView(mdn->Mvctx, viewer)); 695 PetscFunctionReturn(PETSC_SUCCESS); 696 } else if (format == PETSC_VIEWER_ASCII_INFO) { 697 PetscFunctionReturn(PETSC_SUCCESS); 698 } 699 } else if (isdraw) { 700 PetscDraw draw; 701 PetscBool isnull; 702 703 PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw)); 704 PetscCall(PetscDrawIsNull(draw, &isnull)); 705 if (isnull) PetscFunctionReturn(PETSC_SUCCESS); 706 } 707 708 { 709 /* assemble the entire matrix onto first processor. */ 710 Mat A; 711 PetscInt M = mat->rmap->N, N = mat->cmap->N, m, row, i, nz; 712 PetscInt *cols; 713 PetscScalar *vals; 714 715 PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A)); 716 if (rank == 0) { 717 PetscCall(MatSetSizes(A, M, N, M, N)); 718 } else { 719 PetscCall(MatSetSizes(A, 0, 0, M, N)); 720 } 721 /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */ 722 PetscCall(MatSetType(A, MATMPIDENSE)); 723 PetscCall(MatMPIDenseSetPreallocation(A, NULL)); 724 725 /* Copy the matrix ... 
This isn't the most efficient means, 726 but it's quick for now */ 727 A->insertmode = INSERT_VALUES; 728 729 row = mat->rmap->rstart; 730 m = mdn->A->rmap->n; 731 for (i = 0; i < m; i++) { 732 PetscCall(MatGetRow_MPIDense(mat, row, &nz, &cols, &vals)); 733 PetscCall(MatSetValues_MPIDense(A, 1, &row, nz, cols, vals, INSERT_VALUES)); 734 PetscCall(MatRestoreRow_MPIDense(mat, row, &nz, &cols, &vals)); 735 row++; 736 } 737 738 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 739 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 740 PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 741 if (rank == 0) { 742 PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIDense *)(A->data))->A, ((PetscObject)mat)->name)); 743 PetscCall(MatView_SeqDense(((Mat_MPIDense *)(A->data))->A, sviewer)); 744 } 745 PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 746 PetscCall(MatDestroy(&A)); 747 } 748 PetscFunctionReturn(PETSC_SUCCESS); 749 } 750 751 static PetscErrorCode MatView_MPIDense(Mat mat, PetscViewer viewer) 752 { 753 PetscBool iascii, isbinary, isdraw, issocket; 754 755 PetscFunctionBegin; 756 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 757 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 758 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket)); 759 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 760 761 if (iascii || issocket || isdraw) { 762 PetscCall(MatView_MPIDense_ASCIIorDraworSocket(mat, viewer)); 763 } else if (isbinary) PetscCall(MatView_Dense_Binary(mat, viewer)); 764 PetscFunctionReturn(PETSC_SUCCESS); 765 } 766 767 static PetscErrorCode MatGetInfo_MPIDense(Mat A, MatInfoType flag, MatInfo *info) 768 { 769 Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 770 Mat mdn = mat->A; 771 PetscLogDouble isend[5], irecv[5]; 772 773 PetscFunctionBegin; 774 info->block_size = 1.0; 775 776 
  PetscCall(MatGetInfo(mdn, MAT_LOCAL, info));

  /* stage the local counters so they can be reduced for the global flavors */
  isend[0] = info->nz_used;
  isend[1] = info->nz_allocated;
  isend[2] = info->nz_unneeded;
  isend[3] = info->memory;
  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)A)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)A)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Forward matrix options to the local dense block where meaningful; options
   that do not apply to dense storage are logged and ignored */
static PetscErrorCode MatSetOption_MPIDense(Mat A, MatOption op, PetscBool flg)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A, 1);
    a->roworiented = flg; /* remembered for MatSetValues insertion order */
    PetscCall(MatSetOption(a->A, op, flg));
    break;
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_USE_HASH_TABLE:
  case MAT_SORTED_FULL:
    PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg; /* skip stashing of off-process entries during assembly */
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
  case MAT_SPD:
  case MAT_IGNORE_LOWER_TRIANGULAR:
  case MAT_IGNORE_ZERO_ENTRIES:
  case MAT_SPD_ETERNAL:
    /* if the diagonal matrix is square it inherits some of the properties above */
    PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
    break;
  default:
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %s", MatOptions[op]);
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Scale rows by ll and columns by rr in place. Column scaling needs the
   ghosted copy of rr, communicated through the Mvctx star forest
   (function continues past this span) */
static PetscErrorCode MatDiagonalScale_MPIDense(Mat A, Vec ll, Vec rr)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)A->data;
  const PetscScalar *l;
  PetscScalar        x, *v, *vv, *r;
  PetscInt           i, j, s2a, s3a, s2, s3, m = mdn->A->rmap->n, n = mdn->A->cmap->n, lda;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArray(mdn->A, &vv));
  PetscCall(MatDenseGetLDA(mdn->A, &lda));
  PetscCall(MatGetLocalSize(A, &s2, &s3));
  if (ll) {
    PetscCall(VecGetLocalSize(ll, &s2a));
    PetscCheck(s2a == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left scaling vector non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT, s2a, s2);
    PetscCall(VecGetArrayRead(ll, &l));
    for (i = 0; i < m; i++) {
      x = l[i];
      v = vv + i; /* walk row i across columns, stride lda */
      for (j = 0; j < n; j++) {
        (*v) *= x;
        v += lda;
      }
    }
    PetscCall(VecRestoreArrayRead(ll, &l));
    PetscCall(PetscLogFlops(1.0 * n * m));
  }
  if (rr) {
    const PetscScalar *ar;

    PetscCall(VecGetLocalSize(rr, &s3a));
    PetscCheck(s3a == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Right scaling vec non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT ".", s3a, s3);
    PetscCall(VecGetArrayRead(rr, &ar));
    if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); /* lazily build the communication pattern */
    PetscCall(VecGetArray(mdn->lvec, &r));
    PetscCall(PetscSFBcastBegin(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE));
    PetscCall(VecRestoreArrayRead(rr, &ar));
    /* scale column i by the ghosted right-scaling value r[i] */
    for (i = 0; i < n; i++) {
      x = r[i];
      v = vv + i * lda;
      for (j = 0; j < m; j++) (*v++) *= x;
    }
    PetscCall(VecRestoreArray(mdn->lvec, &r));
    PetscCall(PetscLogFlops(1.0 * n * m));
  }
  PetscCall(MatDenseRestoreArray(mdn->A, &vv));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Frobenius, one, and infinity norms; the sequential answer is used directly
   on one rank, otherwise partial results are reduced over the communicator.
   The two norm is not supported in parallel. */
static PetscErrorCode MatNorm_MPIDense(Mat A, NormType type, PetscReal *nrm)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)A->data;
  PetscInt           i, j;
  PetscMPIInt        size;
  PetscReal          sum = 0.0;
  const PetscScalar *av, *v;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArrayRead(mdn->A, &av));
  v = av;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    PetscCall(MatNorm(mdn->A, type, nrm));
  } else {
    if (type == NORM_FROBENIUS) {
      /* NOTE(review): walks the local array assuming contiguous storage
         (lda == local rows) — confirm for strided layouts */
      for (i = 0; i < mdn->A->cmap->n * mdn->A->rmap->n; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
      *nrm = PetscSqrtReal(*nrm);
      PetscCall(PetscLogFlops(2.0 * mdn->A->cmap->n * mdn->A->rmap->n));
    } else if (type == NORM_1) {
      /* max over columns of the summed absolute column entries */
      PetscReal *tmp, *tmp2;
      PetscCall(PetscCalloc2(A->cmap->N, &tmp, A->cmap->N, &tmp2));
      *nrm = 0.0;
      v    = av;
      for (j = 0; j < mdn->A->cmap->n; j++) {
        for (i = 0; i < mdn->A->rmap->n; i++) {
          tmp[j] += PetscAbsScalar(*v);
          v++;
        }
      }
      PetscCall(MPIU_Allreduce(tmp, tmp2, A->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
      for (j = 0; j < A->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      PetscCall(PetscFree2(tmp, tmp2));
      PetscCall(PetscLogFlops(A->cmap->n * A->rmap->n));
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp;
      PetscCall(MatNorm(mdn->A, type, &ntemp));
      PetscCall(MPIU_Allreduce(&ntemp, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
    } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for two norm");
  }
  PetscCall(MatDenseRestoreArrayRead(mdn->A, &av));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Transpose by scattering each local column as a row of the result with
   MatSetValues; supports initial, reuse, and in-place modes */
static PetscErrorCode MatTranspose_MPIDense(Mat A, MatReuse reuse, Mat *matout)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat           B;
  PetscInt      M = A->rmap->N, N = A->cmap->N, m, n, *rwork, rstart = A->rmap->rstart;
  PetscInt      j, i, lda;
  PetscScalar  *v;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
    PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
    PetscCall(MatMPIDenseSetPreallocation(B, NULL));
  } else B = *matout;

  m = a->A->rmap->n;
  n = a->A->cmap->n;
  PetscCall(MatDenseGetArrayRead(a->A, (const PetscScalar **)&v));
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(PetscMalloc1(m, &rwork));
  for (i = 0; i < m; i++) rwork[i] = rstart + i; /* global row indices of the local rows */
  for (j = 0; j < n; j++) {
    PetscCall(MatSetValues(B, 1, &j, m, rwork, v, INSERT_VALUES));
    v += lda;
  }
  PetscCall(MatDenseRestoreArrayRead(a->A, (const PetscScalar **)&v));
  PetscCall(PetscFree(rwork));
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    PetscCall(MatHeaderMerge(A, &B)); /* in-place: fold B into A's header */
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatDuplicate_MPIDense(Mat, MatDuplicateOption, Mat *);
PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat, PetscScalar);

static PetscErrorCode
MatSetUp_MPIDense(Mat A)
{
  /* Finalize layouts and preallocate the local block if not done yet */
  PetscFunctionBegin;
  PetscCall(PetscLayoutSetUp(A->rmap));
  PetscCall(PetscLayoutSetUp(A->cmap));
  if (!A->preallocated) PetscCall(MatMPIDenseSetPreallocation(A, NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Y += alpha*X, delegated to the local dense blocks */
static PetscErrorCode MatAXPY_MPIDense(Mat Y, PetscScalar alpha, Mat X, MatStructure str)
{
  Mat_MPIDense *A = (Mat_MPIDense *)Y->data, *B = (Mat_MPIDense *)X->data;

  PetscFunctionBegin;
  PetscCall(MatAXPY(A->A, alpha, B->A, str));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Complex-conjugate all entries in place (local operation) */
static PetscErrorCode MatConjugate_MPIDense(Mat mat)
{
  Mat_MPIDense *a = (Mat_MPIDense *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatConjugate(a->A));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Keep only the real part of each entry (local operation) */
static PetscErrorCode MatRealPart_MPIDense(Mat A)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatRealPart(a->A));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Keep only the imaginary part of each entry (local operation) */
static PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatImaginaryPart(a->A));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Extract global column col into v by forwarding to the local block's op */
static PetscErrorCode MatGetColumnVector_MPIDense(Mat A, Vec v, PetscInt col)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(a->A, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing local matrix");
  PetscCheck(a->A->ops->getcolumnvector, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing get column operation");
  PetscCall((*a->A->ops->getcolumnvector)(a->A, v, col));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat, PetscInt, PetscReal *);

/* Column-wise reductions (norms, sums, means): compute the local reduction,
   then combine across ranks with MAX (infinity norm) or SUM (everything else).
   Mean flavors are computed as global sums divided by the global row count;
   the 2-norm is squared before and square-rooted after the reduction. */
static PetscErrorCode MatGetColumnReductions_MPIDense(Mat A, PetscInt type, PetscReal *reductions)
{
  PetscInt      i, m, n;
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscReal    *work;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A, &m, &n));
  PetscCall(PetscMalloc1(n, &work));
  if (type == REDUCTION_MEAN_REALPART) {
    PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_REALPART, work));
  } else if (type == REDUCTION_MEAN_IMAGINARYPART) {
    PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_IMAGINARYPART, work));
  } else {
    PetscCall(MatGetColumnReductions_SeqDense(a->A, type, work));
  }
  if (type == NORM_2) {
    for (i = 0; i < n; i++) work[i] *= work[i]; /* sum of squares is additive across ranks */
  }
  if (type == NORM_INFINITY) {
    PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, A->hdr.comm));
  } else {
    PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, A->hdr.comm));
  }
  PetscCall(PetscFree(work));
  if (type == NORM_2) {
    for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
  } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < n; i++) reductions[i] /= m;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Fill with random values; mirror the local block's offload state on device builds */
static PetscErrorCode MatSetRandom_MPIDense(Mat x, PetscRandom rctx)
{
  Mat_MPIDense *d = (Mat_MPIDense *)x->data;

  PetscFunctionBegin;
  PetscCall(MatSetRandom(d->A, rctx));
#if defined(PETSC_HAVE_DEVICE)
  x->offloadmask = d->A->offloadmask;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* A dense matrix always stores its diagonal, so nothing is ever missing */
static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A, PetscBool *missing, PetscInt *d)
{
  PetscFunctionBegin;
  *missing = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat);
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat);
static PetscErrorCode
MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat);
static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat);
static PetscErrorCode MatEqual_MPIDense(Mat, Mat, PetscBool *);
static PetscErrorCode MatLoad_MPIDense(Mat, PetscViewer);

/* Operation table for MATMPIDENSE. Entries are POSITIONAL: the slot index
   (noted in the interspersed comments) is the MatOperation id, so entries
   must never be reordered; NULL means the operation is unimplemented. */
static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
                                       MatGetRow_MPIDense,
                                       MatRestoreRow_MPIDense,
                                       MatMult_MPIDense,
                                       /* 4*/ MatMultAdd_MPIDense,
                                       MatMultTranspose_MPIDense,
                                       MatMultTransposeAdd_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 10*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatTranspose_MPIDense,
                                       /* 15*/ MatGetInfo_MPIDense,
                                       MatEqual_MPIDense,
                                       MatGetDiagonal_MPIDense,
                                       MatDiagonalScale_MPIDense,
                                       MatNorm_MPIDense,
                                       /* 20*/ MatAssemblyBegin_MPIDense,
                                       MatAssemblyEnd_MPIDense,
                                       MatSetOption_MPIDense,
                                       MatZeroEntries_MPIDense,
                                       /* 24*/ MatZeroRows_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 29*/ MatSetUp_MPIDense,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPIDense,
                                       NULL,
                                       /* 34*/ MatDuplicate_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 39*/ MatAXPY_MPIDense,
                                       MatCreateSubMatrices_MPIDense,
                                       NULL,
                                       MatGetValues_MPIDense,
                                       MatCopy_MPIDense,
                                       /* 44*/ NULL,
                                       MatScale_MPIDense,
                                       MatShift_MPIDense,
                                       NULL,
                                       NULL,
                                       /* 49*/ MatSetRandom_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 54*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 59*/ MatCreateSubMatrix_MPIDense,
                                       MatDestroy_MPIDense,
                                       MatView_MPIDense,
                                       NULL,
                                       NULL,
                                       /* 64*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 69*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 74*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 79*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 83*/ MatLoad_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 89*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 94*/ NULL,
                                       NULL,
                                       MatMatTransposeMultSymbolic_MPIDense_MPIDense,
                                       MatMatTransposeMultNumeric_MPIDense_MPIDense,
                                       NULL,
                                       /* 99*/ MatProductSetFromOptions_MPIDense,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPIDense,
                                       NULL,
                                       /*104*/ NULL,
                                       MatRealPart_MPIDense,
                                       MatImaginaryPart_MPIDense,
                                       NULL,
                                       NULL,
                                       /*109*/ NULL,
                                       NULL,
                                       NULL,
                                       MatGetColumnVector_MPIDense,
                                       MatMissingDiagonal_MPIDense,
                                       /*114*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*119*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*124*/ NULL,
                                       MatGetColumnReductions_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*129*/ NULL,
                                       NULL,
                                       MatTransposeMatMultSymbolic_MPIDense_MPIDense,
                                       MatTransposeMatMultNumeric_MPIDense_MPIDense,
                                       NULL,
                                       /*134*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*139*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatCreateMPIMatConcatenateSeqMat_MPIDense,
                                       /*145*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*150*/ NULL,
                                       NULL};

/* Create (if needed) and preallocate the local sequential dense block; the
   sequential type is matched to the parallel type (host/CUDA/HIP).
   (function continues past this span) */
static PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat, PetscScalar *data)
{
  Mat_MPIDense *a     = (Mat_MPIDense *)mat->data;
  MatType       mtype = MATSEQDENSE;

  PetscFunctionBegin;
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));
  if (!a->A) {
    PetscCall(MatCreate(PETSC_COMM_SELF, &a->A));
    /* local block holds all global columns for the local rows */
    PetscCall(MatSetSizes(a->A, mat->rmap->n, mat->cmap->N, mat->rmap->n, mat->cmap->N));
  }
#if defined(PETSC_HAVE_CUDA)
  PetscBool iscuda;
  PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSECUDA, &iscuda));
  if (iscuda) mtype = MATSEQDENSECUDA;
#endif
#if defined(PETSC_HAVE_HIP)
  PetscBool iship;
  PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSEHIP, &iship));
  if (iship) mtype = MATSEQDENSEHIP;
#endif
  PetscCall(MatSetType(a->A, mtype));
  PetscCall(MatSeqDenseSetPreallocation(a->A, data));
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
  mat->offloadmask = a->A->offloadmask;
#endif
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Convert MPIAIJ -> MPIDense: densify the local AIJ part, then concatenate
   the per-rank sequential blocks into a parallel dense matrix */
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIDense(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat B, C;

  PetscFunctionBegin;
  PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &C));
  PetscCall(MatConvert_SeqAIJ_SeqDense(C, MATSEQDENSE, MAT_INITIAL_MATRIX, &B));
  PetscCall(MatDestroy(&C));
  if (reuse == MAT_REUSE_MATRIX) {
    C = *newmat;
  } else C = NULL;
  PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C));
  PetscCall(MatDestroy(&B));
  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A, &C));
  } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Convert MPIDense -> MPIAIJ: sparsify the local dense block, then concatenate */
static PetscErrorCode MatConvert_MPIDense_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat B, C;

  PetscFunctionBegin;
  PetscCall(MatDenseGetLocalMatrix(A, &C));
  PetscCall(MatConvert_SeqDense_SeqAIJ(C, MATSEQAIJ, MAT_INITIAL_MATRIX, &B));
  if (reuse == MAT_REUSE_MATRIX) {
    C = *newmat;
  } else C = NULL;
  PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C));
  PetscCall(MatDestroy(&B));
  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A, &C));
  } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
  PetscFunctionReturn(PETSC_SUCCESS);
}

#if defined(PETSC_HAVE_ELEMENTAL)
/* Convert to Elemental format by inserting the local rows with ADD_VALUES
   (function continues past this span) */
PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat           mat_elemental;
  PetscScalar  *v;
  PetscInt      m = A->rmap->n, N = A->cmap->N, rstart = A->rmap->rstart, i, *rows, *cols, lda;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    mat_elemental = *newmat;
    PetscCall(MatZeroEntries(*newmat));
  } else {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental));
    PetscCall(MatSetSizes(mat_elemental, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N));
    PetscCall(MatSetType(mat_elemental, MATELEMENTAL));
    PetscCall(MatSetUp(mat_elemental));
    PetscCall(MatSetOption(mat_elemental, MAT_ROW_ORIENTED, PETSC_FALSE));
  }

  PetscCall(PetscMalloc2(m, &rows, N, &cols));
  for (i = 0; i < N; i++) cols[i] = i;
  for (i = 0; i < m; i++) rows[i] = rstart + i;

  /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
  PetscCall(MatDenseGetArray(A, &v));
  PetscCall(MatDenseGetLDA(a->A, &lda));
  if (lda == m) PetscCall(MatSetValues(mat_elemental, m, rows, N, cols, v, ADD_VALUES));
  else {
    /* strided storage: insert one column at a time */
    for (i = 0; i < N; i++) PetscCall(MatSetValues(mat_elemental, m, rows, 1, &i, v + lda * i, ADD_VALUES));
  }
  PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY));
  PetscCall(MatDenseRestoreArray(A, &v));
  PetscCall(PetscFree2(rows, cols));

  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A,
&mat_elemental));
  } else {
    *newmat = mat_elemental;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif

/* Borrow a pointer to column col of the local array (pair with restore) */
static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A, PetscInt col, PetscScalar **vals)
{
  Mat_MPIDense *mat = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseGetColumn(mat->A, col, vals));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Return the pointer obtained with MatDenseGetColumn_MPIDense() */
static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A, PetscScalar **vals)
{
  Mat_MPIDense *mat = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseRestoreColumn(mat->A, vals));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Build (symbolic phase) or refill (numeric phase) a parallel dense matrix
   whose local block on each rank is a copy of the given sequential matrix */
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
{
  Mat_MPIDense *mat;
  PetscInt      m, nloc, N;

  PetscFunctionBegin;
  PetscCall(MatGetSize(inmat, &m, &N));
  PetscCall(MatGetLocalSize(inmat, NULL, &nloc));
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    PetscInt sum;

    if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N));
    /* Check sum(n) = N */
    PetscCall(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm));
    PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N);

    PetscCall(MatCreateDense(comm, m, n, PETSC_DETERMINE, N, NULL, outmat));
    PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  }

  /* numeric phase */
  mat = (Mat_MPIDense *)(*outmat)->data;
  PetscCall(MatCopy(inmat, mat->A, SAME_NONZERO_PATTERN));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Expose column col of the local block as a Vec; vecinuse tracks the active
   column (col + 1, so 0 means "free") until the matching restore */
PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscInt      lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
  a->vecinuse = col + 1;
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(MatDenseGetArray(a->A, (PetscScalar **)&a->ptrinuse));
  PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
  *v = a->cvec;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Release the column Vec obtained with MatDenseGetColumnVec_MPIDense() */
PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector");
  a->vecinuse = 0;
  PetscCall(MatDenseRestoreArray(a->A, (PetscScalar **)&a->ptrinuse));
  PetscCall(VecResetArray(a->cvec));
  if (v) *v = NULL;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Read-only variant: the returned Vec is additionally read-locked
   (function continues past this span) */
PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscInt      lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
  a->vecinuse = col + 1;
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(MatDenseGetArrayRead(a->A, &a->ptrinuse));
  PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
  PetscCall(VecLockReadPush(a->cvec));
  *v = a->cvec;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Release a read-only column Vec: pop the read lock and detach the array */
PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
  a->vecinuse = 0;
  PetscCall(MatDenseRestoreArrayRead(a->A, &a->ptrinuse));
  PetscCall(VecLockReadPop(a->cvec));
  PetscCall(VecResetArray(a->cvec));
  if (v) *v = NULL;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Write-only variant of the column Vec accessor */
PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscInt      lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
  a->vecinuse = col + 1;
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(MatDenseGetArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
  PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
  *v = a->cvec;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Release a write-only column Vec obtained above */
PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
  a->vecinuse = 0;
  PetscCall(MatDenseRestoreArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
  PetscCall(VecResetArray(a->cvec));
  if (v) *v = NULL;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Expose the contiguous submatrix [rbegin,rend) x [cbegin,cend) as a cached
   MATMPIDENSE (a->cmat); pbegin/pend clip the requested global row range to
   this rank's ownership. The cached matrix's layouts are rebuilt only when
   the requested size changed. (function continues past this span) */
static PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A, PetscInt rbegin, PetscInt rend, PetscInt cbegin, PetscInt cend, Mat *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat_MPIDense *c;
  MPI_Comm      comm;
  PetscInt      pbegin, pend;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCheck(!a->vecinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  pbegin = PetscMax(0, PetscMin(A->rmap->rend, rbegin) - A->rmap->rstart);
  pend   = PetscMin(A->rmap->n, PetscMax(0, rend - A->rmap->rstart));
  if (!a->cmat) {
    PetscCall(MatCreate(comm, &a->cmat));
    PetscCall(MatSetType(a->cmat, ((PetscObject)A)->type_name));
    if (rend - rbegin == A->rmap->N) PetscCall(PetscLayoutReference(A->rmap, &a->cmat->rmap));
    else {
      PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin));
      PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin));
      PetscCall(PetscLayoutSetUp(a->cmat->rmap));
    }
    PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin));
    PetscCall(PetscLayoutSetUp(a->cmat->cmap));
  } else {
    /* reuse the cached submatrix only if the row layout still matches everywhere */
    PetscBool same = (PetscBool)(rend - rbegin == a->cmat->rmap->N);
    if (same && a->cmat->rmap->N != A->rmap->N) {
      same = (PetscBool)(pend - pbegin == a->cmat->rmap->n);
      PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &same, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
    }
    if (!same) {
      PetscCall(PetscLayoutDestroy(&a->cmat->rmap));
      PetscCall(PetscLayoutCreate(comm, &a->cmat->rmap));
      PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin));
      PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin));
      PetscCall(PetscLayoutSetUp(a->cmat->rmap));
    }
    if (cend - cbegin != a->cmat->cmap->N) {
      PetscCall(PetscLayoutDestroy(&a->cmat->cmap));
      PetscCall(PetscLayoutCreate(comm, &a->cmat->cmap));
      PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin));
      PetscCall(PetscLayoutSetUp(a->cmat->cmap));
    }
  }
  c = (Mat_MPIDense *)a->cmat->data;
  PetscCheck(!c->A, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseGetSubMatrix(a->A, pbegin, pend, cbegin, cend, &c->A));

  a->cmat->preallocated = PETSC_TRUE;
  a->cmat->assembled    = PETSC_TRUE;
#if defined(PETSC_HAVE_DEVICE)
  a->cmat->offloadmask = c->A->offloadmask;
#endif
  a->matinuse = cbegin + 1; /* nonzero marks the cached submatrix as checked out */
  *v          = a->cmat;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Return the cached submatrix obtained with MatDenseGetSubMatrix_MPIDense() */
static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A, Mat *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat_MPIDense *c;

  PetscFunctionBegin;
  PetscCheck(a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetSubMatrix() first");
  PetscCheck(a->cmat, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal matrix");
  PetscCheck(*v == a->cmat, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Not the matrix obtained from MatDenseGetSubMatrix()");
  a->matinuse = 0;
  c           = (Mat_MPIDense *)a->cmat->data;
  PetscCall(MatDenseRestoreSubMatrix(a->A, &c->A));
  if (v) *v = NULL;
#if defined(PETSC_HAVE_DEVICE)
  A->offloadmask = a->A->offloadmask;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*MC
   MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.

   Options Database Key:
. -mat_type mpidense - sets the matrix type to `MATMPIDENSE` during a call to `MatSetFromOptions()`

   Level: beginner

.seealso: [](ch_matrices), `Mat`, `MatCreateDense()`, `MATSEQDENSE`, `MATDENSE`
M*/
/* Constructor for MATMPIDENSE: allocate the type data, install the op table,
   and compose the dense query functions (continues past this span) */
PetscErrorCode MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense *a;

  PetscFunctionBegin;
  PetscCall(PetscNew(&a));
  mat->data   = (void *)a;
  mat->ops[0] = MatOps_Values;

  mat->insertmode = NOT_SET_VALUES;

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;

  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)mat), 1, &mat->stash));

  /* stuff used for matrix vector multiply */
  a->lvec        = NULL;
  a->Mvctx       = NULL;
  a->roworiented = PETSC_TRUE;

  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", MatDenseGetLDA_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", MatDenseSetLDA_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", MatDenseGetArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", MatDenseRestoreArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", MatDenseGetArrayRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", MatDenseRestoreArrayRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", MatDenseGetArrayWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", MatDenseRestoreArrayWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", MatDensePlaceArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", MatDenseResetArray_MPIDense));
PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", MatDenseReplaceArray_MPIDense)); 1640 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDense)); 1641 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDense)); 1642 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDense)); 1643 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDense)); 1644 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDense)); 1645 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDense)); 1646 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", MatDenseGetSubMatrix_MPIDense)); 1647 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", MatDenseRestoreSubMatrix_MPIDense)); 1648 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense)); 1649 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", MatConvert_MPIDense_MPIAIJ)); 1650 #if defined(PETSC_HAVE_ELEMENTAL) 1651 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", MatConvert_MPIDense_Elemental)); 1652 #endif 1653 #if defined(PETSC_HAVE_SCALAPACK) 1654 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", MatConvert_Dense_ScaLAPACK)); 1655 #endif 1656 #if defined(PETSC_HAVE_CUDA) 1657 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", MatConvert_MPIDense_MPIDenseCUDA)); 1658 #endif 1659 PetscCall(PetscObjectComposeFunction((PetscObject)mat, 
"MatMPIDenseSetPreallocation_C", MatMPIDenseSetPreallocation_MPIDense)); 1660 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense)); 1661 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ)); 1662 #if defined(PETSC_HAVE_CUDA) 1663 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense)); 1664 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ)); 1665 #endif 1666 #if defined(PETSC_HAVE_HIP) 1667 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", MatConvert_MPIDense_MPIDenseHIP)); 1668 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense)); 1669 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ)); 1670 #endif 1671 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", MatDenseGetColumn_MPIDense)); 1672 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", MatDenseRestoreColumn_MPIDense)); 1673 PetscCall(PetscObjectChangeTypeName((PetscObject)mat, MATMPIDENSE)); 1674 PetscFunctionReturn(PETSC_SUCCESS); 1675 } 1676 1677 /*MC 1678 MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices. 1679 1680 This matrix type is identical to `MATSEQDENSE` when constructed with a single process communicator, 1681 and `MATMPIDENSE` otherwise. 1682 1683 Options Database Key: 1684 . 
 -mat_type dense - sets the matrix type to `MATDENSE` during a call to `MatSetFromOptions()`

  Level: beginner

.seealso: [](ch_matrices), `Mat`, `MATSEQDENSE`, `MATMPIDENSE`, `MATDENSECUDA`, `MATDENSEHIP`
M*/

/*@C
  MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

  Collective

  Input Parameters:
+ B    - the matrix
- data - optional location of matrix data. Set to `NULL` for PETSc
         to control all matrix memory allocation.

  Level: intermediate

  Notes:
  The dense format is fully compatible with standard Fortran
  storage by columns.

  The data input variable is intended primarily for Fortran programmers
  who wish to allocate their own matrix memory space. Most users should
  set `data` to `NULL`.

.seealso: [](ch_matrices), `Mat`, `MATMPIDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
@*/
PetscErrorCode MatMPIDenseSetPreallocation(Mat B, PetscScalar *data)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  /* PetscTryMethod: a no-op when B is not a type that composes this method */
  PetscTryMethod(B, "MatMPIDenseSetPreallocation_C", (Mat, PetscScalar *), (B, data));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatDensePlaceArray - Allows one to replace the array in a `MATDENSE` matrix with an
  array provided by the user. This is useful to avoid copying an array
  into a matrix

  Not Collective

  Input Parameters:
+ mat   - the matrix
- array - the array in column major order

  Level: developer

  Note:
  You can return to the original array with a call to `MatDenseResetArray()`. The user is responsible for freeing this array; it will not be
  freed when the matrix is destroyed.

.seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDenseResetArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`,
          `MatDenseReplaceArray()`
@*/
PetscErrorCode MatDensePlaceArray(Mat mat, const PetscScalar *array)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  PetscUseMethod(mat, "MatDensePlaceArray_C", (Mat, const PetscScalar *), (mat, array));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
  /* the user-supplied array lives on the host, so the valid copy is now the CPU one */
  mat->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatDenseResetArray - Resets the matrix array to that it previously had before the call to `MatDensePlaceArray()`

  Not Collective

  Input Parameter:
. mat - the matrix

  Level: developer

  Note:
  You can only call this after a call to `MatDensePlaceArray()`

.seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDensePlaceArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`
@*/
PetscErrorCode MatDenseResetArray(Mat mat)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  PetscUseMethod(mat, "MatDenseResetArray_C", (Mat), (mat));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an
  array provided by the user.
 This is useful to avoid copying an array
  into a matrix

  Not Collective

  Input Parameters:
+ mat   - the matrix
- array - the array in column major order

  Level: developer

  Note:
  The memory passed in MUST be obtained with `PetscMalloc()` and CANNOT be
  freed by the user. It will be freed when the matrix is destroyed.

.seealso: [](ch_matrices), `Mat`, `MatDensePlaceArray()`, `MatDenseGetArray()`, `VecReplaceArray()`
@*/
PetscErrorCode MatDenseReplaceArray(Mat mat, const PetscScalar *array)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  PetscUseMethod(mat, "MatDenseReplaceArray_C", (Mat, const PetscScalar *), (mat, array));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
  /* the replacement array lives on the host, so the valid copy is now the CPU one */
  mat->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatCreateDense - Creates a matrix in `MATDENSE` format.

  Collective

  Input Parameters:
+ comm - MPI communicator
. m    - number of local rows (or `PETSC_DECIDE` to have calculated if `M` is given)
. n    - number of local columns (or `PETSC_DECIDE` to have calculated if `N` is given)
. M    - number of global rows (or `PETSC_DECIDE` to have calculated if `m` is given)
. N    - number of global columns (or `PETSC_DECIDE` to have calculated if `n` is given)
- data - optional location of matrix data. Set data to `NULL` (`PETSC_NULL_SCALAR` for Fortran users) for PETSc
         to control all matrix memory allocation.

  Output Parameter:
. A - the matrix

  Level: intermediate

  Notes:
  The dense format is fully compatible with standard Fortran
  storage by columns.

  Although local portions of the matrix are stored in column-major
  order, the matrix is partitioned across MPI ranks by row.

  The data input variable is intended primarily for Fortran programmers
  who wish to allocate their own matrix memory space. Most users should
  set `data` to `NULL` (`PETSC_NULL_SCALAR` for Fortran users).

  The user MUST specify either the local or global matrix dimensions
  (possibly both).

.seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
@*/
PetscErrorCode MatCreateDense(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar *data, Mat *A)
{
  PetscFunctionBegin;
  PetscCall(MatCreate(comm, A));
  PetscCall(MatSetSizes(*A, m, n, M, N));
  PetscCall(MatSetType(*A, MATDENSE));
  /* only the call matching the actual type (MATSEQDENSE or MATMPIDENSE) takes effect */
  PetscCall(MatSeqDenseSetPreallocation(*A, data));
  PetscCall(MatMPIDenseSetPreallocation(*A, data));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Duplicate an MPI dense matrix: mirror the layouts and flags, then duplicate the local block */
static PetscErrorCode MatDuplicate_MPIDense(Mat A, MatDuplicateOption cpvalues, Mat *newmat)
{
  Mat           mat;
  Mat_MPIDense *a, *oldmat = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  *newmat = NULL;
  PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat));
  PetscCall(MatSetSizes(mat, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
  PetscCall(MatSetType(mat, ((PetscObject)A)->type_name));
  a = (Mat_MPIDense *)mat->data;

  mat->factortype   = A->factortype;
  mat->assembled    = PETSC_TRUE;
  mat->preallocated = PETSC_TRUE;

  mat->insertmode = NOT_SET_VALUES;
  a->donotstash   = oldmat->donotstash;

  /* share the layouts of A instead of rebuilding them */
  PetscCall(PetscLayoutReference(A->rmap, &mat->rmap));
  PetscCall(PetscLayoutReference(A->cmap, &mat->cmap));

  PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));

  *newmat = mat;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Load an MPI dense matrix from a binary or (if available) HDF5 viewer */
static PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
{
  PetscBool isbinary;
#if defined(PETSC_HAVE_HDF5)
  PetscBool ishdf5;
#endif

  PetscFunctionBegin;
  PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
  PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
  /* force binary viewer to load .info file if it has not yet done so */
  PetscCall(PetscViewerSetUp(viewer));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
#if defined(PETSC_HAVE_HDF5)
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
#endif
  if (isbinary) {
    PetscCall(MatLoad_Dense_Binary(newMat, viewer));
#if defined(PETSC_HAVE_HDF5)
  } else if (ishdf5) {
    PetscCall(MatLoad_Dense_HDF5(newMat, viewer));
#endif
  } else SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Two MPI dense matrices are equal iff their local blocks are equal on every process */
static PetscErrorCode MatEqual_MPIDense(Mat A, Mat B, PetscBool *flag)
{
  Mat_MPIDense *matB = (Mat_MPIDense *)B->data, *matA = (Mat_MPIDense *)A->data;
  Mat           a, b;

  PetscFunctionBegin;
  a = matA->A;
  b = matB->A;
  PetscCall(MatEqual(a, b, flag));
  /* logical AND over all ranks: any local mismatch makes the matrices unequal */
  PetscCall(MPIU_Allreduce(MPI_IN_PLACE, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Free the reuse data attached to a C = A^T * B product */
static PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data)
{
  Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data;

  PetscFunctionBegin;
  PetscCall(PetscFree2(atb->sendbuf, atb->recvcounts));
  PetscCall(MatDestroy(&atb->atb));
  PetscCall(PetscFree(atb));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Free the reuse data attached to a C = A * B^T product */
static PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data)
{
  Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data;

  PetscFunctionBegin;
  PetscCall(PetscFree2(abt->buf[0],
abt->buf[1]));
  PetscCall(PetscFree2(abt->recvcounts, abt->recvdispls));
  PetscCall(PetscFree(abt));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Numeric phase of C = A^T * B: each rank computes its local A^T * B contribution,
   then an MPI_Reduce_scatter sums the contributions and delivers each rank its row block of C */
static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
{
  Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
  Mat_TransMatMultDense *atb;
  MPI_Comm               comm;
  PetscMPIInt            size, *recvcounts;
  PetscScalar           *carray, *sendbuf;
  const PetscScalar     *atbarray;
  PetscInt               i, cN = C->cmap->N, proc, k, j, lda;
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C, 3);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
  atb        = (Mat_TransMatMultDense *)C->product->data;
  recvcounts = atb->recvcounts;
  sendbuf    = atb->sendbuf;

  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));

  /* compute atbarray = aseq^T * bseq */
  PetscCall(MatTransposeMatMult(a->A, b->A, atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX, PETSC_DEFAULT, &atb->atb));

  PetscCall(MatGetOwnershipRanges(C, &ranges));

  /* arrange atbarray into sendbuf, grouped by destination rank's row range */
  PetscCall(MatDenseGetArrayRead(atb->atb, &atbarray));
  PetscCall(MatDenseGetLDA(atb->atb, &lda));
  for (proc = 0, k = 0; proc < size; proc++) {
    for (j = 0; j < cN; j++) {
      for (i = ranges[proc]; i < ranges[proc + 1]; i++) sendbuf[k++] = atbarray[i + j * lda];
    }
  }
  PetscCall(MatDenseRestoreArrayRead(atb->atb, &atbarray));

  /* sum all atbarray to local values of C */
  PetscCall(MatDenseGetArrayWrite(c->A, &carray));
  PetscCallMPI(MPI_Reduce_scatter(sendbuf, carray, recvcounts, MPIU_SCALAR, MPIU_SUM, comm));
  PetscCall(MatDenseRestoreArrayWrite(c->A, &carray));
  PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Symbolic phase of C = A^T * B: sizes C and allocates the reduce-scatter buffers for reuse */
static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
{
  MPI_Comm               comm;
  PetscMPIInt            size;
  PetscInt               cm = A->cmap->n, cM, cN = B->cmap->N;
  Mat_TransMatMultDense *atb;
  PetscBool              cisdense = PETSC_FALSE;
  PetscInt               i;
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C, 4);
  PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
    SETERRQ(comm, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
  }

  /* create matrix product C */
  PetscCall(MatSetSizes(C, cm, B->cmap->n, A->cmap->N, B->cmap->N));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, ""));
#endif
#if defined(PETSC_HAVE_HIP)
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSEHIP, ""));
#endif
  if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
  PetscCall(MatSetUp(C));

  /* create data structure for reuse C */
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCall(PetscNew(&atb));
  cM = C->rmap->N;
  PetscCall(PetscMalloc2(cM * cN, &atb->sendbuf, size, &atb->recvcounts));
  PetscCall(MatGetOwnershipRanges(C, &ranges));
  /* each rank receives its own cN-wide row block from the reduce-scatter */
  for (i = 0; i < size; i++) atb->recvcounts[i] = (ranges[i + 1] - ranges[i]) * cN;

  C->product->data    = atb;
  C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Symbolic phase of C = A * B^T: sizes C, selects the "allgatherv" (alg 0) or "cyclic" (alg 1)
   algorithm, and allocates the communication buffers accordingly */
static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
{
  MPI_Comm               comm;
  PetscMPIInt            i, size;
  PetscInt               maxRows, bufsiz;
  PetscMPIInt            tag;
  PetscInt               alg;
  Mat_MatTransMultDense *abt;
  Mat_Product           *product = C->product;
  PetscBool              flg;

  PetscFunctionBegin;
  MatCheckProduct(C, 4);
  PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
  /* check local size of A and B */
  PetscCheck(A->cmap->n == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local column dimensions are incompatible, A (%" PetscInt_FMT ") != B (%" PetscInt_FMT ")", A->cmap->n, B->cmap->n);

  PetscCall(PetscStrcmp(product->alg, "allgatherv", &flg));
  alg = flg ?
 0 : 1;

  /* setup matrix product C */
  PetscCall(MatSetSizes(C, A->rmap->n, B->rmap->n, A->rmap->N, B->rmap->N));
  PetscCall(MatSetType(C, MATMPIDENSE));
  PetscCall(MatSetUp(C));
  PetscCall(PetscObjectGetNewTag((PetscObject)C, &tag));

  /* create data structure for reuse C */
  PetscCall(PetscObjectGetComm((PetscObject)C, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCall(PetscNew(&abt));
  abt->tag = tag;
  abt->alg = alg;
  switch (alg) {
  case 1: /* alg: "cyclic" */
    /* two ping-pong buffers sized for the widest row block that will cycle through */
    for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i]));
    bufsiz = A->cmap->N * maxRows;
    PetscCall(PetscMalloc2(bufsiz, &(abt->buf[0]), bufsiz, &(abt->buf[1])));
    break;
  default: /* alg: "allgatherv" */
    /* buf[0] holds the local B^T, buf[1] receives the gathered global B^T */
    PetscCall(PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1])));
    PetscCall(PetscMalloc2(size, &(abt->recvcounts), size + 1, &(abt->recvdispls)));
    for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N;
    for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i];
    break;
  }

  C->product->data                = abt;
  C->product->destroy             = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
  C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Numeric phase of C = A * B^T, cyclic algorithm: the packed transpose blocks of B rotate
   around the ring of ranks while each rank multiplies its local A against the current block */
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
{
  Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
  Mat_MatTransMultDense *abt;
  MPI_Comm               comm;
  PetscMPIInt            rank, size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom;
  PetscScalar           *sendbuf, *recvbuf = NULL, *cv;
  PetscInt               i, cK = A->cmap->N, k, j, bn;
  PetscScalar            _DOne = 1.0, _DZero = 0.0;
  const PetscScalar     *av, *bv;
  PetscBLASInt           cm, cn, ck, alda, blda = 0, clda;
  MPI_Request            reqs[2];
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C, 3);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
  abt = (Mat_MatTransMultDense *)C->product->data;
  PetscCall(PetscObjectGetComm((PetscObject)C, &comm));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCall(MatDenseGetArrayRead(a->A, &av));
  PetscCall(MatDenseGetArrayRead(b->A, &bv));
  PetscCall(MatDenseGetArrayWrite(c->A, &cv));
  PetscCall(MatDenseGetLDA(a->A, &i));
  PetscCall(PetscBLASIntCast(i, &alda));
  PetscCall(MatDenseGetLDA(b->A, &i));
  PetscCall(PetscBLASIntCast(i, &blda));
  PetscCall(MatDenseGetLDA(c->A, &i));
  PetscCall(PetscBLASIntCast(i, &clda));
  PetscCall(MatGetOwnershipRanges(B, &ranges));
  bn = B->rmap->n;
  if (blda == bn) {
    /* local B is contiguous: send it directly without packing */
    sendbuf = (PetscScalar *)bv;
  } else {
    /* pack local B columns contiguously into buf[0] */
    sendbuf = abt->buf[0];
    for (k = 0, i = 0; i < cK; i++) {
      for (j = 0; j < bn; j++, k++) sendbuf[k] = bv[i * blda + j];
    }
  }
  if (size > 1) {
    /* ring: send to the previous rank, receive from the next */
    sendto   = (rank + size - 1) % size;
    recvfrom = (rank + size + 1) % size;
  } else {
    sendto = recvfrom = 0;
  }
  PetscCall(PetscBLASIntCast(cK, &ck));
  PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm));
  recvisfrom = rank;
  for (i = 0; i < size; i++) {
    /* we have finished receiving in sending, bufs can be read/modified */
    PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */
    PetscInt nextbn         = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];

    if (nextrecvisfrom != rank) {
      /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */
      sendsiz = cK * bn;
      recvsiz = cK * nextbn;
      recvbuf = (i & 1) ?
abt->buf[0] : abt->buf[1]; 2141 PetscCallMPI(MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0])); 2142 PetscCallMPI(MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1])); 2143 } 2144 2145 /* local aseq * sendbuf^T */ 2146 PetscCall(PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn)); 2147 if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &cm, &cn, &ck, &_DOne, av, &alda, sendbuf, &cn, &_DZero, cv + clda * ranges[recvisfrom], &clda)); 2148 2149 if (nextrecvisfrom != rank) { 2150 /* wait for the sends and receives to complete, swap sendbuf and recvbuf */ 2151 PetscCallMPI(MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE)); 2152 } 2153 bn = nextbn; 2154 recvisfrom = nextrecvisfrom; 2155 sendbuf = recvbuf; 2156 } 2157 PetscCall(MatDenseRestoreArrayRead(a->A, &av)); 2158 PetscCall(MatDenseRestoreArrayRead(b->A, &bv)); 2159 PetscCall(MatDenseRestoreArrayWrite(c->A, &cv)); 2160 PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 2161 PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 2162 PetscFunctionReturn(PETSC_SUCCESS); 2163 } 2164 2165 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C) 2166 { 2167 Mat_MPIDense *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data; 2168 Mat_MatTransMultDense *abt; 2169 MPI_Comm comm; 2170 PetscMPIInt size; 2171 PetscScalar *cv, *sendbuf, *recvbuf; 2172 const PetscScalar *av, *bv; 2173 PetscInt blda, i, cK = A->cmap->N, k, j, bn; 2174 PetscScalar _DOne = 1.0, _DZero = 0.0; 2175 PetscBLASInt cm, cn, ck, alda, clda; 2176 2177 PetscFunctionBegin; 2178 MatCheckProduct(C, 3); 2179 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 2180 abt = (Mat_MatTransMultDense *)C->product->data; 2181 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 2182 PetscCallMPI(MPI_Comm_size(comm, &size)); 2183 PetscCall(MatDenseGetArrayRead(a->A, &av)); 2184 
PetscCall(MatDenseGetArrayRead(b->A, &bv)); 2185 PetscCall(MatDenseGetArrayWrite(c->A, &cv)); 2186 PetscCall(MatDenseGetLDA(a->A, &i)); 2187 PetscCall(PetscBLASIntCast(i, &alda)); 2188 PetscCall(MatDenseGetLDA(b->A, &blda)); 2189 PetscCall(MatDenseGetLDA(c->A, &i)); 2190 PetscCall(PetscBLASIntCast(i, &clda)); 2191 /* copy transpose of B into buf[0] */ 2192 bn = B->rmap->n; 2193 sendbuf = abt->buf[0]; 2194 recvbuf = abt->buf[1]; 2195 for (k = 0, j = 0; j < bn; j++) { 2196 for (i = 0; i < cK; i++, k++) sendbuf[k] = bv[i * blda + j]; 2197 } 2198 PetscCall(MatDenseRestoreArrayRead(b->A, &bv)); 2199 PetscCallMPI(MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm)); 2200 PetscCall(PetscBLASIntCast(cK, &ck)); 2201 PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm)); 2202 PetscCall(PetscBLASIntCast(c->A->cmap->n, &cn)); 2203 if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &cm, &cn, &ck, &_DOne, av, &alda, recvbuf, &ck, &_DZero, cv, &clda)); 2204 PetscCall(MatDenseRestoreArrayRead(a->A, &av)); 2205 PetscCall(MatDenseRestoreArrayRead(b->A, &bv)); 2206 PetscCall(MatDenseRestoreArrayWrite(c->A, &cv)); 2207 PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 2208 PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 2209 PetscFunctionReturn(PETSC_SUCCESS); 2210 } 2211 2212 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) 2213 { 2214 Mat_MatTransMultDense *abt; 2215 2216 PetscFunctionBegin; 2217 MatCheckProduct(C, 3); 2218 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 2219 abt = (Mat_MatTransMultDense *)C->product->data; 2220 switch (abt->alg) { 2221 case 1: 2222 PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C)); 2223 break; 2224 default: 2225 PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C)); 2226 break; 2227 } 2228 PetscFunctionReturn(PETSC_SUCCESS); 2229 } 2230 2231 #if 
defined(PETSC_HAVE_ELEMENTAL)
/* Free the Elemental mirror matrices kept for reuse of C = A * B */
static PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data)
{
  Mat_MatMultDense *ab = (Mat_MatMultDense *)data;

  PetscFunctionBegin;
  PetscCall(MatDestroy(&ab->Ce));
  PetscCall(MatDestroy(&ab->Ae));
  PetscCall(MatDestroy(&ab->Be));
  PetscCall(PetscFree(ab));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Numeric phase of C = A * B via Elemental: refresh the Elemental mirrors of A and B,
   multiply there, and convert the result back into C */
PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
{
  Mat_MatMultDense *ab;

  PetscFunctionBegin;
  MatCheckProduct(C, 3);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Missing product data");
  ab = (Mat_MatMultDense *)C->product->data;
  PetscCall(MatConvert_MPIDense_Elemental(A, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Ae));
  PetscCall(MatConvert_MPIDense_Elemental(B, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Be));
  PetscCall(MatMatMultNumeric_Elemental(ab->Ae, ab->Be, ab->Ce));
  PetscCall(MatConvert(ab->Ce, MATMPIDENSE, MAT_REUSE_MATRIX, &C));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Symbolic phase of C = A * B via Elemental: create the Elemental mirrors and symbolic product */
static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
{
  Mat               Ae, Be, Ce;
  Mat_MatMultDense *ab;

  PetscFunctionBegin;
  MatCheckProduct(C, 4);
  PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
  /* check local size of A and B */
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
    SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
  }

  /* create elemental matrices Ae and Be */
  PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &Ae));
  PetscCall(MatSetSizes(Ae, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N));
  PetscCall(MatSetType(Ae, MATELEMENTAL));
  PetscCall(MatSetUp(Ae));
  PetscCall(MatSetOption(Ae, MAT_ROW_ORIENTED, PETSC_FALSE));

  PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &Be));
  PetscCall(MatSetSizes(Be, PETSC_DECIDE, PETSC_DECIDE, B->rmap->N, B->cmap->N));
  PetscCall(MatSetType(Be, MATELEMENTAL));
  PetscCall(MatSetUp(Be));
  PetscCall(MatSetOption(Be, MAT_ROW_ORIENTED, PETSC_FALSE));

  /* compute symbolic Ce = Ae*Be */
  PetscCall(MatCreate(PetscObjectComm((PetscObject)C), &Ce));
  PetscCall(MatMatMultSymbolic_Elemental(Ae, Be, fill, Ce));

  /* setup C */
  PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, PETSC_DECIDE, PETSC_DECIDE));
  PetscCall(MatSetType(C, MATDENSE));
  PetscCall(MatSetUp(C));

  /* create data structure for reuse Cdense */
  PetscCall(PetscNew(&ab));
  ab->Ae = Ae;
  ab->Be = Be;
  ab->Ce = Ce;

  C->product->data       = ab;
  C->product->destroy    = MatDestroy_MatMatMult_MPIDense_MPIDense;
  C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif

#if defined(PETSC_HAVE_ELEMENTAL)
/* Configure a C = A * B product (only available when Elemental is present) */
static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C)
{
  PetscFunctionBegin;
  C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense;
  C->ops->productsymbolic = MatProductSymbolic_AB;
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif

/* Configure a C = A^T * B product after validating that A and B share their row layout */
static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C)
{
  Mat_Product *product = C->product;
  Mat          A = product->A, B = product->B;

  PetscFunctionBegin;
  if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend)
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart,
B->rmap->rend);
  C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense;
  C->ops->productsymbolic          = MatProductSymbolic_AtB;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Configure a C = A * B^T product: choose "allgatherv" (default) or "cyclic" from the options database */
static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C)
{
  Mat_Product *product     = C->product;
  const char  *algTypes[2] = {"allgatherv", "cyclic"};
  PetscInt     alg, nalg = 2;
  PetscBool    flg = PETSC_FALSE;

  PetscFunctionBegin;
  /* Set default algorithm */
  alg = 0; /* default is allgatherv */
  PetscCall(PetscStrcmp(product->alg, "default", &flg));
  if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));

  /* Get runtime option */
  if (product->api_user) {
    PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatTransposeMult", "Mat");
    PetscCall(PetscOptionsEList("-matmattransmult_mpidense_mpidense_via", "Algorithmic approach", "MatMatTransposeMult", algTypes, nalg, algTypes[alg], &alg, &flg));
    PetscOptionsEnd();
  } else {
    PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_ABt", "Mat");
    PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_ABt", algTypes, nalg, algTypes[alg], &alg, &flg));
    PetscOptionsEnd();
  }
  if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));

  C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense;
  C->ops->productsymbolic          = MatProductSymbolic_ABt;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Route MatProductSetFromOptions() to the handler for the requested product type;
   unsupported types fall through untouched */
PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C)
{
  Mat_Product *product = C->product;

  PetscFunctionBegin;
  switch (product->type) {
#if defined(PETSC_HAVE_ELEMENTAL)
  case MATPRODUCT_AB:
    PetscCall(MatProductSetFromOptions_MPIDense_AB(C));
    break;
#endif
  case MATPRODUCT_AtB:
    PetscCall(MatProductSetFromOptions_MPIDense_AtB(C));
    break;
  case MATPRODUCT_ABt:
    PetscCall(MatProductSetFromOptions_MPIDense_ABt(C));
    break;
  default:
    break;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}