/*
   Basic functions for basic parallel dense matrices.
   Portions of this code are under:
   Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
*/

#include <../src/mat/impls/dense/mpi/mpidense.h> /*I "petscmat.h" I*/
#include <../src/mat/impls/aij/mpi/mpiaij.h>
#include <petscblaslapack.h>

/*@
  MatDenseGetLocalMatrix - For a `MATMPIDENSE` or `MATSEQDENSE` matrix returns the sequential
  matrix that represents the operator. For sequential matrices it returns itself.

  Input Parameter:
. A - the sequential or MPI `MATDENSE` matrix

  Output Parameter:
. B - the inner matrix

  Level: intermediate

.seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATMPIDENSE`, `MATSEQDENSE`
@*/
PetscErrorCode MatDenseGetLocalMatrix(Mat A, Mat *B)
{
  Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
  PetscBool     flg;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscAssertPointer(B, 2);
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &flg));
  if (flg) *B = mat->A; /* MPI case: hand back the per-rank sequential block */
  else {
    PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQDENSE, &flg));
    PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)A)->type_name);
    *B = A; /* sequential case: the matrix is its own local matrix */
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Copy the per-rank sequential block of A into that of B (layouts assumed compatible) */
static PetscErrorCode MatCopy_MPIDense(Mat A, Mat B, MatStructure s)
{
  Mat_MPIDense *Amat = (Mat_MPIDense *)A->data;
  Mat_MPIDense *Bmat = (Mat_MPIDense *)B->data;

  PetscFunctionBegin;
  PetscCall(MatCopy(Amat->A, Bmat->A, s));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Add alpha to each locally owned diagonal entry; the diagonal is clipped at the
   global column count so rectangular matrices are handled too */
PetscErrorCode MatShift_MPIDense(Mat A, PetscScalar alpha)
{
  Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
  PetscInt      j, lda, rstart = A->rmap->rstart, rend = A->rmap->rend, rend2;
  PetscScalar  *v;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArray(mat->A, &v));
  PetscCall(MatDenseGetLDA(mat->A, &lda));
  rend2 = PetscMin(rend,
                   A->cmap->N);
  if (rend2 > rstart) {
    /* global row j is local row j - rstart; its diagonal entry sits in global column j */
    for (j = rstart; j < rend2; j++) v[j - rstart + j * lda] += alpha;
    PetscCall(PetscLogFlops(rend2 - rstart));
  }
  PetscCall(MatDenseRestoreArray(mat->A, &v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Expose one locally owned row (all N column values) via the sequential block */
static PetscErrorCode MatGetRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
  PetscInt      lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;

  PetscFunctionBegin;
  PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows");
  lrow = row - rstart;
  PetscCall(MatGetRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Companion to MatGetRow_MPIDense(); releases the row obtained from the sequential block */
static PetscErrorCode MatRestoreRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
  PetscInt      lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;

  PetscFunctionBegin;
  PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows");
  lrow = row - rstart;
  PetscCall(MatRestoreRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Return the on-process diagonal block as a sequential dense matrix whose data aliases
   the local array of A; the block is cached on A under the key "DiagonalBlock" */
static PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A, Mat *a)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)A->data;
  PetscInt      m = A->rmap->n, rstart = A->rmap->rstart;
  PetscScalar  *array;
  MPI_Comm      comm;
  PetscBool     flg;
  Mat           B;

  PetscFunctionBegin;
  PetscCall(MatHasCongruentLayouts(A, &flg));
  PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only square matrices supported.");
  PetscCall(PetscObjectQuery((PetscObject)A, "DiagonalBlock", (PetscObject *)&B));
  if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */
#if PetscDefined(HAVE_CUDA)
    PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSECUDA, &flg));
    PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSECUDA);
#elif PetscDefined(HAVE_HIP)
    PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSEHIP, &flg));
    PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSEHIP);
#endif
    PetscCall(PetscObjectGetComm((PetscObject)(mdn->A), &comm));
    PetscCall(MatCreate(comm, &B));
    PetscCall(MatSetSizes(B, m, m, m, m));
    PetscCall(MatSetType(B, ((PetscObject)mdn->A)->type_name));
    PetscCall(MatDenseGetArrayRead(mdn->A, (const PetscScalar **)&array));
    /* NOTE(review): the offset m * rstart assumes the inner block has lda == m — confirm
       behavior when MatDenseSetLDA() was used with a larger leading dimension */
    PetscCall(MatSeqDenseSetPreallocation(B, array + m * rstart));
    PetscCall(MatDenseRestoreArrayRead(mdn->A, (const PetscScalar **)&array));
    PetscCall(PetscObjectCompose((PetscObject)A, "DiagonalBlock", (PetscObject)B));
    *a = B;
    PetscCall(MatDestroy(&B)); /* the composition above keeps a reference alive */
  } else *a = B;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Insert/add values: locally owned rows go straight into the sequential block,
   off-process rows are stashed for communication during assembly */
static PetscErrorCode MatSetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIDense *A = (Mat_MPIDense *)mat->data;
  PetscInt      i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;
  PetscBool     roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative rows are ignored by convention */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        PetscCall(MatSetValues(A->A, 1, &row, n, idxn, v + i * n, addv));
      } else {
        /* column-oriented input: v is m x n in column-major order */
        for (j = 0; j < n; j++) {
          if (idxn[j] < 0) continue;
          PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large");
          PetscCall(MatSetValues(A->A, 1, &row, 1, &idxn[j], v + i + j * m, addv));
        }
      }
    } else if (!A->donotstash) {
      mat->assembled = PETSC_FALSE; /* off-process entry queued; matrix needs reassembly */
      if (roworiented) {
        PetscCall(MatStashValuesRow_Private(&mat->stash, idxm[i], n, idxn, v + i * n, PETSC_FALSE));
      } else {
        PetscCall(MatStashValuesCol_Private(&mat->stash, idxm[i], n, idxn, v + i, m, PETSC_FALSE));
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Retrieve values; only locally owned rows are supported (no communication) */
static PetscErrorCode MatGetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
  PetscInt      i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j = 0; j < n; j++) {
        if (idxn[j] < 0) continue; /* negative column */
        PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large");
        PetscCall(MatGetValues(mdn->A, 1, &row, 1, &idxn[j], v + i * n + j));
      }
    } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Leading dimension of the local (sequential) dense storage */
static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A, PetscInt *lda)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseGetLDA(a->A, lda));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Set the leading dimension; creates the inner sequential matrix first if it does
   not exist yet (choosing the device type matching A when CUDA/HIP is enabled) */
static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A, PetscInt lda)
{
  Mat_MPIDense *a     = (Mat_MPIDense *)A->data;
  MatType       mtype = MATSEQDENSE;

  PetscFunctionBegin;
  if (!a->A) {
    PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
    PetscCall(PetscLayoutSetUp(A->rmap));
    PetscCall(PetscLayoutSetUp(A->cmap));
    PetscCall(MatCreate(PETSC_COMM_SELF, &a->A));
    /* the local block stores all N global columns for the locally owned rows */
    PetscCall(MatSetSizes(a->A, A->rmap->n, A->cmap->N, A->rmap->n, A->cmap->N));
#if PetscDefined(HAVE_CUDA)
    PetscBool iscuda;
    PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSECUDA, &iscuda));
    if (iscuda) mtype = MATSEQDENSECUDA;
#elif PetscDefined(HAVE_HIP)
    PetscBool iship;
    PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSEHIP, &iship));
    if (iship) mtype = MATSEQDENSEHIP;
#endif
    PetscCall(MatSetType(a->A, mtype));
  }
  PetscCall(MatDenseSetLDA(a->A, lda));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Read/write access to the local dense array; forbidden while a submatrix is out */
static PetscErrorCode MatDenseGetArray_MPIDense(Mat A, PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseGetArray(a->A, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Read-only access to the local dense array */
static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A, const PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseGetArrayRead(a->A, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Write-only access to the local dense array */
static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A, PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseGetArrayWrite(a->A, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Temporarily substitute a user-provided array for the local dense storage */
static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A, const
                                                  PetscScalar *array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDensePlaceArray(a->A, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Undo MatDensePlaceArray(): restore the original local storage */
static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseResetArray(a->A));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Permanently replace the local dense storage with a user-provided array */
static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A, const PetscScalar *array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseReplaceArray(a->A, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Extract a submatrix; the rows of the result must keep the same parallel
   partitioning as A (no redistribution), columns are gathered globally */
static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A, IS isrow, IS iscol, MatReuse scall, Mat *B)
{
  Mat_MPIDense      *mat = (Mat_MPIDense *)A->data, *newmatd;
  PetscInt           lda, i, j, rstart, rend, nrows, ncols, Ncols, nlrows, nlcols;
  const PetscInt    *irow, *icol;
  const PetscScalar *v;
  PetscScalar       *bv;
  Mat                newmat;
  IS                 iscol_local;
  MPI_Comm           comm_is, comm_mat;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm_mat));
  PetscCall(PetscObjectGetComm((PetscObject)iscol, &comm_is));
  PetscCheck(comm_mat == comm_is, PETSC_COMM_SELF, PETSC_ERR_ARG_NOTSAMECOMM, "IS communicator must match matrix communicator");

  PetscCall(ISAllGather(iscol, &iscol_local));
  PetscCall(ISGetIndices(isrow, &irow));
  PetscCall(ISGetIndices(iscol_local, &icol));
  PetscCall(ISGetLocalSize(isrow, &nrows));
  PetscCall(ISGetLocalSize(iscol, &ncols));
  PetscCall(ISGetSize(iscol, &Ncols)); /* global number of columns, size of iscol_local */

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK. ... Currently supports only submatrix same partitioning as
     original matrix! */

  PetscCall(MatGetLocalSize(A, &nlrows, &nlcols));
  PetscCall(MatGetOwnershipRange(A, &rstart, &rend));

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &newmat));
    PetscCall(MatSetSizes(newmat, nrows, ncols, PETSC_DECIDE, Ncols));
    PetscCall(MatSetType(newmat, ((PetscObject)A)->type_name));
    PetscCall(MatMPIDenseSetPreallocation(newmat, NULL));
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense *)newmat->data;
  PetscCall(MatDenseGetArray(newmatd->A, &bv));
  PetscCall(MatDenseGetArrayRead(mat->A, &v));
  PetscCall(MatDenseGetLDA(mat->A, &lda));
  for (i = 0; i < Ncols; i++) {
    const PetscScalar *av = v + lda * icol[i]; /* start of selected source column */
    for (j = 0; j < nrows; j++) *bv++ = av[irow[j] - rstart];
  }
  PetscCall(MatDenseRestoreArrayRead(mat->A, &v));
  PetscCall(MatDenseRestoreArray(newmatd->A, &bv));

  /* Assemble the matrices so that the correct flags are set */
  PetscCall(MatAssemblyBegin(newmat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(newmat, MAT_FINAL_ASSEMBLY));

  /* Free work space */
  PetscCall(ISRestoreIndices(isrow, &irow));
  PetscCall(ISRestoreIndices(iscol_local, &icol));
  PetscCall(ISDestroy(&iscol_local));
  *B = newmat;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Companion to MatDenseGetArray_MPIDense() */
static PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A, PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseRestoreArray(a->A, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Companion to MatDenseGetArrayRead_MPIDense() */
static PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A, const PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseRestoreArrayRead(a->A, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Companion to MatDenseGetArrayWrite_MPIDense() */
static PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A, PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseRestoreArrayWrite(a->A, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Start communicating stashed off-process entries (no-op if stashing is disabled) */
static PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat, MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
  PetscInt      nstash, reallocs;

  PetscFunctionBegin;
  if (mdn->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
  PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
  PetscCall(PetscInfo(mdn->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Drain the stash (inserting received off-process entries) and assemble the local block */
static PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat, MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
  PetscInt      i, *row, *col, flg, j, rstart, ncols;
  PetscMPIInt   n;
  PetscScalar  *val;

  PetscFunctionBegin;
  if (!mdn->donotstash && !mat->nooffprocentries) {
    /* wait on receives */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIDense(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
  }

  PetscCall(MatAssemblyBegin(mdn->A, mode));
  PetscCall(MatAssemblyEnd(mdn->A, mode));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Zero every entry of the local block */
static PetscErrorCode MatZeroEntries_MPIDense(Mat A)
{
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatZeroEntries(l->A));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Zero the given (global) rows, optionally placing diag on the diagonal and fixing
   the right-hand side b = diag * x for those rows */
static PetscErrorCode MatZeroRows_MPIDense(Mat A, PetscInt n, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;
  PetscInt      i, len, *lrows;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(PetscLayoutMapLocal(A->rmap, n, rows, &len, &lrows, NULL));
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArrayWrite(b, &bb));
    for (i = 0; i < len; ++i) bb[lrows[i]] = diag * xx[lrows[i]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArrayWrite(b, &bb));
  }
  PetscCall(MatZeroRows(l->A, len, lrows, 0.0, NULL, NULL));
  if (diag != 0.0) {
    Vec d;

    PetscCall(MatCreateVecs(A, NULL, &d));
    PetscCall(VecSet(d, diag));
    PetscCall(MatDiagonalSet(A, d, INSERT_VALUES));
    PetscCall(VecDestroy(&d));
  }
  PetscCall(PetscFree(lrows));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat, Vec, Vec);
PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat, Vec, Vec, Vec);
PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat, Vec, Vec);
PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat, Vec, Vec, Vec);

/* y = A x: broadcast the full x into lvec via the SF, then multiply locally */
static PetscErrorCode MatMult_MPIDense(Mat mat, Vec xx, Vec yy)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
  PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
  PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
  PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
  PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
  PetscCall((*mdn->A->ops->mult)(mdn->A, mdn->lvec, yy));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* z = y + A x: same broadcast as MatMult, then local multadd */
static PetscErrorCode MatMultAdd_MPIDense(Mat mat, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
  PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
  PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
  PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
  PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
  PetscCall((*mdn->A->ops->multadd)(mdn->A, mdn->lvec, yy, zz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* y = A^T x: local transpose-multiply into lvec, then sum contributions via SF reduce */
static PetscErrorCode MatMultTranspose_MPIDense(Mat A, Vec xx, Vec yy)
{
  Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
  PetscCall(VecSet(yy, 0.0)); /* reduce adds into yy, so it must start at zero */
  PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec));
  PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(yy, &ay, &aymtype));
  PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
  PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
  PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
  PetscCall(VecRestoreArrayAndMemType(yy, &ay));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* z = y + A^T x: copy y into z, then reduce the local transpose product into z */
static PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A, Vec
xx, Vec yy, Vec zz) 531 { 532 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 533 const PetscScalar *ax; 534 PetscScalar *ay; 535 PetscMemType axmtype, aymtype; 536 537 PetscFunctionBegin; 538 if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); 539 PetscCall(VecCopy(yy, zz)); 540 PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec)); 541 PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype)); 542 PetscCall(VecGetArrayAndMemType(zz, &ay, &aymtype)); 543 PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM)); 544 PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM)); 545 PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax)); 546 PetscCall(VecRestoreArrayAndMemType(zz, &ay)); 547 PetscFunctionReturn(PETSC_SUCCESS); 548 } 549 550 PetscErrorCode MatGetDiagonal_MPIDense(Mat A, Vec v) 551 { 552 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 553 PetscInt lda, len, i, nl, ng, m = A->rmap->n, radd; 554 PetscScalar *x; 555 const PetscScalar *av; 556 557 PetscFunctionBegin; 558 PetscCall(VecGetArray(v, &x)); 559 PetscCall(VecGetSize(v, &ng)); 560 PetscCheck(ng == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming mat and vec"); 561 PetscCall(VecGetLocalSize(v, &nl)); 562 len = PetscMin(a->A->rmap->n, a->A->cmap->n); 563 radd = A->rmap->rstart * m; 564 PetscCall(MatDenseGetArrayRead(a->A, &av)); 565 PetscCall(MatDenseGetLDA(a->A, &lda)); 566 for (i = 0; i < len; i++) x[i] = av[radd + i * lda + i]; 567 PetscCall(MatDenseRestoreArrayRead(a->A, &av)); 568 PetscCall(PetscArrayzero(x + i, nl - i)); 569 PetscCall(VecRestoreArray(v, &x)); 570 PetscFunctionReturn(PETSC_SUCCESS); 571 } 572 573 static PetscErrorCode MatDestroy_MPIDense(Mat mat) 574 { 575 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 576 577 PetscFunctionBegin; 578 PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N)); 579 PetscCall(MatStashDestroy_Private(&mat->stash)); 580 
  PetscCheck(!mdn->vecinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!mdn->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDestroy(&mdn->A));
  PetscCall(VecDestroy(&mdn->lvec));
  PetscCall(PetscSFDestroy(&mdn->Mvctx));
  PetscCall(VecDestroy(&mdn->cvec));
  PetscCall(MatDestroy(&mdn->cmat));

  PetscCall(PetscFree(mat->data));
  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));

  /* remove every composed method so stale pointers cannot be invoked after destroy */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", NULL));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", NULL));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", NULL));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensecuda_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAPlaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAResetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAReplaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDASetPreallocation_C", NULL));
#endif
#if defined(PETSC_HAVE_HIP)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensehip_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensehip_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidensehip_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaijhipsparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPPlaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPResetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPReplaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPSetPreallocation_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", NULL));

  /* drop the cached diagonal block (see MatGetDiagonalBlock_MPIDense()) */
  PetscCall(PetscObjectCompose((PetscObject)mat, "DiagonalBlock", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PETSC_INTERN PetscErrorCode MatView_SeqDense(Mat, PetscViewer);

#include <petscdraw.h>
/* View helper: INFO formats print per-rank statistics; otherwise the whole matrix is
   gathered onto rank 0 and viewed there as a sequential dense matrix */
static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
{
  Mat_MPIDense     *mdn = (Mat_MPIDense *)mat->data;
  PetscMPIInt       rank;
  PetscViewerType   vtype;
  PetscBool         iascii, isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 683 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 684 if (iascii) { 685 PetscCall(PetscViewerGetType(viewer, &vtype)); 686 PetscCall(PetscViewerGetFormat(viewer, &format)); 687 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 688 MatInfo info; 689 PetscCall(MatGetInfo(mat, MAT_LOCAL, &info)); 690 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 691 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %" PetscInt_FMT " \n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, 692 (PetscInt)info.memory)); 693 PetscCall(PetscViewerFlush(viewer)); 694 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 695 if (mdn->Mvctx) PetscCall(PetscSFView(mdn->Mvctx, viewer)); 696 PetscFunctionReturn(PETSC_SUCCESS); 697 } else if (format == PETSC_VIEWER_ASCII_INFO) { 698 PetscFunctionReturn(PETSC_SUCCESS); 699 } 700 } else if (isdraw) { 701 PetscDraw draw; 702 PetscBool isnull; 703 704 PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw)); 705 PetscCall(PetscDrawIsNull(draw, &isnull)); 706 if (isnull) PetscFunctionReturn(PETSC_SUCCESS); 707 } 708 709 { 710 /* assemble the entire matrix onto first processor. */ 711 Mat A; 712 PetscInt M = mat->rmap->N, N = mat->cmap->N, m, row, i, nz; 713 PetscInt *cols; 714 PetscScalar *vals; 715 716 PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A)); 717 if (rank == 0) { 718 PetscCall(MatSetSizes(A, M, N, M, N)); 719 } else { 720 PetscCall(MatSetSizes(A, 0, 0, M, N)); 721 } 722 /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */ 723 PetscCall(MatSetType(A, MATMPIDENSE)); 724 PetscCall(MatMPIDenseSetPreallocation(A, NULL)); 725 726 /* Copy the matrix ... 
This isn't the most efficient means, 727 but it's quick for now */ 728 A->insertmode = INSERT_VALUES; 729 730 row = mat->rmap->rstart; 731 m = mdn->A->rmap->n; 732 for (i = 0; i < m; i++) { 733 PetscCall(MatGetRow_MPIDense(mat, row, &nz, &cols, &vals)); 734 PetscCall(MatSetValues_MPIDense(A, 1, &row, nz, cols, vals, INSERT_VALUES)); 735 PetscCall(MatRestoreRow_MPIDense(mat, row, &nz, &cols, &vals)); 736 row++; 737 } 738 739 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 740 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 741 PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 742 if (rank == 0) { 743 PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIDense *)(A->data))->A, ((PetscObject)mat)->name)); 744 PetscCall(MatView_SeqDense(((Mat_MPIDense *)(A->data))->A, sviewer)); 745 } 746 PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 747 PetscCall(PetscViewerFlush(viewer)); 748 PetscCall(MatDestroy(&A)); 749 } 750 PetscFunctionReturn(PETSC_SUCCESS); 751 } 752 753 static PetscErrorCode MatView_MPIDense(Mat mat, PetscViewer viewer) 754 { 755 PetscBool iascii, isbinary, isdraw, issocket; 756 757 PetscFunctionBegin; 758 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 759 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 760 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket)); 761 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 762 763 if (iascii || issocket || isdraw) { 764 PetscCall(MatView_MPIDense_ASCIIorDraworSocket(mat, viewer)); 765 } else if (isbinary) PetscCall(MatView_Dense_Binary(mat, viewer)); 766 PetscFunctionReturn(PETSC_SUCCESS); 767 } 768 769 static PetscErrorCode MatGetInfo_MPIDense(Mat A, MatInfoType flag, MatInfo *info) 770 { 771 Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 772 Mat mdn = mat->A; 773 PetscLogDouble isend[5], irecv[5]; 774 775 PetscFunctionBegin; 776 
  info->block_size = 1.0;

  /* Start from the local block's counts, then reduce according to the requested flavor */
  PetscCall(MatGetInfo(mdn, MAT_LOCAL, info));

  isend[0] = info->nz_used;
  isend[1] = info->nz_allocated;
  isend[2] = info->nz_unneeded;
  isend[3] = info->memory;
  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)A)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)A)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Handle MatSetOption(): forward to the local dense block, store locally, ignore, or error.
   Options with no meaning for a dense matrix are logged via PetscInfo and ignored. */
static PetscErrorCode MatSetOption_MPIDense(Mat A, MatOption op, PetscBool flg)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A, 1);
    a->roworiented = flg; /* remembered for MatSetValues on this level too */
    PetscCall(MatSetOption(a->A, op, flg));
    break;
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_USE_HASH_TABLE:
  case MAT_SORTED_FULL:
    PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg; /* suppresses stashing of off-process values during assembly */
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
  case MAT_SPD:
  case MAT_IGNORE_LOWER_TRIANGULAR:
  case MAT_IGNORE_ZERO_ENTRIES:
  case MAT_SPD_ETERNAL:
    /* if the diagonal matrix is square it inherits some of the properties above */
    PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
    break;
  default:
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %s", MatOptions[op]);
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* A = diag(ll) * A * diag(rr); either vector may be NULL.
   Left scaling is purely local (ll matches the row layout); right scaling first
   broadcasts rr into the local work vector lvec via the communication SF. */
static PetscErrorCode MatDiagonalScale_MPIDense(Mat A, Vec ll, Vec rr)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)A->data;
  const PetscScalar *l;
  PetscScalar        x, *v, *vv, *r;
  PetscInt           i, j, s2a, s3a, s2, s3, m = mdn->A->rmap->n, n = mdn->A->cmap->n, lda;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArray(mdn->A, &vv));
  PetscCall(MatDenseGetLDA(mdn->A, &lda));
  PetscCall(MatGetLocalSize(A, &s2, &s3));
  if (ll) {
    PetscCall(VecGetLocalSize(ll, &s2a));
    PetscCheck(s2a == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left scaling vector non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT, s2a, s2);
    PetscCall(VecGetArrayRead(ll, &l));
    for (i = 0; i < m; i++) {
      x = l[i];
      v = vv + i; /* walk row i across columns; stride is the leading dimension */
      for (j = 0; j < n; j++) {
        (*v) *= x;
        v += lda;
      }
    }
    PetscCall(VecRestoreArrayRead(ll, &l));
    PetscCall(PetscLogFlops(1.0 * n * m));
  }
  if (rr) {
    const PetscScalar *ar;

    PetscCall(VecGetLocalSize(rr, &s3a));
    PetscCheck(s3a == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Right scaling vec non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT ".", s3a, s3);
    PetscCall(VecGetArrayRead(rr, &ar));
    if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); /* lazily build the SF used to gather rr */
    PetscCall(VecGetArray(mdn->lvec, &r));
    /* Gather the (distributed) right-scaling vector into the local vector r */
    PetscCall(PetscSFBcastBegin(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE));
    PetscCall(VecRestoreArrayRead(rr, &ar));
    for (i = 0; i < n; i++) {
      x = r[i];
      v = vv + i * lda; /* column i is contiguous in memory */
      for (j = 0; j < m; j++) (*v++) *= x;
    }
    PetscCall(VecRestoreArray(mdn->lvec, &r));
    PetscCall(PetscLogFlops(1.0 * n * m));
  }
  PetscCall(MatDenseRestoreArray(mdn->A, &vv));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Compute the Frobenius, one, or infinity norm of the distributed matrix.
   The uniprocess case is delegated to the sequential block; otherwise local
   contributions are combined with the appropriate reduction. NORM_2 is unsupported. */
static PetscErrorCode MatNorm_MPIDense(Mat A, NormType type, PetscReal *nrm)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)A->data;
  PetscInt           i, j;
  PetscMPIInt        size;
  PetscReal          sum = 0.0;
  const PetscScalar *av, *v;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArrayRead(mdn->A, &av));
  v = av;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    PetscCall(MatNorm(mdn->A, type, nrm));
  } else {
    if (type == NORM_FROBENIUS) {
      /* sum of |a_ij|^2 locally, then global sum and square root
         NOTE(review): assumes the local block is stored contiguously (lda == local rows) — confirm */
      for (i = 0; i < mdn->A->cmap->n * mdn->A->rmap->n; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
      *nrm = PetscSqrtReal(*nrm);
      PetscCall(PetscLogFlops(2.0 * mdn->A->cmap->n * mdn->A->rmap->n));
    } else if (type == NORM_1) {
      /* max over columns of the globally summed column absolute sums */
      PetscReal *tmp, *tmp2;
      PetscCall(PetscCalloc2(A->cmap->N, &tmp, A->cmap->N, &tmp2));
      *nrm = 0.0;
      v    = av;
      for (j = 0; j < mdn->A->cmap->n; j++) {
        for (i = 0; i < mdn->A->rmap->n; i++) {
          tmp[j] += PetscAbsScalar(*v);
          v++;
        }
      }
      PetscCall(MPIU_Allreduce(tmp, tmp2, A->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
      for (j = 0; j < A->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      PetscCall(PetscFree2(tmp, tmp2));
      PetscCall(PetscLogFlops(A->cmap->n * A->rmap->n));
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp;
      PetscCall(MatNorm(mdn->A, type, &ntemp));
      PetscCall(MPIU_Allreduce(&ntemp, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
    } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for two norm");
  }
  PetscCall(MatDenseRestoreArrayRead(mdn->A, &av));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* B = A^T, built column-by-column with MatSetValues so the transposed entries land
   on the owning process; MAT_INPLACE_MATRIX merges the result back into A's header */
static PetscErrorCode MatTranspose_MPIDense(Mat A, MatReuse reuse, Mat *matout)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat           B;
  PetscInt      M = A->rmap->N, N = A->cmap->N, m, n, *rwork, rstart = A->rmap->rstart;
  PetscInt      j, i, lda;
  PetscScalar  *v;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
    PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
    PetscCall(MatMPIDenseSetPreallocation(B, NULL));
  } else B = *matout;

  m = a->A->rmap->n;
  n = a->A->cmap->n;
  PetscCall(MatDenseGetArrayRead(a->A, (const PetscScalar **)&v));
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(PetscMalloc1(m, &rwork));
  for (i = 0; i < m; i++) rwork[i] = rstart + i; /* global row indices of the local rows */
  for (j = 0; j < n; j++) {
    /* local column j of A becomes (part of) global row j of B */
    PetscCall(MatSetValues(B, 1, &j, m, rwork, v, INSERT_VALUES));
    v += lda;
  }
  PetscCall(MatDenseRestoreArrayRead(a->A, (const PetscScalar **)&v));
  PetscCall(PetscFree(rwork));
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    PetscCall(MatHeaderMerge(A, &B));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatDuplicate_MPIDense(Mat, MatDuplicateOption, Mat *);
PETSC_INTERN PetscErrorCode
MatScale_MPIDense(Mat, PetscScalar);

/* Set up layouts and, if not yet preallocated, allocate the local dense block */
static PetscErrorCode MatSetUp_MPIDense(Mat A)
{
  PetscFunctionBegin;
  PetscCall(PetscLayoutSetUp(A->rmap));
  PetscCall(PetscLayoutSetUp(A->cmap));
  if (!A->preallocated) PetscCall(MatMPIDenseSetPreallocation(A, NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Y += alpha*X, forwarded to the local dense blocks */
static PetscErrorCode MatAXPY_MPIDense(Mat Y, PetscScalar alpha, Mat X, MatStructure str)
{
  Mat_MPIDense *A = (Mat_MPIDense *)Y->data, *B = (Mat_MPIDense *)X->data;

  PetscFunctionBegin;
  PetscCall(MatAXPY(A->A, alpha, B->A, str));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Complex-conjugate all entries (forwarded to the local block) */
static PetscErrorCode MatConjugate_MPIDense(Mat mat)
{
  Mat_MPIDense *a = (Mat_MPIDense *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatConjugate(a->A));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Replace each entry by its real part (forwarded to the local block) */
static PetscErrorCode MatRealPart_MPIDense(Mat A)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatRealPart(a->A));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Replace each entry by its imaginary part (forwarded to the local block) */
static PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatImaginaryPart(a->A));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Extract global column `col` into v by dispatching to the local block's implementation */
static PetscErrorCode MatGetColumnVector_MPIDense(Mat A, Vec v, PetscInt col)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(a->A, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing local matrix");
  PetscCheck(a->A->ops->getcolumnvector, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing get column operation");
  PetscCall((*a->A->ops->getcolumnvector)(a->A, v, col));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat, PetscInt, PetscReal *);

/* Column-wise reductions (norms, sums, means) over the distributed matrix.
   Local per-column values are computed on the sequential block and then reduced
   (MAX for the infinity norm, SUM otherwise).  MEAN is computed as SUM/m; NORM_2
   is accumulated as squares and square-rooted after the reduction.
   NOTE(review): `type` mixes NormType and MatReductionType values — presumably the
   enum values are disjoint; confirm against petscmat.h. */
static PetscErrorCode MatGetColumnReductions_MPIDense(Mat A, PetscInt type, PetscReal *reductions)
{
  PetscInt      i, m, n;
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscReal    *work;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A, &m, &n));
  PetscCall(PetscMalloc1(n, &work));
  if (type == REDUCTION_MEAN_REALPART) {
    PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_REALPART, work));
  } else if (type == REDUCTION_MEAN_IMAGINARYPART) {
    PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_IMAGINARYPART, work));
  } else {
    PetscCall(MatGetColumnReductions_SeqDense(a->A, type, work));
  }
  if (type == NORM_2) {
    for (i = 0; i < n; i++) work[i] *= work[i]; /* local norms -> local sums of squares */
  }
  if (type == NORM_INFINITY) {
    PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, A->hdr.comm));
  } else {
    PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, A->hdr.comm));
  }
  PetscCall(PetscFree(work));
  if (type == NORM_2) {
    for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
  } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < n; i++) reductions[i] /= m;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Fill with random values (forwarded to the local block); keep the offload mask in sync */
static PetscErrorCode MatSetRandom_MPIDense(Mat x, PetscRandom rctx)
{
  Mat_MPIDense *d = (Mat_MPIDense *)x->data;

  PetscFunctionBegin;
  PetscCall(MatSetRandom(d->A, rctx));
#if defined(PETSC_HAVE_DEVICE)
  x->offloadmask = d->A->offloadmask;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* A dense matrix stores every entry, so no diagonal entry can be "missing" */
static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A, PetscBool *missing, PetscInt *d)
{
  PetscFunctionBegin;
  *missing = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat);
static PetscErrorCode
MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat);
static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat);
static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat);
static PetscErrorCode MatEqual_MPIDense(Mat, Mat, PetscBool *);
static PetscErrorCode MatLoad_MPIDense(Mat, PetscViewer);

/* Method table for MATMPIDENSE; the numeric comments are the slot indices in struct _MatOps */
static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
                                       MatGetRow_MPIDense,
                                       MatRestoreRow_MPIDense,
                                       MatMult_MPIDense,
                                       /* 4*/ MatMultAdd_MPIDense,
                                       MatMultTranspose_MPIDense,
                                       MatMultTransposeAdd_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 10*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatTranspose_MPIDense,
                                       /* 15*/ MatGetInfo_MPIDense,
                                       MatEqual_MPIDense,
                                       MatGetDiagonal_MPIDense,
                                       MatDiagonalScale_MPIDense,
                                       MatNorm_MPIDense,
                                       /* 20*/ MatAssemblyBegin_MPIDense,
                                       MatAssemblyEnd_MPIDense,
                                       MatSetOption_MPIDense,
                                       MatZeroEntries_MPIDense,
                                       /* 24*/ MatZeroRows_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 29*/ MatSetUp_MPIDense,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPIDense,
                                       NULL,
                                       /* 34*/ MatDuplicate_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 39*/ MatAXPY_MPIDense,
                                       MatCreateSubMatrices_MPIDense,
                                       NULL,
                                       MatGetValues_MPIDense,
                                       MatCopy_MPIDense,
                                       /* 44*/ NULL,
                                       MatScale_MPIDense,
                                       MatShift_MPIDense,
                                       NULL,
                                       NULL,
                                       /* 49*/ MatSetRandom_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 54*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 59*/ MatCreateSubMatrix_MPIDense,
                                       MatDestroy_MPIDense,
                                       MatView_MPIDense,
                                       NULL,
                                       NULL,
                                       /* 64*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 69*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 74*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 79*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 83*/ MatLoad_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 89*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 94*/ NULL,
                                       NULL,
                                       MatMatTransposeMultSymbolic_MPIDense_MPIDense,
                                       MatMatTransposeMultNumeric_MPIDense_MPIDense,
                                       NULL,
                                       /* 99*/ MatProductSetFromOptions_MPIDense,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPIDense,
                                       NULL,
                                       /*104*/ NULL,
                                       MatRealPart_MPIDense,
                                       MatImaginaryPart_MPIDense,
                                       NULL,
                                       NULL,
                                       /*109*/ NULL,
                                       NULL,
                                       NULL,
                                       MatGetColumnVector_MPIDense,
                                       MatMissingDiagonal_MPIDense,
                                       /*114*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*119*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*124*/ NULL,
                                       MatGetColumnReductions_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*129*/ NULL,
                                       NULL,
                                       MatTransposeMatMultSymbolic_MPIDense_MPIDense,
                                       MatTransposeMatMultNumeric_MPIDense_MPIDense,
                                       NULL,
                                       /*134*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*139*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatCreateMPIMatConcatenateSeqMat_MPIDense,
                                       /*145*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*150*/ NULL,
                                       NULL};

/* Allocate (or adopt, when `data` is non-NULL) storage for the local dense block.
   The local block is m x N: all columns of the locally owned rows.  The sequential
   type is chosen to match the parallel type (CUDA/HIP aware). */
static PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat, PetscScalar *data)
{
  Mat_MPIDense *a     = (Mat_MPIDense *)mat->data;
  MatType       mtype = MATSEQDENSE;

  PetscFunctionBegin;
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));
  if (!a->A) {
    PetscCall(MatCreate(PETSC_COMM_SELF, &a->A));
    PetscCall(MatSetSizes(a->A, mat->rmap->n, mat->cmap->N, mat->rmap->n, mat->cmap->N));
  }
#if defined(PETSC_HAVE_CUDA)
  PetscBool iscuda;
  PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSECUDA, &iscuda));
  if (iscuda)
mtype = MATSEQDENSECUDA;
#endif
#if defined(PETSC_HAVE_HIP)
  PetscBool iship;
  PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSEHIP, &iship));
  if (iship) mtype = MATSEQDENSEHIP;
#endif
  PetscCall(MatSetType(a->A, mtype));
  PetscCall(MatSeqDenseSetPreallocation(a->A, data));
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
  mat->offloadmask = a->A->offloadmask;
#endif
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Convert MATMPIAIJ -> MATMPIDENSE: densify the merged local AIJ rows, then
   concatenate the per-process sequential blocks into a parallel dense matrix */
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIDense(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat B, C;

  PetscFunctionBegin;
  PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &C));
  PetscCall(MatConvert_SeqAIJ_SeqDense(C, MATSEQDENSE, MAT_INITIAL_MATRIX, &B));
  PetscCall(MatDestroy(&C)); /* C was a fresh copy from MatMPIAIJGetLocalMat */
  if (reuse == MAT_REUSE_MATRIX) {
    C = *newmat;
  } else C = NULL;
  PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C));
  PetscCall(MatDestroy(&B));
  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A, &C));
  } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Convert MATMPIDENSE -> MATMPIAIJ: sparsify the local block, then concatenate.
   The local matrix C is borrowed (MatDenseGetLocalMatrix), so it is not destroyed here. */
static PetscErrorCode MatConvert_MPIDense_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat B, C;

  PetscFunctionBegin;
  PetscCall(MatDenseGetLocalMatrix(A, &C));
  PetscCall(MatConvert_SeqDense_SeqAIJ(C, MATSEQAIJ, MAT_INITIAL_MATRIX, &B));
  if (reuse == MAT_REUSE_MATRIX) {
    C = *newmat;
  } else C = NULL;
  PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C));
  PetscCall(MatDestroy(&B));
  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A, &C));
  } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
  PetscFunctionReturn(PETSC_SUCCESS);
}

#if defined(PETSC_HAVE_ELEMENTAL)
/* Convert MATMPIDENSE -> MATELEMENTAL by setting all locally owned rows at once */
PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat          mat_elemental;
  PetscScalar *v;
  PetscInt     m = A->rmap->n, N = A->cmap->N, rstart = A->rmap->rstart, i, *rows, *cols;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    mat_elemental = *newmat;
    PetscCall(MatZeroEntries(*newmat));
  } else {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental));
    PetscCall(MatSetSizes(mat_elemental, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N));
    PetscCall(MatSetType(mat_elemental, MATELEMENTAL));
    PetscCall(MatSetUp(mat_elemental));
    PetscCall(MatSetOption(mat_elemental, MAT_ROW_ORIENTED, PETSC_FALSE));
  }

  PetscCall(PetscMalloc2(m, &rows, N, &cols));
  for (i = 0; i < N; i++) cols[i] = i;
  for (i = 0; i < m; i++) rows[i] = rstart + i;

  /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
  PetscCall(MatDenseGetArray(A, &v));
  PetscCall(MatSetValues(mat_elemental, m, rows, N, cols, v, ADD_VALUES));
  PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY));
  PetscCall(MatDenseRestoreArray(A, &v));
  PetscCall(PetscFree2(rows, cols));

  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A, &mat_elemental));
  } else {
    *newmat = mat_elemental;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif

/* Expose a pointer to the entries of global column `col` (forwarded to the local block) */
static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A, PetscInt col, PetscScalar **vals)
{
  Mat_MPIDense *mat = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseGetColumn(mat->A, col, vals));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Release a column pointer obtained with MatDenseGetColumn_MPIDense() */
static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A, PetscScalar **vals)
{
  Mat_MPIDense *mat = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseRestoreColumn(mat->A, vals));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Build (symbolic phase) or refill (numeric phase) a parallel dense matrix whose
   local rows are the given sequential matrix; `n` is the local column count, or
   PETSC_DECIDE to let PETSc split the N global columns */
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
{
  Mat_MPIDense *mat;
  PetscInt      m, nloc, N;

  PetscFunctionBegin;
  PetscCall(MatGetSize(inmat, &m, &N));
  PetscCall(MatGetLocalSize(inmat, NULL, &nloc));
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    PetscInt sum;

    if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N));
    /* Check sum(n) = N */
    PetscCall(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm));
    PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N);

    PetscCall(MatCreateDense(comm, m, n, PETSC_DETERMINE, N, NULL, outmat));
    PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  }

  /* numeric phase */
  mat = (Mat_MPIDense *)(*outmat)->data;
  PetscCall(MatCopy(inmat, mat->A, SAME_NONZERO_PATTERN));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Lend out the cached column vector aliased onto column `col` of the local array.
   Only one column vector may be outstanding at a time (tracked by vecinuse). */
PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscInt      lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
  a->vecinuse = col + 1; /* +1 so column 0 still reads as "in use" */
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(MatDenseGetArray(a->A, (PetscScalar **)&a->ptrinuse));
  PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
  *v = a->cvec;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Return the column vector from MatDenseGetColumnVec_MPIDense() and release the array */
PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector");
  a->vecinuse = 0;
  PetscCall(MatDenseRestoreArray(a->A, (PetscScalar **)&a->ptrinuse));
  PetscCall(VecResetArray(a->cvec));
  if (v) *v = NULL;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Read-only variant: uses the read-only array accessor and read-locks the vector */
PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscInt      lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
  a->vecinuse = col + 1;
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(MatDenseGetArrayRead(a->A, &a->ptrinuse));
  PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
  PetscCall(VecLockReadPush(a->cvec));
  *v = a->cvec;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Release the read-only column vector (pops the read lock before resetting the array) */
PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
  a->vecinuse = 0;
  PetscCall(MatDenseRestoreArrayRead(a->A, &a->ptrinuse));
  PetscCall(VecLockReadPop(a->cvec));
  PetscCall(VecResetArray(a->cvec));
  if (v) *v = NULL;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Write-only variant: uses the write-only array accessor (previous contents undefined) */
PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscInt      lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
  a->vecinuse = col + 1;
  PetscCall(MatDenseGetLDA(a->A, &lda));
  PetscCall(MatDenseGetArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
  PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
  *v = a->cvec;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Release the write-only column vector obtained above */
PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
  a->vecinuse = 0;
  PetscCall(MatDenseRestoreArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
  PetscCall(VecResetArray(a->cvec));
  if (v) *v = NULL;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A, PetscInt rbegin, PetscInt rend, PetscInt
cbegin, PetscInt cend, Mat *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat_MPIDense *c;
  MPI_Comm      comm;
  PetscInt      pbegin, pend;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCheck(!a->vecinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  /* Clip the requested global row range [rbegin,rend) to this process's owned rows */
  pbegin = PetscMax(0, PetscMin(A->rmap->rend, rbegin) - A->rmap->rstart);
  pend   = PetscMin(A->rmap->n, PetscMax(0, rend - A->rmap->rstart));
  if (!a->cmat) {
    /* First use: create the cached view matrix and give it matching layouts */
    PetscCall(MatCreate(comm, &a->cmat));
    PetscCall(MatSetType(a->cmat, ((PetscObject)A)->type_name));
    if (rend - rbegin == A->rmap->N) PetscCall(PetscLayoutReference(A->rmap, &a->cmat->rmap));
    else {
      PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin));
      PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin));
      PetscCall(PetscLayoutSetUp(a->cmat->rmap));
    }
    PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin));
    PetscCall(PetscLayoutSetUp(a->cmat->cmap));
  } else {
    /* Reuse: rebuild the layouts only if the requested extents changed */
    PetscBool same = (PetscBool)(rend - rbegin == a->cmat->rmap->N);
    if (same && a->cmat->rmap->N != A->rmap->N) {
      same = (PetscBool)(pend - pbegin == a->cmat->rmap->n);
      PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &same, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
    }
    if (!same) {
      PetscCall(PetscLayoutDestroy(&a->cmat->rmap));
      PetscCall(PetscLayoutCreate(comm, &a->cmat->rmap));
      PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin));
      PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin));
      PetscCall(PetscLayoutSetUp(a->cmat->rmap));
    }
    if (cend - cbegin != a->cmat->cmap->N) {
      PetscCall(PetscLayoutDestroy(&a->cmat->cmap));
      PetscCall(PetscLayoutCreate(comm, &a->cmat->cmap));
      PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin));
      PetscCall(PetscLayoutSetUp(a->cmat->cmap));
    }
  }
  c = (Mat_MPIDense *)a->cmat->data;
  PetscCheck(!c->A, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseGetSubMatrix(a->A, pbegin, pend, cbegin, cend, &c->A));

  a->cmat->preallocated = PETSC_TRUE;
  a->cmat->assembled    = PETSC_TRUE;
#if defined(PETSC_HAVE_DEVICE)
  a->cmat->offloadmask = c->A->offloadmask;
#endif
  a->matinuse = cbegin + 1; /* +1 so cbegin == 0 still reads as "in use" */
  *v          = a->cmat;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Return the view matrix from MatDenseGetSubMatrix_MPIDense() and release the local view */
static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A, Mat *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat_MPIDense *c;

  PetscFunctionBegin;
  PetscCheck(a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetSubMatrix() first");
  PetscCheck(a->cmat, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal matrix");
  PetscCheck(*v == a->cmat, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Not the matrix obtained from MatDenseGetSubMatrix()");
  a->matinuse = 0;
  c           = (Mat_MPIDense *)a->cmat->data;
  PetscCall(MatDenseRestoreSubMatrix(a->A, &c->A));
  if (v) *v = NULL;
#if defined(PETSC_HAVE_DEVICE)
  A->offloadmask = a->A->offloadmask;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*MC
   MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.

   Options Database Key:
.
-mat_type mpidense - sets the matrix type to `MATMPIDENSE` during a call to `MatSetFromOptions()`

  Level: beginner

.seealso: [](ch_matrices), `Mat`, `MatCreateDense()`, `MATSEQDENSE`, `MATDENSE`
M*/
/* Type constructor for MATMPIDENSE: installs the function table, creates the value
   stash used for off-process MatSetValues(), and composes the type-specific methods
   (array access, column vectors, submatrices, conversions, products) on the object. */
PetscErrorCode MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense *a;

  PetscFunctionBegin;
  PetscCall(PetscNew(&a));
  mat->data   = (void *)a;
  mat->ops[0] = MatOps_Values; /* struct assignment of the whole MatOps table */

  mat->insertmode = NOT_SET_VALUES;

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;

  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)mat), 1, &mat->stash));

  /* stuff used for matrix vector multiply */
  a->lvec        = NULL;
  a->Mvctx       = NULL;
  a->roworiented = PETSC_TRUE;

  /* dense-specific array and leading-dimension accessors */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", MatDenseGetLDA_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", MatDenseSetLDA_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", MatDenseGetArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", MatDenseRestoreArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", MatDenseGetArrayRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", MatDenseRestoreArrayRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", MatDenseGetArrayWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", MatDenseRestoreArrayWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", MatDensePlaceArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", MatDenseResetArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", MatDenseReplaceArray_MPIDense));
  /* column-vector and submatrix views */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", MatDenseGetSubMatrix_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", MatDenseRestoreSubMatrix_MPIDense));
  /* type conversions */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", MatConvert_MPIDense_MPIAIJ));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", MatConvert_MPIDense_Elemental));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", MatConvert_Dense_ScaLAPACK));
#endif
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", MatConvert_MPIDense_MPIDenseCUDA));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", MatMPIDenseSetPreallocation_MPIDense));
  /* matrix-product hooks (sparse * dense and dense * sparse) */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
#endif
#if defined(PETSC_HAVE_HIP)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", MatConvert_MPIDense_MPIDenseHIP));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", MatDenseGetColumn_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", MatDenseRestoreColumn_MPIDense));
  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, MATMPIDENSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*MC
   MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.

   This matrix type is identical to `MATSEQDENSE` when constructed with a single process communicator,
   and `MATMPIDENSE` otherwise.

   Options Database Key:
.
-mat_type dense - sets the matrix type to `MATDENSE` during a call to `MatSetFromOptions()`

  Level: beginner

.seealso: [](ch_matrices), `Mat`, `MATSEQDENSE`, `MATMPIDENSE`, `MATDENSECUDA`, `MATDENSEHIP`
M*/

/*@C
  MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

  Collective

  Input Parameters:
+ B - the matrix
- data - optional location of matrix data. Set to `NULL` for PETSc
   to control all matrix memory allocation.

  Level: intermediate

  Notes:
  The dense format is fully compatible with standard Fortran
  storage by columns.

  The data input variable is intended primarily for Fortran programmers
  who wish to allocate their own matrix memory space. Most users should
  set `data` to `NULL`.

.seealso: [](ch_matrices), `Mat`, `MATMPIDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
@*/
PetscErrorCode MatMPIDenseSetPreallocation(Mat B, PetscScalar *data)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  /* PetscTryMethod: silently a no-op for types that do not compose
     "MatMPIDenseSetPreallocation_C" (e.g. MATSEQDENSE) */
  PetscTryMethod(B, "MatMPIDenseSetPreallocation_C", (Mat, PetscScalar *), (B, data));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatDensePlaceArray - Allows one to replace the array in a `MATDENSE` matrix with an
  array provided by the user. This is useful to avoid copying an array
  into a matrix

  Not Collective

  Input Parameters:
+ mat   - the matrix
- array - the array in column major order

  Level: developer

  Note:
  You can return to the original array with a call to `MatDenseResetArray()`. The user is responsible for freeing this array; it will not be
  freed when the matrix is destroyed.
.seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDenseResetArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`,
          `MatDenseReplaceArray()`
@*/
PetscErrorCode MatDensePlaceArray(Mat mat, const PetscScalar *array)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  PetscUseMethod(mat, "MatDensePlaceArray_C", (Mat, const PetscScalar *), (mat, array));
  /* the matrix values changed: bump the object state so cached norms etc. are invalidated */
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
  /* the user-supplied array lives on the host */
  mat->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatDenseResetArray - Resets the matrix array to that it previously had before the call to `MatDensePlaceArray()`

  Not Collective

  Input Parameter:
. mat - the matrix

  Level: developer

  Note:
  You can only call this after a call to `MatDensePlaceArray()`

.seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDensePlaceArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`
@*/
PetscErrorCode MatDenseResetArray(Mat mat)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  PetscUseMethod(mat, "MatDenseResetArray_C", (Mat), (mat));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an
  array provided by the user. This is useful to avoid copying an array
  into a matrix

  Not Collective

  Input Parameters:
+ mat   - the matrix
- array - the array in column major order

  Level: developer

  Note:
  The memory passed in MUST be obtained with `PetscMalloc()` and CANNOT be
  freed by the user. It will be freed when the matrix is destroyed.

.seealso: [](ch_matrices), `Mat`, `MatDensePlaceArray()`, `MatDenseGetArray()`, `VecReplaceArray()`
@*/
PetscErrorCode MatDenseReplaceArray(Mat mat, const PetscScalar *array)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  PetscUseMethod(mat, "MatDenseReplaceArray_C", (Mat, const PetscScalar *), (mat, array));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
  mat->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatCreateDense - Creates a matrix in `MATDENSE` format.

  Collective

  Input Parameters:
+ comm - MPI communicator
. m    - number of local rows (or `PETSC_DECIDE` to have calculated if `M` is given)
. n    - number of local columns (or `PETSC_DECIDE` to have calculated if `N` is given)
. M    - number of global rows (or `PETSC_DECIDE` to have calculated if `m` is given)
. N    - number of global columns (or `PETSC_DECIDE` to have calculated if `n` is given)
- data - optional location of matrix data. Set data to `NULL` (`PETSC_NULL_SCALAR` for Fortran users) for PETSc
   to control all matrix memory allocation.

  Output Parameter:
. A - the matrix

  Level: intermediate

  Notes:
  The dense format is fully compatible with standard Fortran
  storage by columns.

  Although local portions of the matrix are stored in column-major
  order, the matrix is partitioned across MPI ranks by row.

  The data input variable is intended primarily for Fortran programmers
  who wish to allocate their own matrix memory space. Most users should
  set `data` to `NULL` (`PETSC_NULL_SCALAR` for Fortran users).

  The user MUST specify either the local or global matrix dimensions
  (possibly both).

.seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
@*/
PetscErrorCode MatCreateDense(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar *data, Mat *A)
{
  PetscFunctionBegin;
  PetscCall(MatCreate(comm, A));
  PetscCall(MatSetSizes(*A, m, n, M, N));
  PetscCall(MatSetType(*A, MATDENSE));
  /* both are "try" methods: only the one matching the actual type (seq vs mpi) acts */
  PetscCall(MatSeqDenseSetPreallocation(*A, data));
  PetscCall(MatMPIDenseSetPreallocation(*A, data));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Duplicate an MPIDENSE matrix: mirror layout/flags, then duplicate the local
   dense block (copying values only when cpvalues requests it). */
static PetscErrorCode MatDuplicate_MPIDense(Mat A, MatDuplicateOption cpvalues, Mat *newmat)
{
  Mat           mat;
  Mat_MPIDense *a, *oldmat = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  *newmat = NULL;
  PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat));
  PetscCall(MatSetSizes(mat, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
  PetscCall(MatSetType(mat, ((PetscObject)A)->type_name));
  a = (Mat_MPIDense *)mat->data;

  mat->factortype   = A->factortype;
  mat->assembled    = PETSC_TRUE;
  mat->preallocated = PETSC_TRUE;

  mat->insertmode = NOT_SET_VALUES;
  a->donotstash   = oldmat->donotstash;

  /* share (reference-count) the row/column layouts rather than rebuilding them */
  PetscCall(PetscLayoutReference(A->rmap, &mat->rmap));
  PetscCall(PetscLayoutReference(A->cmap, &mat->cmap));

  PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));

  *newmat = mat;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
{
  PetscBool isbinary;
#if defined(PETSC_HAVE_HDF5)
  PetscBool ishdf5;
#endif
  PetscFunctionBegin;
  PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
  PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
  /* force binary viewer to load .info file if it has not yet done so */
  PetscCall(PetscViewerSetUp(viewer));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
#if defined(PETSC_HAVE_HDF5)
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
#endif
  if (isbinary) {
    PetscCall(MatLoad_Dense_Binary(newMat, viewer));
#if defined(PETSC_HAVE_HDF5)
  } else if (ishdf5) {
    PetscCall(MatLoad_Dense_HDF5(newMat, viewer));
#endif
  } else SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Pointwise equality of two MPIDENSE matrices: compare the local dense blocks,
   then AND the per-rank results across the communicator so all ranks agree. */
static PetscErrorCode MatEqual_MPIDense(Mat A, Mat B, PetscBool *flag)
{
  Mat_MPIDense *matB = (Mat_MPIDense *)B->data, *matA = (Mat_MPIDense *)A->data;
  Mat           a, b;

  PetscFunctionBegin;
  a = matA->A;
  b = matB->A;
  PetscCall(MatEqual(a, b, flag));
  PetscCall(MPIU_Allreduce(MPI_IN_PLACE, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Destructor for the C = A^T*B product data attached to C by
   MatTransposeMatMultSymbolic_MPIDense_MPIDense(). */
static PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data)
{
  Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data;

  PetscFunctionBegin;
  PetscCall(PetscFree2(atb->sendbuf, atb->recvcounts));
  PetscCall(MatDestroy(&atb->atb));
  PetscCall(PetscFree(atb));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Destructor for the C = A*B^T product data attached to C by
   MatMatTransposeMultSymbolic_MPIDense_MPIDense(). */
static PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data)
{
  Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data;

  PetscFunctionBegin;
  PetscCall(PetscFree2(abt->buf[0],
abt->buf[1]));
  PetscCall(PetscFree2(abt->recvcounts, abt->recvdispls));
  PetscCall(PetscFree(abt));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Numeric phase of C = A^T * B for MPIDENSE: each rank computes the full local
   product aseq^T * bseq, packs it into sendbuf ordered by destination row block,
   then MPI_Reduce_scatter sums the contributions into each rank's rows of C. */
static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
{
  Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
  Mat_TransMatMultDense *atb;
  MPI_Comm               comm;
  PetscMPIInt            size, *recvcounts;
  PetscScalar           *carray, *sendbuf;
  const PetscScalar     *atbarray;
  PetscInt               i, cN = C->cmap->N, proc, k, j, lda;
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C, 3);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
  atb        = (Mat_TransMatMultDense *)C->product->data;
  recvcounts = atb->recvcounts;
  sendbuf    = atb->sendbuf;

  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));

  /* compute atbarray = aseq^T * bseq (reuse the cached result matrix after the first call) */
  PetscCall(MatTransposeMatMult(a->A, b->A, atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX, PETSC_DEFAULT, &atb->atb));

  PetscCall(MatGetOwnershipRanges(C, &ranges));

  /* arrange atbarray into sendbuf: for each destination rank, its row block in column-major order */
  PetscCall(MatDenseGetArrayRead(atb->atb, &atbarray));
  PetscCall(MatDenseGetLDA(atb->atb, &lda));
  for (proc = 0, k = 0; proc < size; proc++) {
    for (j = 0; j < cN; j++) {
      for (i = ranges[proc]; i < ranges[proc + 1]; i++) sendbuf[k++] = atbarray[i + j * lda];
    }
  }
  PetscCall(MatDenseRestoreArrayRead(atb->atb, &atbarray));

  /* sum all atbarray to local values of C */
  PetscCall(MatDenseGetArrayWrite(c->A, &carray));
  PetscCallMPI(MPI_Reduce_scatter(sendbuf, carray, recvcounts, MPIU_SCALAR, MPIU_SUM, comm));
  PetscCall(MatDenseRestoreArrayWrite(c->A, &carray));
  PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Symbolic phase of C = A^T * B for MPIDENSE: sizes and types C, and allocates the
   reusable send buffer and receive counts consumed by the numeric phase above. */
static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
{
  MPI_Comm               comm;
  PetscMPIInt            size;
  PetscInt               cm = A->cmap->n, cM, cN = B->cmap->N;
  Mat_TransMatMultDense *atb;
  PetscBool              cisdense = PETSC_FALSE;
  PetscInt               i;
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C, 4);
  PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  /* A^T*B requires A and B to share the same row distribution */
  if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
    SETERRQ(comm, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
  }

  /* create matrix product C */
  PetscCall(MatSetSizes(C, cm, B->cmap->n, A->cmap->N, B->cmap->N));
#if defined(PETSC_HAVE_CUDA)
PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, ""));
#endif
#if defined(PETSC_HAVE_HIP)
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSEHIP, ""));
#endif
  if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
  PetscCall(MatSetUp(C));

  /* create data structure for reuse C */
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCall(PetscNew(&atb));
  cM = C->rmap->N;
  /* sendbuf holds the full global product on every rank; recvcounts is per-rank row-block size */
  PetscCall(PetscMalloc2(cM * cN, &atb->sendbuf, size, &atb->recvcounts));
  PetscCall(MatGetOwnershipRanges(C, &ranges));
  for (i = 0; i < size; i++) atb->recvcounts[i] = (ranges[i + 1] - ranges[i]) * cN;

  C->product->data    = atb;
  C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Symbolic phase of C = A * B^T for MPIDENSE: sizes C and allocates the pair of
   communication buffers used by either the "cyclic" or the "allgatherv" numeric
   algorithm (selected from the product's algorithm string). */
static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
{
  MPI_Comm               comm;
  PetscMPIInt            i, size;
  PetscInt               maxRows, bufsiz;
  PetscMPIInt            tag;
  PetscInt               alg;
  Mat_MatTransMultDense *abt;
  Mat_Product           *product = C->product;
  PetscBool              flg;

  PetscFunctionBegin;
  MatCheckProduct(C, 4);
  PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
  /* check local size of A and B */
  PetscCheck(A->cmap->n == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local column dimensions are incompatible, A (%" PetscInt_FMT ") != B (%" PetscInt_FMT ")", A->cmap->n, B->cmap->n);

  PetscCall(PetscStrcmp(product->alg, "allgatherv", &flg));
  alg = flg ? 0 : 1; /* 0 = allgatherv, 1 = cyclic */

  /* setup matrix product C */
  PetscCall(MatSetSizes(C, A->rmap->n, B->rmap->n, A->rmap->N, B->rmap->N));
  PetscCall(MatSetType(C, MATMPIDENSE));
  PetscCall(MatSetUp(C));
  /* private tag so the cyclic pipeline's messages cannot collide with other traffic on C's comm */
  PetscCall(PetscObjectGetNewTag((PetscObject)C, &tag));

  /* create data structure for reuse C */
  PetscCall(PetscObjectGetComm((PetscObject)C, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCall(PetscNew(&abt));
  abt->tag = tag;
  abt->alg = alg;
  switch (alg) {
  case 1: /* alg: "cyclic" — double buffer sized for the largest row block of B */
    for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i]));
    bufsiz = A->cmap->N * maxRows;
    PetscCall(PetscMalloc2(bufsiz, &(abt->buf[0]), bufsiz, &(abt->buf[1])));
    break;
  default: /* alg: "allgatherv" — buf[0] local transpose, buf[1] gathered global transpose */
    PetscCall(PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1])));
    PetscCall(PetscMalloc2(size, &(abt->recvcounts), size + 1, &(abt->recvdispls)));
    for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N;
    for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i];
    break;
  }

  C->product->data                = abt;
  C->product->destroy             = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
  C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Numeric phase of C = A * B^T, "cyclic" algorithm: the row blocks of B circulate
   around the ranks in a ring; each step overlaps the next Isend/Irecv with a local
   GEMM of aseq against the block currently held. */
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
{
  Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
  Mat_MatTransMultDense *abt;
  MPI_Comm               comm;
  PetscMPIInt            rank, size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom;
  PetscScalar           *sendbuf, *recvbuf = NULL, *cv;
  PetscInt               i, cK = A->cmap->N, k, j, bn;
  PetscScalar            _DOne = 1.0, _DZero = 0.0;
  const PetscScalar     *av, *bv;
  PetscBLASInt           cm, cn, ck, alda, blda = 0, clda;
  MPI_Request            reqs[2];
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C, 3);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
  abt = (Mat_MatTransMultDense *)C->product->data;
  PetscCall(PetscObjectGetComm((PetscObject)C, &comm));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCall(MatDenseGetArrayRead(a->A, &av));
  PetscCall(MatDenseGetArrayRead(b->A, &bv));
  PetscCall(MatDenseGetArrayWrite(c->A, &cv));
  PetscCall(MatDenseGetLDA(a->A, &i));
  PetscCall(PetscBLASIntCast(i, &alda));
  PetscCall(MatDenseGetLDA(b->A, &i));
  PetscCall(PetscBLASIntCast(i, &blda));
  PetscCall(MatDenseGetLDA(c->A, &i));
  PetscCall(PetscBLASIntCast(i, &clda));
  PetscCall(MatGetOwnershipRanges(B, &ranges));
  bn = B->rmap->n;
  if (blda == bn) {
    /* local B block is contiguous: send it in place without copying */
    sendbuf = (PetscScalar *)bv;
  } else {
    /* compact the strided columns of bv into buf[0] before the first send */
    sendbuf = abt->buf[0];
    for (k = 0, i = 0; i < cK; i++) {
      for (j = 0; j < bn; j++, k++) sendbuf[k] = bv[i * blda + j];
    }
  }
  if (size > 1) {
    sendto   = (rank + size - 1) % size;
    recvfrom = (rank + size + 1) % size;
  } else {
    sendto = recvfrom = 0;
  }
  PetscCall(PetscBLASIntCast(cK, &ck));
  PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm));
  recvisfrom = rank;
  for (i = 0; i < size; i++) {
    /* we have finished receiving in sending, bufs can be read/modified */
    PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */
    PetscInt nextbn         = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];

    if (nextrecvisfrom != rank) {
      /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */
      sendsiz = cK * bn;
      recvsiz = cK * nextbn;
      recvbuf = (i & 1) ?
abt->buf[0] : abt->buf[1]; 2138 PetscCallMPI(MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0])); 2139 PetscCallMPI(MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1])); 2140 } 2141 2142 /* local aseq * sendbuf^T */ 2143 PetscCall(PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn)); 2144 if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &cm, &cn, &ck, &_DOne, av, &alda, sendbuf, &cn, &_DZero, cv + clda * ranges[recvisfrom], &clda)); 2145 2146 if (nextrecvisfrom != rank) { 2147 /* wait for the sends and receives to complete, swap sendbuf and recvbuf */ 2148 PetscCallMPI(MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE)); 2149 } 2150 bn = nextbn; 2151 recvisfrom = nextrecvisfrom; 2152 sendbuf = recvbuf; 2153 } 2154 PetscCall(MatDenseRestoreArrayRead(a->A, &av)); 2155 PetscCall(MatDenseRestoreArrayRead(b->A, &bv)); 2156 PetscCall(MatDenseRestoreArrayWrite(c->A, &cv)); 2157 PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 2158 PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 2159 PetscFunctionReturn(PETSC_SUCCESS); 2160 } 2161 2162 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C) 2163 { 2164 Mat_MPIDense *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data; 2165 Mat_MatTransMultDense *abt; 2166 MPI_Comm comm; 2167 PetscMPIInt size; 2168 PetscScalar *cv, *sendbuf, *recvbuf; 2169 const PetscScalar *av, *bv; 2170 PetscInt blda, i, cK = A->cmap->N, k, j, bn; 2171 PetscScalar _DOne = 1.0, _DZero = 0.0; 2172 PetscBLASInt cm, cn, ck, alda, clda; 2173 2174 PetscFunctionBegin; 2175 MatCheckProduct(C, 3); 2176 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 2177 abt = (Mat_MatTransMultDense *)C->product->data; 2178 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 2179 PetscCallMPI(MPI_Comm_size(comm, &size)); 2180 PetscCall(MatDenseGetArrayRead(a->A, &av)); 2181 
PetscCall(MatDenseGetArrayRead(b->A, &bv)); 2182 PetscCall(MatDenseGetArrayWrite(c->A, &cv)); 2183 PetscCall(MatDenseGetLDA(a->A, &i)); 2184 PetscCall(PetscBLASIntCast(i, &alda)); 2185 PetscCall(MatDenseGetLDA(b->A, &blda)); 2186 PetscCall(MatDenseGetLDA(c->A, &i)); 2187 PetscCall(PetscBLASIntCast(i, &clda)); 2188 /* copy transpose of B into buf[0] */ 2189 bn = B->rmap->n; 2190 sendbuf = abt->buf[0]; 2191 recvbuf = abt->buf[1]; 2192 for (k = 0, j = 0; j < bn; j++) { 2193 for (i = 0; i < cK; i++, k++) sendbuf[k] = bv[i * blda + j]; 2194 } 2195 PetscCall(MatDenseRestoreArrayRead(b->A, &bv)); 2196 PetscCallMPI(MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm)); 2197 PetscCall(PetscBLASIntCast(cK, &ck)); 2198 PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm)); 2199 PetscCall(PetscBLASIntCast(c->A->cmap->n, &cn)); 2200 if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &cm, &cn, &ck, &_DOne, av, &alda, recvbuf, &ck, &_DZero, cv, &clda)); 2201 PetscCall(MatDenseRestoreArrayRead(a->A, &av)); 2202 PetscCall(MatDenseRestoreArrayRead(b->A, &bv)); 2203 PetscCall(MatDenseRestoreArrayWrite(c->A, &cv)); 2204 PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 2205 PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 2206 PetscFunctionReturn(PETSC_SUCCESS); 2207 } 2208 2209 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) 2210 { 2211 Mat_MatTransMultDense *abt; 2212 2213 PetscFunctionBegin; 2214 MatCheckProduct(C, 3); 2215 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 2216 abt = (Mat_MatTransMultDense *)C->product->data; 2217 switch (abt->alg) { 2218 case 1: 2219 PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C)); 2220 break; 2221 default: 2222 PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C)); 2223 break; 2224 } 2225 PetscFunctionReturn(PETSC_SUCCESS); 2226 } 2227 2228 #if 
defined(PETSC_HAVE_ELEMENTAL)
/* Destructor for the Elemental-based C = A*B product data. */
static PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data)
{
  Mat_MatMultDense *ab = (Mat_MatMultDense *)data;

  PetscFunctionBegin;
  PetscCall(MatDestroy(&ab->Ce));
  PetscCall(MatDestroy(&ab->Ae));
  PetscCall(MatDestroy(&ab->Be));
  PetscCall(PetscFree(ab));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Numeric phase of C = A*B via Elemental: refresh the cached Elemental mirrors of
   A and B, multiply in Elemental, and convert the result back into the dense C. */
PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
{
  Mat_MatMultDense *ab;

  PetscFunctionBegin;
  MatCheckProduct(C, 3);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Missing product data");
  ab = (Mat_MatMultDense *)C->product->data;
  PetscCall(MatConvert_MPIDense_Elemental(A, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Ae));
  PetscCall(MatConvert_MPIDense_Elemental(B, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Be));
  PetscCall(MatMatMultNumeric_Elemental(ab->Ae, ab->Be, ab->Ce));
  PetscCall(MatConvert(ab->Ce, MATMPIDENSE, MAT_REUSE_MATRIX, &C));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Symbolic phase of C = A*B via Elemental: build the Elemental mirrors Ae/Be/Ce
   once and attach them to C->product for reuse by the numeric phase. */
static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
{
  Mat               Ae, Be, Ce;
  Mat_MatMultDense *ab;

  PetscFunctionBegin;
  MatCheckProduct(C, 4);
  PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
  /* check local size of A and B */
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
    SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
  }

  /* create elemental matrices Ae and Be */
  PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &Ae));
  PetscCall(MatSetSizes(Ae, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N));
  PetscCall(MatSetType(Ae, MATELEMENTAL));
  PetscCall(MatSetUp(Ae));
  PetscCall(MatSetOption(Ae, MAT_ROW_ORIENTED, PETSC_FALSE));

  PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &Be));
  PetscCall(MatSetSizes(Be, PETSC_DECIDE, PETSC_DECIDE, B->rmap->N, B->cmap->N));
  PetscCall(MatSetType(Be, MATELEMENTAL));
  PetscCall(MatSetUp(Be));
  PetscCall(MatSetOption(Be, MAT_ROW_ORIENTED, PETSC_FALSE));

  /* compute symbolic Ce = Ae*Be */
  PetscCall(MatCreate(PetscObjectComm((PetscObject)C), &Ce));
  PetscCall(MatMatMultSymbolic_Elemental(Ae, Be, fill, Ce));

  /* setup C */
  PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, PETSC_DECIDE, PETSC_DECIDE));
  PetscCall(MatSetType(C, MATDENSE));
  PetscCall(MatSetUp(C));

  /* create data structure for reuse Cdense */
  PetscCall(PetscNew(&ab));
  ab->Ae = Ae;
  ab->Be = Be;
  ab->Ce = Ce;

  C->product->data       = ab;
  C->product->destroy    = MatDestroy_MatMatMult_MPIDense_MPIDense;
  C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif

#if defined(PETSC_HAVE_ELEMENTAL)
/* Hook the Elemental-backed symbolic routines for product type AB. */
static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C)
{
  PetscFunctionBegin;
  C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense;
  C->ops->productsymbolic = MatProductSymbolic_AB;
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif

/* Hook the symbolic routines for product type A^T*B after checking compatible row layouts. */
static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C)
{
  Mat_Product *product = C->product;
  Mat          A = product->A, B = product->B;

  PetscFunctionBegin;
  if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend)
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
  C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense;
  C->ops->productsymbolic          = MatProductSymbolic_AtB;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Hook the symbolic routines for product type A*B^T and select the algorithm
   ("allgatherv" default, or "cyclic") from the options database. */
static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C)
{
  Mat_Product *product     = C->product;
  const char  *algTypes[2] = {"allgatherv", "cyclic"};
  PetscInt     alg, nalg = 2;
  PetscBool    flg = PETSC_FALSE;

  PetscFunctionBegin;
  /* Set default algorithm */
  alg = 0; /* default is allgatherv */
  PetscCall(PetscStrcmp(product->alg, "default", &flg));
  if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));

  /* Get runtime option */
  if (product->api_user) {
    PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatTransposeMult", "Mat");
    PetscCall(PetscOptionsEList("-matmattransmult_mpidense_mpidense_via", "Algorithmic approach", "MatMatTransposeMult", algTypes, nalg, algTypes[alg], &alg, &flg));
    PetscOptionsEnd();
  } else {
    PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_ABt", "Mat");
    PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_ABt", algTypes, nalg, algTypes[alg], &alg, &flg));
    PetscOptionsEnd();
  }
  if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));

  C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense;
  C->ops->productsymbolic          = MatProductSymbolic_ABt;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Dispatch MatProduct symbolic setup for MPIDENSE by product type; unsupported
   types fall through untouched so generic handling can apply. */
PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C)
{
  Mat_Product *product = C->product;

  PetscFunctionBegin;
  switch (product->type) {
#if defined(PETSC_HAVE_ELEMENTAL)
  case MATPRODUCT_AB:
    PetscCall(MatProductSetFromOptions_MPIDense_AB(C));
    break;
#endif
  case MATPRODUCT_AtB:
    PetscCall(MatProductSetFromOptions_MPIDense_AtB(C));
    break;
  case MATPRODUCT_ABt:
    PetscCall(MatProductSetFromOptions_MPIDense_ABt(C));
    break;
  default:
    break;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}