1 /* 2 Basic functions for basic parallel dense matrices. 3 Portions of this code are under: 4 Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. 5 */ 6 7 #include <../src/mat/impls/dense/mpi/mpidense.h> /*I "petscmat.h" I*/ 8 #include <../src/mat/impls/aij/mpi/mpiaij.h> 9 #include <petscblaslapack.h> 10 11 /*@ 12 MatDenseGetLocalMatrix - For a `MATMPIDENSE` or `MATSEQDENSE` matrix returns the sequential 13 matrix that represents the operator. For sequential matrices it returns itself. 14 15 Input Parameter: 16 . A - the sequential or MPI `MATDENSE` matrix 17 18 Output Parameter: 19 . B - the inner matrix 20 21 Level: intermediate 22 23 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATMPIDENSE`, `MATSEQDENSE` 24 @*/ 25 PetscErrorCode MatDenseGetLocalMatrix(Mat A, Mat *B) 26 { 27 Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 28 PetscBool flg; 29 30 PetscFunctionBegin; 31 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 32 PetscAssertPointer(B, 2); 33 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &flg)); 34 if (flg) *B = mat->A; 35 else { 36 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQDENSE, &flg)); 37 PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)A)->type_name); 38 *B = A; 39 } 40 PetscFunctionReturn(PETSC_SUCCESS); 41 } 42 43 static PetscErrorCode MatCopy_MPIDense(Mat A, Mat B, MatStructure s) 44 { 45 Mat_MPIDense *Amat = (Mat_MPIDense *)A->data; 46 Mat_MPIDense *Bmat = (Mat_MPIDense *)B->data; 47 48 PetscFunctionBegin; 49 PetscCall(MatCopy(Amat->A, Bmat->A, s)); 50 PetscFunctionReturn(PETSC_SUCCESS); 51 } 52 53 PetscErrorCode MatShift_MPIDense(Mat A, PetscScalar alpha) 54 { 55 Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 56 PetscInt j, lda, rstart = A->rmap->rstart, rend = A->rmap->rend, rend2; 57 PetscScalar *v; 58 59 PetscFunctionBegin; 60 PetscCall(MatDenseGetArray(mat->A, &v)); 61 PetscCall(MatDenseGetLDA(mat->A, &lda)); 62 rend2 = PetscMin(rend, A->cmap->N); 63 if (rend2 > rstart) { 64 for (j = rstart; j < rend2; j++) v[j - rstart + j * lda] += alpha; 65 PetscCall(PetscLogFlops(rend2 - rstart)); 66 } 67 PetscCall(MatDenseRestoreArray(mat->A, &v)); 68 PetscFunctionReturn(PETSC_SUCCESS); 69 } 70 71 static PetscErrorCode MatGetRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 72 { 73 Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 74 PetscInt lrow, rstart = A->rmap->rstart, rend = A->rmap->rend; 75 76 PetscFunctionBegin; 77 PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows"); 78 lrow = row - rstart; 79 PetscCall(MatGetRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v)); 80 PetscFunctionReturn(PETSC_SUCCESS); 81 } 82 83 static PetscErrorCode MatRestoreRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 84 { 85 Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 86 PetscInt lrow, rstart = A->rmap->rstart, rend = A->rmap->rend; 87 88 PetscFunctionBegin; 89 PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows"); 90 lrow = row - rstart; 91 PetscCall(MatRestoreRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v)); 92 PetscFunctionReturn(PETSC_SUCCESS); 93 } 94 95 static PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A, Mat *a) 96 { 97 Mat_MPIDense *mdn = (Mat_MPIDense *)A->data; 98 PetscInt m = A->rmap->n, rstart = A->rmap->rstart; 99 PetscScalar *array; 100 MPI_Comm comm; 101 PetscBool flg; 102 Mat B; 103 104 PetscFunctionBegin; 105 PetscCall(MatHasCongruentLayouts(A, &flg)); 106 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only square matrices supported."); 107 PetscCall(PetscObjectQuery((PetscObject)A, "DiagonalBlock", (PetscObject *)&B)); 108 if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */ 109 #if PetscDefined(HAVE_CUDA) 110 PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSECUDA, &flg)); 111 PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSECUDA); 112 #elif PetscDefined(HAVE_HIP) 113 PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSEHIP, &flg)); 114 PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSEHIP); 115 #endif 116 PetscCall(PetscObjectGetComm((PetscObject)(mdn->A), &comm)); 117 PetscCall(MatCreate(comm, &B)); 118 PetscCall(MatSetSizes(B, m, m, m, m)); 119 PetscCall(MatSetType(B, ((PetscObject)mdn->A)->type_name)); 120 PetscCall(MatDenseGetArrayRead(mdn->A, (const PetscScalar **)&array)); 121 PetscCall(MatSeqDenseSetPreallocation(B, array + m * rstart)); 122 PetscCall(MatDenseRestoreArrayRead(mdn->A, (const PetscScalar **)&array)); 123 PetscCall(PetscObjectCompose((PetscObject)A, "DiagonalBlock", (PetscObject)B)); 124 *a = B; 125 PetscCall(MatDestroy(&B)); 126 } else *a = B; 127 PetscFunctionReturn(PETSC_SUCCESS); 128 } 129 130 static PetscErrorCode MatSetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], const PetscScalar v[], InsertMode addv) 131 { 132 Mat_MPIDense *A = (Mat_MPIDense *)mat->data; 133 PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row; 134 PetscBool roworiented = A->roworiented; 135 136 PetscFunctionBegin; 137 for (i = 0; i < m; i++) { 138 if (idxm[i] < 0) continue; 139 PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large"); 140 if (idxm[i] >= rstart && idxm[i] < rend) { 141 row = idxm[i] - rstart; 142 if (roworiented) { 143 PetscCall(MatSetValues(A->A, 1, &row, n, idxn, v + i * n, addv)); 144 } else { 145 for (j = 0; j < n; j++) { 146 if (idxn[j] < 0) continue; 147 PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large"); 148 PetscCall(MatSetValues(A->A, 1, &row, 1, &idxn[j], v + i + j * m, addv)); 149 } 150 } 151 } else if (!A->donotstash) { 152 mat->assembled = PETSC_FALSE; 153 if (roworiented) { 154 PetscCall(MatStashValuesRow_Private(&mat->stash, idxm[i], n, idxn, v + i * n, PETSC_FALSE)); 155 } else { 156 PetscCall(MatStashValuesCol_Private(&mat->stash, idxm[i], n, idxn, v + i, m, PETSC_FALSE)); 157 } 158 } 159 } 160 PetscFunctionReturn(PETSC_SUCCESS); 161 } 162 163 static PetscErrorCode MatGetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[]) 164 { 165 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 166 PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row; 167 168 PetscFunctionBegin; 169 for (i = 0; i < m; i++) { 170 if (idxm[i] < 0) continue; /* negative row */ 171 PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large"); 172 if (idxm[i] >= rstart && idxm[i] < rend) { 173 row = idxm[i] - rstart; 174 for (j = 0; j < n; j++) { 175 if (idxn[j] < 0) continue; /* negative column */ 176 PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large"); 177 PetscCall(MatGetValues(mdn->A, 1, &row, 1, &idxn[j], v + i * n + j)); 178 } 179 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported"); 180 } 181 PetscFunctionReturn(PETSC_SUCCESS); 182 } 183 184 static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A, PetscInt *lda) 185 { 186 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 187 188 PetscFunctionBegin; 189 PetscCall(MatDenseGetLDA(a->A, lda)); 190 PetscFunctionReturn(PETSC_SUCCESS); 191 } 192 193 static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A, PetscInt lda) 194 { 195 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 196 MatType mtype = MATSEQDENSE; 197 198 PetscFunctionBegin; 199 if (!a->A) { 200 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 201 PetscCall(PetscLayoutSetUp(A->rmap)); 202 PetscCall(PetscLayoutSetUp(A->cmap)); 203 PetscCall(MatCreate(PETSC_COMM_SELF, &a->A)); 204 PetscCall(MatSetSizes(a->A, A->rmap->n, A->cmap->N, A->rmap->n, A->cmap->N)); 205 #if PetscDefined(HAVE_CUDA) 206 PetscBool iscuda; 207 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSECUDA, &iscuda)); 208 if (iscuda) mtype = MATSEQDENSECUDA; 209 #elif PetscDefined(HAVE_HIP) 210 PetscBool iship; 211 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSEHIP, &iship)); 212 if (iship) mtype = MATSEQDENSEHIP; 213 #endif 214 PetscCall(MatSetType(a->A, mtype)); 215 } 216 PetscCall(MatDenseSetLDA(a->A, lda)); 217 PetscFunctionReturn(PETSC_SUCCESS); 218 } 219 220 static PetscErrorCode MatDenseGetArray_MPIDense(Mat A, PetscScalar **array) 221 { 222 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 223 224 PetscFunctionBegin; 225 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 226 PetscCall(MatDenseGetArray(a->A, array)); 227 PetscFunctionReturn(PETSC_SUCCESS); 228 } 229 230 static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A, const PetscScalar **array) 231 { 232 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 233 234 PetscFunctionBegin; 235 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 236 PetscCall(MatDenseGetArrayRead(a->A, array)); 237 PetscFunctionReturn(PETSC_SUCCESS); 238 } 239 240 static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A, PetscScalar **array) 241 { 242 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 243 244 PetscFunctionBegin; 245 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 246 PetscCall(MatDenseGetArrayWrite(a->A, array)); 247 PetscFunctionReturn(PETSC_SUCCESS); 248 } 249 250 static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A, const PetscScalar *array) 251 { 252 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 253 254 PetscFunctionBegin; 255 PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 256 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 257 PetscCall(MatDensePlaceArray(a->A, array)); 258 PetscFunctionReturn(PETSC_SUCCESS); 259 } 260 261 static PetscErrorCode MatDenseResetArray_MPIDense(Mat A) 262 { 263 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 264 265 PetscFunctionBegin; 266 PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 267 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 268 PetscCall(MatDenseResetArray(a->A)); 269 PetscFunctionReturn(PETSC_SUCCESS); 270 } 271 272 static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A, const PetscScalar *array) 273 { 274 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 275 276 PetscFunctionBegin; 277 PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 278 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 279 PetscCall(MatDenseReplaceArray(a->A, array)); 280 PetscFunctionReturn(PETSC_SUCCESS); 281 } 282 283 static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A, IS isrow, IS iscol, MatReuse scall, Mat *B) 284 { 285 Mat_MPIDense *mat = (Mat_MPIDense *)A->data, *newmatd; 286 PetscInt lda, i, j, rstart, rend, nrows, ncols, Ncols, nlrows, nlcols; 287 const PetscInt *irow, *icol; 288 const PetscScalar *v; 289 PetscScalar *bv; 290 Mat newmat; 291 IS iscol_local; 292 MPI_Comm comm_is, comm_mat; 293 294 PetscFunctionBegin; 295 PetscCall(PetscObjectGetComm((PetscObject)A, &comm_mat)); 296 PetscCall(PetscObjectGetComm((PetscObject)iscol, &comm_is)); 297 PetscCheck(comm_mat == comm_is, PETSC_COMM_SELF, PETSC_ERR_ARG_NOTSAMECOMM, "IS communicator must match matrix communicator"); 298 299 PetscCall(ISAllGather(iscol, &iscol_local)); 300 PetscCall(ISGetIndices(isrow, &irow)); 301 PetscCall(ISGetIndices(iscol_local, &icol)); 302 PetscCall(ISGetLocalSize(isrow, &nrows)); 303 PetscCall(ISGetLocalSize(iscol, &ncols)); 304 PetscCall(ISGetSize(iscol, &Ncols)); /* global number of columns, size of iscol_local */ 305 306 /* No parallel redistribution currently supported! Should really check each index set 307 to confirm that it is OK. ... Currently supports only submatrix same partitioning as 308 original matrix! */ 309 310 PetscCall(MatGetLocalSize(A, &nlrows, &nlcols)); 311 PetscCall(MatGetOwnershipRange(A, &rstart, &rend)); 312 313 /* Check submatrix call */ 314 if (scall == MAT_REUSE_MATRIX) { 315 /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */ 316 /* Really need to test rows and column sizes! */ 317 newmat = *B; 318 } else { 319 /* Create and fill new matrix */ 320 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &newmat)); 321 PetscCall(MatSetSizes(newmat, nrows, ncols, PETSC_DECIDE, Ncols)); 322 PetscCall(MatSetType(newmat, ((PetscObject)A)->type_name)); 323 PetscCall(MatMPIDenseSetPreallocation(newmat, NULL)); 324 } 325 326 /* Now extract the data pointers and do the copy, column at a time */ 327 newmatd = (Mat_MPIDense *)newmat->data; 328 PetscCall(MatDenseGetArray(newmatd->A, &bv)); 329 PetscCall(MatDenseGetArrayRead(mat->A, &v)); 330 PetscCall(MatDenseGetLDA(mat->A, &lda)); 331 for (i = 0; i < Ncols; i++) { 332 const PetscScalar *av = v + lda * icol[i]; 333 for (j = 0; j < nrows; j++) *bv++ = av[irow[j] - rstart]; 334 } 335 PetscCall(MatDenseRestoreArrayRead(mat->A, &v)); 336 PetscCall(MatDenseRestoreArray(newmatd->A, &bv)); 337 338 /* Assemble the matrices so that the correct flags are set */ 339 PetscCall(MatAssemblyBegin(newmat, MAT_FINAL_ASSEMBLY)); 340 PetscCall(MatAssemblyEnd(newmat, MAT_FINAL_ASSEMBLY)); 341 342 /* Free work space */ 343 PetscCall(ISRestoreIndices(isrow, &irow)); 344 PetscCall(ISRestoreIndices(iscol_local, &icol)); 345 PetscCall(ISDestroy(&iscol_local)); 346 *B = newmat; 347 PetscFunctionReturn(PETSC_SUCCESS); 348 } 349 350 static PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A, PetscScalar **array) 351 { 352 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 353 354 PetscFunctionBegin; 355 PetscCall(MatDenseRestoreArray(a->A, array)); 356 PetscFunctionReturn(PETSC_SUCCESS); 357 } 358 359 static PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A, const PetscScalar **array) 360 { 361 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 362 363 PetscFunctionBegin; 364 PetscCall(MatDenseRestoreArrayRead(a->A, array)); 365 PetscFunctionReturn(PETSC_SUCCESS); 366 } 367 368 static PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A, PetscScalar **array) 369 { 370 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 371 372 PetscFunctionBegin; 373 PetscCall(MatDenseRestoreArrayWrite(a->A, array)); 374 PetscFunctionReturn(PETSC_SUCCESS); 375 } 376 377 static PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat, MatAssemblyType mode) 378 { 379 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 380 PetscInt nstash, reallocs; 381 382 PetscFunctionBegin; 383 if (mdn->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS); 384 385 PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range)); 386 PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs)); 387 PetscCall(PetscInfo(mdn->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs)); 388 PetscFunctionReturn(PETSC_SUCCESS); 389 } 390 391 static PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat, MatAssemblyType mode) 392 { 393 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 394 PetscInt i, *row, *col, flg, j, rstart, ncols; 395 PetscMPIInt n; 396 PetscScalar *val; 397 398 PetscFunctionBegin; 399 if (!mdn->donotstash && !mat->nooffprocentries) { 400 /* wait on receives */ 401 while (1) { 402 PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg)); 403 if (!flg) break; 404 405 for (i = 0; i < n;) { 406 /* Now identify the consecutive vals belonging to the same row */ 407 for (j = i, rstart = row[j]; j < n; j++) { 408 if (row[j] != rstart) break; 409 } 410 if (j < n) ncols = j - i; 411 else ncols = n - i; 412 /* Now assemble all these values with a single function call */ 413 PetscCall(MatSetValues_MPIDense(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode)); 414 i = j; 415 } 416 } 417 PetscCall(MatStashScatterEnd_Private(&mat->stash)); 418 } 419 420 PetscCall(MatAssemblyBegin(mdn->A, mode)); 421 PetscCall(MatAssemblyEnd(mdn->A, mode)); 422 PetscFunctionReturn(PETSC_SUCCESS); 423 } 424 425 static PetscErrorCode MatZeroEntries_MPIDense(Mat A) 426 { 427 Mat_MPIDense *l = (Mat_MPIDense *)A->data; 428 429 PetscFunctionBegin; 430 PetscCall(MatZeroEntries(l->A)); 431 PetscFunctionReturn(PETSC_SUCCESS); 432 } 433 434 static PetscErrorCode MatZeroRows_MPIDense(Mat A, PetscInt n, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) 435 { 436 Mat_MPIDense *l = (Mat_MPIDense *)A->data; 437 PetscInt i, len, *lrows; 438 439 PetscFunctionBegin; 440 /* get locally owned rows */ 441 PetscCall(PetscLayoutMapLocal(A->rmap, n, rows, &len, &lrows, NULL)); 442 /* fix right hand side if needed */ 443 if (x && b) { 444 const PetscScalar *xx; 445 PetscScalar *bb; 446 447 PetscCall(VecGetArrayRead(x, &xx)); 448 PetscCall(VecGetArrayWrite(b, &bb)); 449 for (i = 0; i < len; ++i) bb[lrows[i]] = diag * xx[lrows[i]]; 450 PetscCall(VecRestoreArrayRead(x, &xx)); 451 PetscCall(VecRestoreArrayWrite(b, &bb)); 452 } 453 PetscCall(MatZeroRows(l->A, len, lrows, 0.0, NULL, NULL)); 454 if (diag != 0.0) { 455 Vec d; 456 457 PetscCall(MatCreateVecs(A, NULL, &d)); 458 PetscCall(VecSet(d, diag)); 459 PetscCall(MatDiagonalSet(A, d, INSERT_VALUES)); 460 PetscCall(VecDestroy(&d)); 461 } 462 PetscCall(PetscFree(lrows)); 463 PetscFunctionReturn(PETSC_SUCCESS); 464 } 465 466 PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat, Vec, Vec); 467 PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat, Vec, Vec, Vec); 468 PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat, Vec, Vec); 469 PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat, Vec, Vec, Vec); 470 471 static PetscErrorCode MatMult_MPIDense(Mat mat, Vec xx, Vec yy) 472 { 473 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 474 const PetscScalar *ax; 475 PetscScalar *ay; 476 PetscMemType axmtype, aymtype; 477 478 PetscFunctionBegin; 479 if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat)); 480 PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype)); 481 PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype)); 482 PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE)); 483 PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE)); 484 PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay)); 485 PetscCall(VecRestoreArrayReadAndMemType(xx, &ax)); 486 PetscCall((*mdn->A->ops->mult)(mdn->A, mdn->lvec, yy)); 487 PetscFunctionReturn(PETSC_SUCCESS); 488 } 489 490 static PetscErrorCode MatMultAdd_MPIDense(Mat mat, Vec xx, Vec yy, Vec zz) 491 { 492 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 493 const PetscScalar *ax; 494 PetscScalar *ay; 495 PetscMemType axmtype, aymtype; 496 497 PetscFunctionBegin; 498 if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat)); 499 PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype)); 500 PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype)); 501 PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE)); 502 PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE)); 503 PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay)); 504 PetscCall(VecRestoreArrayReadAndMemType(xx, &ax)); 505 PetscCall((*mdn->A->ops->multadd)(mdn->A, mdn->lvec, yy, zz)); 506 PetscFunctionReturn(PETSC_SUCCESS); 507 } 508 509 static PetscErrorCode MatMultTranspose_MPIDense(Mat A, Vec xx, Vec yy) 510 { 511 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 512 const PetscScalar *ax; 513 PetscScalar *ay; 514 PetscMemType axmtype, aymtype; 515 516 PetscFunctionBegin; 517 if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); 518 PetscCall(VecSet(yy, 0.0)); 519 PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec)); 520 PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype)); 521 PetscCall(VecGetArrayAndMemType(yy, &ay, &aymtype)); 522 PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM)); 523 PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM)); 524 PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax)); 525 PetscCall(VecRestoreArrayAndMemType(yy, &ay)); 526 PetscFunctionReturn(PETSC_SUCCESS); 527 } 528 529 static PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A, Vec xx, Vec yy, Vec zz) 530 { 531 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 532 const PetscScalar *ax; 533 PetscScalar *ay; 534 PetscMemType axmtype, aymtype; 535 536 PetscFunctionBegin; 537 if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); 538 PetscCall(VecCopy(yy, zz)); 539 PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec)); 540 PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype)); 541 PetscCall(VecGetArrayAndMemType(zz, &ay, &aymtype)); 542 PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM)); 543 PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM)); 544 PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax)); 545 PetscCall(VecRestoreArrayAndMemType(zz, &ay)); 546 PetscFunctionReturn(PETSC_SUCCESS); 547 } 548 549 PetscErrorCode MatGetDiagonal_MPIDense(Mat A, Vec v) 550 { 551 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 552 PetscInt lda, len, i, nl, ng, m = A->rmap->n, radd; 553 PetscScalar *x; 554 const PetscScalar *av; 555 556 PetscFunctionBegin; 557 PetscCall(VecGetArray(v, &x)); 558 PetscCall(VecGetSize(v, &ng)); 559 PetscCheck(ng == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming mat and vec"); 560 PetscCall(VecGetLocalSize(v, &nl)); 561 len = PetscMin(a->A->rmap->n, a->A->cmap->n); 562 radd = A->rmap->rstart * m; 563 PetscCall(MatDenseGetArrayRead(a->A, &av)); 564 PetscCall(MatDenseGetLDA(a->A, &lda)); 565 for (i = 0; i < len; i++) x[i] = av[radd + i * lda + i]; 566 PetscCall(MatDenseRestoreArrayRead(a->A, &av)); 567 PetscCall(PetscArrayzero(x + i, nl - i)); 568 PetscCall(VecRestoreArray(v, &x)); 569 PetscFunctionReturn(PETSC_SUCCESS); 570 } 571 572 static PetscErrorCode MatDestroy_MPIDense(Mat mat) 573 { 574 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 575 576 PetscFunctionBegin; 577 PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N)); 578 PetscCall(MatStashDestroy_Private(&mat->stash)); 579 PetscCheck(!mdn->vecinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 580 PetscCheck(!mdn->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 581 PetscCall(MatDestroy(&mdn->A)); 582 PetscCall(VecDestroy(&mdn->lvec)); 583 PetscCall(PetscSFDestroy(&mdn->Mvctx)); 584 PetscCall(VecDestroy(&mdn->cvec)); 585 PetscCall(MatDestroy(&mdn->cmat)); 586 587 PetscCall(PetscFree(mat->data)); 588 PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL)); 589 590 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", NULL)); 591 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", NULL)); 592 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", NULL)); 593 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", NULL)); 594 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", NULL)); 595 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", NULL)); 596 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", NULL)); 597 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", NULL)); 598 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", NULL)); 599 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", NULL)); 600 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", NULL)); 601 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL)); 602 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", NULL)); 603 #if defined(PETSC_HAVE_ELEMENTAL) 604 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", NULL)); 605 #endif 606 #if defined(PETSC_HAVE_SCALAPACK) 607 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", NULL)); 608 #endif 609 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", NULL)); 610 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", NULL)); 611 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", NULL)); 612 #if defined(PETSC_HAVE_CUDA) 613 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", NULL)); 614 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", NULL)); 615 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", NULL)); 616 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensecuda_mpidense_C", NULL)); 617 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", NULL)); 618 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", NULL)); 619 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", NULL)); 620 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", NULL)); 621 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArray_C", NULL)); 622 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayRead_C", NULL)); 623 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayWrite_C", NULL)); 624 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArray_C", NULL)); 625 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayRead_C", NULL)); 626 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayWrite_C", NULL)); 627 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAPlaceArray_C", NULL)); 628 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAResetArray_C", NULL)); 629 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAReplaceArray_C", NULL)); 630 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDASetPreallocation_C", NULL)); 631 #endif 632 #if defined(PETSC_HAVE_HIP) 633 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", NULL)); 634 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", NULL)); 635 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", NULL)); 636 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensehip_mpidense_C", NULL)); 637 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensehip_C", NULL)); 638 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidensehip_C", NULL)); 639 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaij_C", NULL)); 640 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaijhipsparse_C", NULL)); 641 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArray_C", NULL)); 642 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayRead_C", NULL)); 643 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayWrite_C", NULL)); 644 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArray_C", NULL)); 645 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayRead_C", NULL)); 646 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayWrite_C", NULL)); 647 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPPlaceArray_C", NULL)); 648 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPResetArray_C", NULL)); 649 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPReplaceArray_C", NULL)); 650 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPSetPreallocation_C", NULL)); 651 #endif 652 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", NULL)); 653 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", NULL)); 654 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", NULL)); 655 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", NULL)); 656 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", NULL)); 657 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", NULL)); 658 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", NULL)); 659 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", NULL)); 660 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", NULL)); 661 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", NULL)); 662 663 PetscCall(PetscObjectCompose((PetscObject)mat, "DiagonalBlock", NULL)); 664 PetscFunctionReturn(PETSC_SUCCESS); 665 } 666 667 PETSC_INTERN PetscErrorCode MatView_SeqDense(Mat, PetscViewer); 668 669 #include <petscdraw.h> 670 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat, PetscViewer viewer) 671 { 672 Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data; 673 PetscMPIInt rank; 674 PetscViewerType vtype; 675 PetscBool iascii, isdraw; 676 PetscViewer sviewer; 677 PetscViewerFormat format; 678 679 PetscFunctionBegin; 680 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 681 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 682 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 683 if (iascii) { 684 PetscCall(PetscViewerGetType(viewer, &vtype)); 685 PetscCall(PetscViewerGetFormat(viewer, &format)); 686 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 687 MatInfo info; 688 PetscCall(MatGetInfo(mat, MAT_LOCAL, &info)); 689 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 690 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %" PetscInt_FMT " \n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, 691 (PetscInt)info.memory)); 692 PetscCall(PetscViewerFlush(viewer)); 693 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 694 if (mdn->Mvctx) PetscCall(PetscSFView(mdn->Mvctx, viewer)); 695 PetscFunctionReturn(PETSC_SUCCESS); 696 } else if (format == PETSC_VIEWER_ASCII_INFO) { 697 PetscFunctionReturn(PETSC_SUCCESS); 698 } 699 } else if (isdraw) { 700 PetscDraw draw; 701 PetscBool isnull; 702 703 PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw)); 704 PetscCall(PetscDrawIsNull(draw, &isnull)); 705 if (isnull) PetscFunctionReturn(PETSC_SUCCESS); 706 } 707 708 { 709 /* assemble the entire matrix onto first processor. */ 710 Mat A; 711 PetscInt M = mat->rmap->N, N = mat->cmap->N, m, row, i, nz; 712 PetscInt *cols; 713 PetscScalar *vals; 714 715 PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A)); 716 if (rank == 0) { 717 PetscCall(MatSetSizes(A, M, N, M, N)); 718 } else { 719 PetscCall(MatSetSizes(A, 0, 0, M, N)); 720 } 721 /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */ 722 PetscCall(MatSetType(A, MATMPIDENSE)); 723 PetscCall(MatMPIDenseSetPreallocation(A, NULL)); 724 725 /* Copy the matrix ... This isn't the most efficient means, 726 but it's quick for now */ 727 A->insertmode = INSERT_VALUES; 728 729 row = mat->rmap->rstart; 730 m = mdn->A->rmap->n; 731 for (i = 0; i < m; i++) { 732 PetscCall(MatGetRow_MPIDense(mat, row, &nz, &cols, &vals)); 733 PetscCall(MatSetValues_MPIDense(A, 1, &row, nz, cols, vals, INSERT_VALUES)); 734 PetscCall(MatRestoreRow_MPIDense(mat, row, &nz, &cols, &vals)); 735 row++; 736 } 737 738 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 739 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 740 PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 741 if (rank == 0) { 742 PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIDense *)(A->data))->A, ((PetscObject)mat)->name)); 743 PetscCall(MatView_SeqDense(((Mat_MPIDense *)(A->data))->A, sviewer)); 744 } 745 PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 746 PetscCall(PetscViewerFlush(viewer)); 747 PetscCall(MatDestroy(&A)); 748 } 749 PetscFunctionReturn(PETSC_SUCCESS); 750 } 751 752 static PetscErrorCode MatView_MPIDense(Mat mat, PetscViewer viewer) 753 { 754 PetscBool iascii, isbinary, isdraw, issocket; 755 756 PetscFunctionBegin; 757 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 758 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 759 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket)); 760 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 761 762 if (iascii || issocket || isdraw) { 763 PetscCall(MatView_MPIDense_ASCIIorDraworSocket(mat, viewer)); 764 } else if (isbinary) PetscCall(MatView_Dense_Binary(mat, viewer)); 765 PetscFunctionReturn(PETSC_SUCCESS); 766 } 767 768 static PetscErrorCode MatGetInfo_MPIDense(Mat A, MatInfoType flag, MatInfo *info) 769 { 770 Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 771 Mat mdn = mat->A; 772 PetscLogDouble isend[5], irecv[5]; 773 774 PetscFunctionBegin; 775 info->block_size = 1.0; 776 777 PetscCall(MatGetInfo(mdn, MAT_LOCAL, info)); 778 779 isend[0] = info->nz_used; 780 isend[1] = info->nz_allocated; 781 isend[2] = info->nz_unneeded; 782 isend[3] = info->memory; 783 isend[4] = info->mallocs; 784 if (flag == MAT_LOCAL) { 785 info->nz_used = isend[0]; 786 info->nz_allocated = isend[1]; 787 info->nz_unneeded = isend[2]; 788 info->memory = isend[3]; 789 info->mallocs = isend[4]; 790 } else if (flag == MAT_GLOBAL_MAX) { 791 PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)A))); 792 793 info->nz_used = irecv[0]; 794 info->nz_allocated = irecv[1]; 795 info->nz_unneeded = irecv[2]; 796 info->memory = irecv[3]; 797 info->mallocs = irecv[4]; 798 } else if (flag == MAT_GLOBAL_SUM) { 799 PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)A))); 800 801 info->nz_used = irecv[0]; 802 info->nz_allocated = irecv[1]; 803 info->nz_unneeded = irecv[2]; 804 info->memory = irecv[3]; 805 info->mallocs = irecv[4]; 806 } 807 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 808 info->fill_ratio_needed = 0; 809 info->factor_mallocs = 0; 810 PetscFunctionReturn(PETSC_SUCCESS); 811 } 812 813 static PetscErrorCode MatSetOption_MPIDense(Mat A, MatOption op, PetscBool flg) 814 { 815 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 816 817 PetscFunctionBegin; 818 switch (op) { 819 case MAT_NEW_NONZERO_LOCATIONS: 820 case MAT_NEW_NONZERO_LOCATION_ERR: 821 case MAT_NEW_NONZERO_ALLOCATION_ERR: 822 MatCheckPreallocated(A, 1); 823 PetscCall(MatSetOption(a->A, op, flg)); 824 break; 825 case MAT_ROW_ORIENTED: 826 MatCheckPreallocated(A, 1); 827 a->roworiented = flg; 828 PetscCall(MatSetOption(a->A, op, flg)); 829 break; 830 case MAT_FORCE_DIAGONAL_ENTRIES: 831 case MAT_KEEP_NONZERO_PATTERN: 832 case MAT_USE_HASH_TABLE: 833 case MAT_SORTED_FULL: 834 PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); 835 break; 836 case MAT_IGNORE_OFF_PROC_ENTRIES: 837 a->donotstash = flg; 838 break; 839 case MAT_SYMMETRIC: 840 case MAT_STRUCTURALLY_SYMMETRIC: 841 case MAT_HERMITIAN: 842 case MAT_SYMMETRY_ETERNAL: 843 case MAT_STRUCTURAL_SYMMETRY_ETERNAL: 844 case MAT_SPD: 845 case MAT_IGNORE_LOWER_TRIANGULAR: 846 case MAT_IGNORE_ZERO_ENTRIES: 847 case MAT_SPD_ETERNAL: 848 /* if the diagonal matrix is square it inherits some of the properties above */ 849 PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); 850 break; 851 default: 852 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %s", MatOptions[op]); 853 } 854 PetscFunctionReturn(PETSC_SUCCESS); 855 } 856 857 static PetscErrorCode MatDiagonalScale_MPIDense(Mat A, Vec ll, Vec rr) 858 { 859 Mat_MPIDense *mdn = (Mat_MPIDense *)A->data; 860 const PetscScalar *l; 861 PetscScalar x, *v, *vv, *r; 862 PetscInt i, j, s2a, s3a, s2, s3, m = mdn->A->rmap->n, n = mdn->A->cmap->n, lda; 863 864 PetscFunctionBegin; 865 PetscCall(MatDenseGetArray(mdn->A, &vv)); 866 PetscCall(MatDenseGetLDA(mdn->A, &lda)); 867 PetscCall(MatGetLocalSize(A, &s2, &s3)); 868 if (ll) { 869 PetscCall(VecGetLocalSize(ll, &s2a)); 870 PetscCheck(s2a == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left scaling vector non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT, s2a, s2); 871 PetscCall(VecGetArrayRead(ll, &l)); 872 for (i = 0; i < m; i++) { 873 x = l[i]; 874 v = vv + i; 875 for (j = 0; j < n; j++) { 876 (*v) *= x; 877 v += lda; 878 } 879 } 880 PetscCall(VecRestoreArrayRead(ll, &l)); 881 PetscCall(PetscLogFlops(1.0 * n * m)); 882 } 883 if (rr) { 884 const PetscScalar *ar; 885 886 PetscCall(VecGetLocalSize(rr, &s3a)); 887 PetscCheck(s3a == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Right scaling vec non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT ".", s3a, s3); 888 PetscCall(VecGetArrayRead(rr, &ar)); 889 if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); 890 PetscCall(VecGetArray(mdn->lvec, &r)); 891 PetscCall(PetscSFBcastBegin(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE)); 892 PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE)); 893 PetscCall(VecRestoreArrayRead(rr, &ar)); 894 for (i = 0; i < n; i++) { 895 x = r[i]; 896 v = vv + i * lda; 897 for (j = 0; j < m; j++) (*v++) *= x; 898 } 899 PetscCall(VecRestoreArray(mdn->lvec, &r)); 900 PetscCall(PetscLogFlops(1.0 * n * m)); 901 } 902 PetscCall(MatDenseRestoreArray(mdn->A, &vv)); 903 PetscFunctionReturn(PETSC_SUCCESS); 904 } 905 906 static PetscErrorCode MatNorm_MPIDense(Mat A, NormType type, PetscReal *nrm) 907 { 908 Mat_MPIDense *mdn = (Mat_MPIDense *)A->data; 909 PetscInt i, j; 910 PetscMPIInt size; 911 PetscReal sum = 0.0; 912 const PetscScalar *av, *v; 913 914 PetscFunctionBegin; 915 PetscCall(MatDenseGetArrayRead(mdn->A, &av)); 916 v = av; 917 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 918 if (size == 1) { 919 PetscCall(MatNorm(mdn->A, type, nrm)); 920 } else { 921 if (type == NORM_FROBENIUS) { 922 for (i = 0; i < mdn->A->cmap->n * mdn->A->rmap->n; i++) { 923 sum += PetscRealPart(PetscConj(*v) * (*v)); 924 v++; 925 } 926 PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A))); 927 *nrm = PetscSqrtReal(*nrm); 928 PetscCall(PetscLogFlops(2.0 * mdn->A->cmap->n * mdn->A->rmap->n)); 929 } else if (type == NORM_1) { 930 PetscReal *tmp, *tmp2; 931 PetscCall(PetscCalloc2(A->cmap->N, &tmp, A->cmap->N, &tmp2)); 932 *nrm = 0.0; 933 v = av; 934 for (j = 0; j < mdn->A->cmap->n; j++) { 935 for (i = 0; i < mdn->A->rmap->n; i++) { 936 tmp[j] += PetscAbsScalar(*v); 937 v++; 938 } 939 } 940 PetscCall(MPIU_Allreduce(tmp, tmp2, A->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A))); 941 for (j = 0; j < A->cmap->N; j++) { 942 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 943 } 944 PetscCall(PetscFree2(tmp, tmp2)); 945 PetscCall(PetscLogFlops(A->cmap->n * A->rmap->n)); 946 } else if (type == NORM_INFINITY) { /* max row norm */ 947 PetscReal ntemp; 948 PetscCall(MatNorm(mdn->A, type, &ntemp)); 949 PetscCall(MPIU_Allreduce(&ntemp, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A))); 950 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for two norm"); 951 } 952 PetscCall(MatDenseRestoreArrayRead(mdn->A, &av)); 953 PetscFunctionReturn(PETSC_SUCCESS); 954 } 955 956 static PetscErrorCode MatTranspose_MPIDense(Mat A, MatReuse reuse, Mat *matout) 957 { 958 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 959 Mat B; 960 PetscInt M = A->rmap->N, N = A->cmap->N, m, n, *rwork, rstart = A->rmap->rstart; 961 PetscInt j, i, lda; 962 PetscScalar *v; 963 964 PetscFunctionBegin; 965 if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout)); 966 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 967 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 968 PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M)); 969 PetscCall(MatSetType(B, ((PetscObject)A)->type_name)); 970 PetscCall(MatMPIDenseSetPreallocation(B, NULL)); 971 } else B = *matout; 972 973 m = a->A->rmap->n; 974 n = a->A->cmap->n; 975 PetscCall(MatDenseGetArrayRead(a->A, (const PetscScalar **)&v)); 976 PetscCall(MatDenseGetLDA(a->A, &lda)); 977 PetscCall(PetscMalloc1(m, &rwork)); 978 for (i = 0; i < m; i++) rwork[i] = rstart + i; 979 for (j = 0; j < n; j++) { 980 PetscCall(MatSetValues(B, 1, &j, m, rwork, v, INSERT_VALUES)); 981 v += lda; 982 } 983 PetscCall(MatDenseRestoreArrayRead(a->A, (const PetscScalar **)&v)); 984 PetscCall(PetscFree(rwork)); 985 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 986 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 987 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 988 *matout = B; 989 } else { 990 PetscCall(MatHeaderMerge(A, &B)); 991 } 992 PetscFunctionReturn(PETSC_SUCCESS); 993 } 994 995 static PetscErrorCode MatDuplicate_MPIDense(Mat, MatDuplicateOption, Mat *); 996 PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat, PetscScalar); 997 998 static PetscErrorCode MatSetUp_MPIDense(Mat A) 999 { 1000 PetscFunctionBegin; 1001 PetscCall(PetscLayoutSetUp(A->rmap)); 1002 PetscCall(PetscLayoutSetUp(A->cmap)); 1003 if (!A->preallocated) PetscCall(MatMPIDenseSetPreallocation(A, NULL)); 1004 PetscFunctionReturn(PETSC_SUCCESS); 1005 } 1006 1007 static PetscErrorCode MatAXPY_MPIDense(Mat Y, PetscScalar alpha, Mat X, MatStructure str) 1008 { 1009 Mat_MPIDense *A = (Mat_MPIDense *)Y->data, *B = (Mat_MPIDense *)X->data; 1010 1011 PetscFunctionBegin; 1012 PetscCall(MatAXPY(A->A, alpha, B->A, str)); 1013 PetscFunctionReturn(PETSC_SUCCESS); 1014 } 1015 1016 static PetscErrorCode MatConjugate_MPIDense(Mat mat) 1017 { 1018 Mat_MPIDense *a = (Mat_MPIDense *)mat->data; 1019 1020 PetscFunctionBegin; 1021 PetscCall(MatConjugate(a->A)); 1022 PetscFunctionReturn(PETSC_SUCCESS); 1023 } 1024 1025 static PetscErrorCode MatRealPart_MPIDense(Mat A) 1026 { 1027 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1028 1029 PetscFunctionBegin; 1030 PetscCall(MatRealPart(a->A)); 1031 PetscFunctionReturn(PETSC_SUCCESS); 1032 } 1033 1034 static PetscErrorCode MatImaginaryPart_MPIDense(Mat A) 1035 { 1036 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1037 1038 PetscFunctionBegin; 1039 PetscCall(MatImaginaryPart(a->A)); 1040 PetscFunctionReturn(PETSC_SUCCESS); 1041 } 1042 1043 static PetscErrorCode MatGetColumnVector_MPIDense(Mat A, Vec v, PetscInt col) 1044 { 1045 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1046 1047 PetscFunctionBegin; 1048 PetscCheck(a->A, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing local matrix"); 1049 PetscCheck(a->A->ops->getcolumnvector, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing get column operation"); 1050 PetscCall((*a->A->ops->getcolumnvector)(a->A, v, col)); 1051 PetscFunctionReturn(PETSC_SUCCESS); 1052 } 1053 1054 PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat, PetscInt, PetscReal *); 1055 1056 static PetscErrorCode MatGetColumnReductions_MPIDense(Mat A, PetscInt type, PetscReal *reductions) 1057 { 1058 PetscInt i, m, n; 1059 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1060 PetscReal *work; 1061 1062 PetscFunctionBegin; 1063 PetscCall(MatGetSize(A, &m, &n)); 1064 PetscCall(PetscMalloc1(n, &work)); 1065 if (type == REDUCTION_MEAN_REALPART) { 1066 PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_REALPART, work)); 1067 } else if (type == REDUCTION_MEAN_IMAGINARYPART) { 1068 PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_IMAGINARYPART, work)); 1069 } else { 1070 PetscCall(MatGetColumnReductions_SeqDense(a->A, type, work)); 1071 } 1072 if (type == NORM_2) { 1073 for (i = 0; i < n; i++) work[i] *= work[i]; 1074 } 1075 if (type == NORM_INFINITY) { 1076 PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, A->hdr.comm)); 1077 } else { 1078 PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, A->hdr.comm)); 1079 } 1080 PetscCall(PetscFree(work)); 1081 if (type == NORM_2) { 1082 for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]); 1083 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 1084 for (i = 0; i < n; i++) reductions[i] /= m; 1085 } 1086 PetscFunctionReturn(PETSC_SUCCESS); 1087 } 1088 1089 static PetscErrorCode MatSetRandom_MPIDense(Mat x, PetscRandom rctx) 1090 { 1091 Mat_MPIDense *d = (Mat_MPIDense *)x->data; 1092 1093 PetscFunctionBegin; 1094 PetscCall(MatSetRandom(d->A, rctx)); 1095 #if defined(PETSC_HAVE_DEVICE) 1096 x->offloadmask = d->A->offloadmask; 1097 #endif 1098 PetscFunctionReturn(PETSC_SUCCESS); 1099 } 1100 1101 static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A, PetscBool *missing, PetscInt *d) 1102 { 1103 PetscFunctionBegin; 1104 *missing = PETSC_FALSE; 1105 PetscFunctionReturn(PETSC_SUCCESS); 1106 } 1107 1108 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat); 1109 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat); 1110 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat); 1111 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat); 1112 static PetscErrorCode MatEqual_MPIDense(Mat, Mat, PetscBool *); 1113 static PetscErrorCode MatLoad_MPIDense(Mat, PetscViewer); 1114 1115 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense, 1116 MatGetRow_MPIDense, 1117 MatRestoreRow_MPIDense, 1118 MatMult_MPIDense, 1119 /* 4*/ MatMultAdd_MPIDense, 1120 MatMultTranspose_MPIDense, 1121 MatMultTransposeAdd_MPIDense, 1122 NULL, 1123 NULL, 1124 NULL, 1125 /* 10*/ NULL, 1126 NULL, 1127 NULL, 1128 NULL, 1129 MatTranspose_MPIDense, 1130 /* 15*/ MatGetInfo_MPIDense, 1131 MatEqual_MPIDense, 1132 MatGetDiagonal_MPIDense, 1133 MatDiagonalScale_MPIDense, 1134 MatNorm_MPIDense, 1135 /* 20*/ MatAssemblyBegin_MPIDense, 1136 MatAssemblyEnd_MPIDense, 1137 MatSetOption_MPIDense, 1138 MatZeroEntries_MPIDense, 1139 /* 24*/ MatZeroRows_MPIDense, 1140 NULL, 1141 NULL, 1142 NULL, 1143 NULL, 1144 /* 29*/ MatSetUp_MPIDense, 1145 NULL, 1146 NULL, 1147 MatGetDiagonalBlock_MPIDense, 1148 NULL, 1149 /* 34*/ MatDuplicate_MPIDense, 1150 NULL, 1151 NULL, 1152 NULL, 1153 NULL, 1154 /* 39*/ MatAXPY_MPIDense, 1155 MatCreateSubMatrices_MPIDense, 1156 NULL, 1157 MatGetValues_MPIDense, 1158 MatCopy_MPIDense, 1159 /* 44*/ NULL, 1160 MatScale_MPIDense, 1161 MatShift_MPIDense, 1162 NULL, 1163 NULL, 1164 /* 49*/ MatSetRandom_MPIDense, 1165 NULL, 1166 NULL, 1167 NULL, 1168 NULL, 1169 /* 54*/ NULL, 1170 NULL, 1171 NULL, 1172 NULL, 1173 NULL, 1174 /* 59*/ MatCreateSubMatrix_MPIDense, 1175 MatDestroy_MPIDense, 1176 MatView_MPIDense, 1177 NULL, 1178 NULL, 1179 /* 64*/ NULL, 1180 NULL, 1181 NULL, 1182 NULL, 1183 NULL, 1184 /* 69*/ NULL, 1185 NULL, 1186 NULL, 1187 NULL, 1188 NULL, 1189 /* 74*/ NULL, 1190 NULL, 1191 NULL, 1192 NULL, 1193 NULL, 1194 /* 79*/ NULL, 1195 NULL, 1196 NULL, 1197 NULL, 1198 /* 83*/ MatLoad_MPIDense, 1199 NULL, 1200 NULL, 1201 NULL, 1202 NULL, 1203 NULL, 1204 /* 89*/ NULL, 1205 NULL, 1206 NULL, 1207 NULL, 1208 NULL, 1209 /* 94*/ NULL, 1210 NULL, 1211 MatMatTransposeMultSymbolic_MPIDense_MPIDense, 1212 MatMatTransposeMultNumeric_MPIDense_MPIDense, 1213 NULL, 1214 /* 99*/ MatProductSetFromOptions_MPIDense, 1215 NULL, 1216 NULL, 1217 MatConjugate_MPIDense, 1218 NULL, 1219 /*104*/ NULL, 1220 MatRealPart_MPIDense, 1221 MatImaginaryPart_MPIDense, 1222 NULL, 1223 NULL, 1224 /*109*/ NULL, 1225 NULL, 1226 NULL, 1227 MatGetColumnVector_MPIDense, 1228 MatMissingDiagonal_MPIDense, 1229 /*114*/ NULL, 1230 NULL, 1231 NULL, 1232 NULL, 1233 NULL, 1234 /*119*/ NULL, 1235 NULL, 1236 NULL, 1237 NULL, 1238 NULL, 1239 /*124*/ NULL, 1240 MatGetColumnReductions_MPIDense, 1241 NULL, 1242 NULL, 1243 NULL, 1244 /*129*/ NULL, 1245 NULL, 1246 MatTransposeMatMultSymbolic_MPIDense_MPIDense, 1247 MatTransposeMatMultNumeric_MPIDense_MPIDense, 1248 NULL, 1249 /*134*/ NULL, 1250 NULL, 1251 NULL, 1252 NULL, 1253 NULL, 1254 /*139*/ NULL, 1255 NULL, 1256 NULL, 1257 NULL, 1258 NULL, 1259 MatCreateMPIMatConcatenateSeqMat_MPIDense, 1260 /*145*/ NULL, 1261 NULL, 1262 NULL, 1263 NULL, 1264 NULL, 1265 /*150*/ NULL, 1266 NULL}; 1267 1268 static PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat, PetscScalar *data) 1269 { 1270 Mat_MPIDense *a = (Mat_MPIDense *)mat->data; 1271 MatType mtype = MATSEQDENSE; 1272 1273 PetscFunctionBegin; 1274 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1275 PetscCall(PetscLayoutSetUp(mat->rmap)); 1276 PetscCall(PetscLayoutSetUp(mat->cmap)); 1277 if (!a->A) { 1278 PetscCall(MatCreate(PETSC_COMM_SELF, &a->A)); 1279 PetscCall(MatSetSizes(a->A, mat->rmap->n, mat->cmap->N, mat->rmap->n, mat->cmap->N)); 1280 } 1281 #if defined(PETSC_HAVE_CUDA) 1282 PetscBool iscuda; 1283 PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSECUDA, &iscuda)); 1284 if (iscuda) mtype = MATSEQDENSECUDA; 1285 #endif 1286 #if defined(PETSC_HAVE_HIP) 1287 PetscBool iship; 1288 PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSEHIP, &iship)); 1289 if (iship) mtype = MATSEQDENSEHIP; 1290 #endif 1291 PetscCall(MatSetType(a->A, mtype)); 1292 PetscCall(MatSeqDenseSetPreallocation(a->A, data)); 1293 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 1294 mat->offloadmask = a->A->offloadmask; 1295 #endif 1296 mat->preallocated = PETSC_TRUE; 1297 mat->assembled = PETSC_TRUE; 1298 PetscFunctionReturn(PETSC_SUCCESS); 1299 } 1300 1301 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIDense(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 1302 { 1303 Mat B, C; 1304 1305 PetscFunctionBegin; 1306 PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &C)); 1307 PetscCall(MatConvert_SeqAIJ_SeqDense(C, MATSEQDENSE, MAT_INITIAL_MATRIX, &B)); 1308 PetscCall(MatDestroy(&C)); 1309 if (reuse == MAT_REUSE_MATRIX) { 1310 C = *newmat; 1311 } else C = NULL; 1312 PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C)); 1313 PetscCall(MatDestroy(&B)); 1314 if (reuse == MAT_INPLACE_MATRIX) { 1315 PetscCall(MatHeaderReplace(A, &C)); 1316 } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C; 1317 PetscFunctionReturn(PETSC_SUCCESS); 1318 } 1319 1320 static PetscErrorCode MatConvert_MPIDense_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 1321 { 1322 Mat B, C; 1323 1324 PetscFunctionBegin; 1325 PetscCall(MatDenseGetLocalMatrix(A, &C)); 1326 PetscCall(MatConvert_SeqDense_SeqAIJ(C, MATSEQAIJ, MAT_INITIAL_MATRIX, &B)); 1327 if (reuse == MAT_REUSE_MATRIX) { 1328 C = *newmat; 1329 } else C = NULL; 1330 PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C)); 1331 PetscCall(MatDestroy(&B)); 1332 if (reuse == MAT_INPLACE_MATRIX) { 1333 PetscCall(MatHeaderReplace(A, &C)); 1334 } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C; 1335 PetscFunctionReturn(PETSC_SUCCESS); 1336 } 1337 1338 #if defined(PETSC_HAVE_ELEMENTAL) 1339 PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 1340 { 1341 Mat mat_elemental; 1342 PetscScalar *v; 1343 PetscInt m = A->rmap->n, N = A->cmap->N, rstart = A->rmap->rstart, i, *rows, *cols; 1344 1345 PetscFunctionBegin; 1346 if (reuse == MAT_REUSE_MATRIX) { 1347 mat_elemental = *newmat; 1348 PetscCall(MatZeroEntries(*newmat)); 1349 } else { 1350 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental)); 1351 PetscCall(MatSetSizes(mat_elemental, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N)); 1352 PetscCall(MatSetType(mat_elemental, MATELEMENTAL)); 1353 PetscCall(MatSetUp(mat_elemental)); 1354 PetscCall(MatSetOption(mat_elemental, MAT_ROW_ORIENTED, PETSC_FALSE)); 1355 } 1356 1357 PetscCall(PetscMalloc2(m, &rows, N, &cols)); 1358 for (i = 0; i < N; i++) cols[i] = i; 1359 for (i = 0; i < m; i++) rows[i] = rstart + i; 1360 1361 /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */ 1362 PetscCall(MatDenseGetArray(A, &v)); 1363 PetscCall(MatSetValues(mat_elemental, m, rows, N, cols, v, ADD_VALUES)); 1364 PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY)); 1365 PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY)); 1366 PetscCall(MatDenseRestoreArray(A, &v)); 1367 PetscCall(PetscFree2(rows, cols)); 1368 1369 if (reuse == MAT_INPLACE_MATRIX) { 1370 PetscCall(MatHeaderReplace(A, &mat_elemental)); 1371 } else { 1372 *newmat = mat_elemental; 1373 } 1374 PetscFunctionReturn(PETSC_SUCCESS); 1375 } 1376 #endif 1377 1378 static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A, PetscInt col, PetscScalar **vals) 1379 { 1380 Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 1381 1382 PetscFunctionBegin; 1383 PetscCall(MatDenseGetColumn(mat->A, col, vals)); 1384 PetscFunctionReturn(PETSC_SUCCESS); 1385 } 1386 1387 static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A, PetscScalar **vals) 1388 { 1389 Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 1390 1391 PetscFunctionBegin; 1392 PetscCall(MatDenseRestoreColumn(mat->A, vals)); 1393 PetscFunctionReturn(PETSC_SUCCESS); 1394 } 1395 1396 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 1397 { 1398 Mat_MPIDense *mat; 1399 PetscInt m, nloc, N; 1400 1401 PetscFunctionBegin; 1402 PetscCall(MatGetSize(inmat, &m, &N)); 1403 PetscCall(MatGetLocalSize(inmat, NULL, &nloc)); 1404 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 1405 PetscInt sum; 1406 1407 if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N)); 1408 /* Check sum(n) = N */ 1409 PetscCall(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm)); 1410 PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N); 1411 1412 PetscCall(MatCreateDense(comm, m, n, PETSC_DETERMINE, N, NULL, outmat)); 1413 PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 1414 } 1415 1416 /* numeric phase */ 1417 mat = (Mat_MPIDense *)(*outmat)->data; 1418 PetscCall(MatCopy(inmat, mat->A, SAME_NONZERO_PATTERN)); 1419 PetscFunctionReturn(PETSC_SUCCESS); 1420 } 1421 1422 PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A, PetscInt col, Vec *v) 1423 { 1424 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1425 PetscInt lda; 1426 1427 PetscFunctionBegin; 1428 PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1429 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1430 if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec)); 1431 a->vecinuse = col + 1; 1432 PetscCall(MatDenseGetLDA(a->A, &lda)); 1433 PetscCall(MatDenseGetArray(a->A, (PetscScalar **)&a->ptrinuse)); 1434 PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda)); 1435 *v = a->cvec; 1436 PetscFunctionReturn(PETSC_SUCCESS); 1437 } 1438 1439 PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A, PetscInt col, Vec *v) 1440 { 1441 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1442 1443 PetscFunctionBegin; 1444 PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first"); 1445 PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector"); 1446 a->vecinuse = 0; 1447 PetscCall(MatDenseRestoreArray(a->A, (PetscScalar **)&a->ptrinuse)); 1448 PetscCall(VecResetArray(a->cvec)); 1449 if (v) *v = NULL; 1450 PetscFunctionReturn(PETSC_SUCCESS); 1451 } 1452 1453 PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v) 1454 { 1455 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1456 PetscInt lda; 1457 1458 PetscFunctionBegin; 1459 PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1460 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1461 if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec)); 1462 a->vecinuse = col + 1; 1463 PetscCall(MatDenseGetLDA(a->A, &lda)); 1464 PetscCall(MatDenseGetArrayRead(a->A, &a->ptrinuse)); 1465 PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda)); 1466 PetscCall(VecLockReadPush(a->cvec)); 1467 *v = a->cvec; 1468 PetscFunctionReturn(PETSC_SUCCESS); 1469 } 1470 1471 PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v) 1472 { 1473 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1474 1475 PetscFunctionBegin; 1476 PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first"); 1477 PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector"); 1478 a->vecinuse = 0; 1479 PetscCall(MatDenseRestoreArrayRead(a->A, &a->ptrinuse)); 1480 PetscCall(VecLockReadPop(a->cvec)); 1481 PetscCall(VecResetArray(a->cvec)); 1482 if (v) *v = NULL; 1483 PetscFunctionReturn(PETSC_SUCCESS); 1484 } 1485 1486 PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v) 1487 { 1488 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1489 PetscInt lda; 1490 1491 PetscFunctionBegin; 1492 PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1493 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1494 if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec)); 1495 a->vecinuse = col + 1; 1496 PetscCall(MatDenseGetLDA(a->A, &lda)); 1497 PetscCall(MatDenseGetArrayWrite(a->A, (PetscScalar **)&a->ptrinuse)); 1498 PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda)); 1499 *v = a->cvec; 1500 PetscFunctionReturn(PETSC_SUCCESS); 1501 } 1502 1503 PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v) 1504 { 1505 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1506 1507 PetscFunctionBegin; 1508 PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first"); 1509 PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector"); 1510 a->vecinuse = 0; 1511 PetscCall(MatDenseRestoreArrayWrite(a->A, (PetscScalar **)&a->ptrinuse)); 1512 PetscCall(VecResetArray(a->cvec)); 1513 if (v) *v = NULL; 1514 PetscFunctionReturn(PETSC_SUCCESS); 1515 } 1516 1517 static PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A, PetscInt rbegin, PetscInt rend, PetscInt cbegin, PetscInt cend, Mat *v) 1518 { 1519 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1520 Mat_MPIDense *c; 1521 MPI_Comm comm; 1522 PetscInt pbegin, pend; 1523 1524 PetscFunctionBegin; 1525 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 1526 PetscCheck(!a->vecinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1527 PetscCheck(!a->matinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1528 pbegin = PetscMax(0, PetscMin(A->rmap->rend, rbegin) - A->rmap->rstart); 1529 pend = PetscMin(A->rmap->n, PetscMax(0, rend - A->rmap->rstart)); 1530 if (!a->cmat) { 1531 PetscCall(MatCreate(comm, &a->cmat)); 1532 PetscCall(MatSetType(a->cmat, ((PetscObject)A)->type_name)); 1533 if (rend - rbegin == A->rmap->N) PetscCall(PetscLayoutReference(A->rmap, &a->cmat->rmap)); 1534 else { 1535 PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin)); 1536 PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin)); 1537 PetscCall(PetscLayoutSetUp(a->cmat->rmap)); 1538 } 1539 PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin)); 1540 PetscCall(PetscLayoutSetUp(a->cmat->cmap)); 1541 } else { 1542 PetscBool same = (PetscBool)(rend - rbegin == a->cmat->rmap->N); 1543 if (same && a->cmat->rmap->N != A->rmap->N) { 1544 same = (PetscBool)(pend - pbegin == a->cmat->rmap->n); 1545 PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &same, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A))); 1546 } 1547 if (!same) { 1548 PetscCall(PetscLayoutDestroy(&a->cmat->rmap)); 1549 PetscCall(PetscLayoutCreate(comm, &a->cmat->rmap)); 1550 PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin)); 1551 PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin)); 1552 PetscCall(PetscLayoutSetUp(a->cmat->rmap)); 1553 } 1554 if (cend - cbegin != a->cmat->cmap->N) { 1555 PetscCall(PetscLayoutDestroy(&a->cmat->cmap)); 1556 PetscCall(PetscLayoutCreate(comm, &a->cmat->cmap)); 1557 PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin)); 1558 PetscCall(PetscLayoutSetUp(a->cmat->cmap)); 1559 } 1560 } 1561 c = (Mat_MPIDense *)a->cmat->data; 1562 PetscCheck(!c->A, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1563 PetscCall(MatDenseGetSubMatrix(a->A, pbegin, pend, cbegin, cend, &c->A)); 1564 1565 a->cmat->preallocated = PETSC_TRUE; 1566 a->cmat->assembled = PETSC_TRUE; 1567 #if defined(PETSC_HAVE_DEVICE) 1568 a->cmat->offloadmask = c->A->offloadmask; 1569 #endif 1570 a->matinuse = cbegin + 1; 1571 *v = a->cmat; 1572 PetscFunctionReturn(PETSC_SUCCESS); 1573 } 1574 1575 static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A, Mat *v) 1576 { 1577 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1578 Mat_MPIDense *c; 1579 1580 PetscFunctionBegin; 1581 PetscCheck(a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetSubMatrix() first"); 1582 PetscCheck(a->cmat, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal matrix"); 1583 PetscCheck(*v == a->cmat, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Not the matrix obtained from MatDenseGetSubMatrix()"); 1584 a->matinuse = 0; 1585 c = (Mat_MPIDense *)a->cmat->data; 1586 PetscCall(MatDenseRestoreSubMatrix(a->A, &c->A)); 1587 if (v) *v = NULL; 1588 #if defined(PETSC_HAVE_DEVICE) 1589 A->offloadmask = a->A->offloadmask; 1590 #endif 1591 PetscFunctionReturn(PETSC_SUCCESS); 1592 } 1593 1594 /*MC 1595 MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices. 1596 1597 Options Database Key: 1598 . -mat_type mpidense - sets the matrix type to `MATMPIDENSE` during a call to `MatSetFromOptions()` 1599 1600 Level: beginner 1601 1602 .seealso: [](ch_matrices), `Mat`, `MatCreateDense()`, `MATSEQDENSE`, `MATDENSE` 1603 M*/ 1604 PetscErrorCode MatCreate_MPIDense(Mat mat) 1605 { 1606 Mat_MPIDense *a; 1607 1608 PetscFunctionBegin; 1609 PetscCall(PetscNew(&a)); 1610 mat->data = (void *)a; 1611 mat->ops[0] = MatOps_Values; 1612 1613 mat->insertmode = NOT_SET_VALUES; 1614 1615 /* build cache for off array entries formed */ 1616 a->donotstash = PETSC_FALSE; 1617 1618 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)mat), 1, &mat->stash)); 1619 1620 /* stuff used for matrix vector multiply */ 1621 a->lvec = NULL; 1622 a->Mvctx = NULL; 1623 a->roworiented = PETSC_TRUE; 1624 1625 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", MatDenseGetLDA_MPIDense)); 1626 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", MatDenseSetLDA_MPIDense)); 1627 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", MatDenseGetArray_MPIDense)); 1628 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", MatDenseRestoreArray_MPIDense)); 1629 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", MatDenseGetArrayRead_MPIDense)); 1630 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", MatDenseRestoreArrayRead_MPIDense)); 1631 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", MatDenseGetArrayWrite_MPIDense)); 1632 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", MatDenseRestoreArrayWrite_MPIDense)); 1633 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", MatDensePlaceArray_MPIDense)); 1634 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", MatDenseResetArray_MPIDense)); 1635 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", MatDenseReplaceArray_MPIDense)); 1636 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDense)); 1637 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDense)); 1638 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDense)); 1639 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDense)); 1640 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDense)); 1641 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDense)); 1642 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", MatDenseGetSubMatrix_MPIDense)); 1643 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", MatDenseRestoreSubMatrix_MPIDense)); 1644 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense)); 1645 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", MatConvert_MPIDense_MPIAIJ)); 1646 #if defined(PETSC_HAVE_ELEMENTAL) 1647 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", MatConvert_MPIDense_Elemental)); 1648 #endif 1649 #if defined(PETSC_HAVE_SCALAPACK) 1650 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", MatConvert_Dense_ScaLAPACK)); 1651 #endif 1652 #if defined(PETSC_HAVE_CUDA) 1653 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", MatConvert_MPIDense_MPIDenseCUDA)); 1654 #endif 1655 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", MatMPIDenseSetPreallocation_MPIDense)); 1656 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense)); 1657 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ)); 1658 #if defined(PETSC_HAVE_CUDA) 1659 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense)); 1660 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ)); 1661 #endif 1662 #if defined(PETSC_HAVE_HIP) 1663 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", MatConvert_MPIDense_MPIDenseHIP)); 1664 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense)); 1665 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ)); 1666 #endif 1667 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", MatDenseGetColumn_MPIDense)); 1668 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", MatDenseRestoreColumn_MPIDense)); 1669 PetscCall(PetscObjectChangeTypeName((PetscObject)mat, MATMPIDENSE)); 1670 PetscFunctionReturn(PETSC_SUCCESS); 1671 } 1672 1673 /*MC 1674 MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices. 1675 1676 This matrix type is identical to `MATSEQDENSE` when constructed with a single process communicator, 1677 and `MATMPIDENSE` otherwise. 1678 1679 Options Database Key: 1680 . -mat_type dense - sets the matrix type to `MATDENSE` during a call to `MatSetFromOptions()` 1681 1682 Level: beginner 1683 1684 .seealso: [](ch_matrices), `Mat`, `MATSEQDENSE`, `MATMPIDENSE`, `MATDENSECUDA`, `MATDENSEHIP` 1685 M*/ 1686 1687 /*@C 1688 MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries 1689 1690 Collective 1691 1692 Input Parameters: 1693 + B - the matrix 1694 - data - optional location of matrix data. Set to `NULL` for PETSc 1695 to control all matrix memory allocation. 1696 1697 Level: intermediate 1698 1699 Notes: 1700 The dense format is fully compatible with standard Fortran 1701 storage by columns. 1702 1703 The data input variable is intended primarily for Fortran programmers 1704 who wish to allocate their own matrix memory space. Most users should 1705 set `data` to `NULL`. 1706 1707 .seealso: [](ch_matrices), `Mat`, `MATMPIDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()` 1708 @*/ 1709 PetscErrorCode MatMPIDenseSetPreallocation(Mat B, PetscScalar *data) 1710 { 1711 PetscFunctionBegin; 1712 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 1713 PetscTryMethod(B, "MatMPIDenseSetPreallocation_C", (Mat, PetscScalar *), (B, data)); 1714 PetscFunctionReturn(PETSC_SUCCESS); 1715 } 1716 1717 /*@ 1718 MatDensePlaceArray - Allows one to replace the array in a `MATDENSE` matrix with an 1719 array provided by the user. This is useful to avoid copying an array 1720 into a matrix 1721 1722 Not Collective 1723 1724 Input Parameters: 1725 + mat - the matrix 1726 - array - the array in column major order 1727 1728 Level: developer 1729 1730 Note: 1731 You can return to the original array with a call to `MatDenseResetArray()`. The user is responsible for freeing this array; it will not be 1732 freed when the matrix is destroyed. 1733 1734 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDenseResetArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`, 1735 `MatDenseReplaceArray()` 1736 @*/ 1737 PetscErrorCode MatDensePlaceArray(Mat mat, const PetscScalar *array) 1738 { 1739 PetscFunctionBegin; 1740 PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 1741 PetscUseMethod(mat, "MatDensePlaceArray_C", (Mat, const PetscScalar *), (mat, array)); 1742 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 1743 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 1744 mat->offloadmask = PETSC_OFFLOAD_CPU; 1745 #endif 1746 PetscFunctionReturn(PETSC_SUCCESS); 1747 } 1748 1749 /*@ 1750 MatDenseResetArray - Resets the matrix array to that it previously had before the call to `MatDensePlaceArray()` 1751 1752 Not Collective 1753 1754 Input Parameter: 1755 . mat - the matrix 1756 1757 Level: developer 1758 1759 Note: 1760 You can only call this after a call to `MatDensePlaceArray()` 1761 1762 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDensePlaceArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()` 1763 @*/ 1764 PetscErrorCode MatDenseResetArray(Mat mat) 1765 { 1766 PetscFunctionBegin; 1767 PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 1768 PetscUseMethod(mat, "MatDenseResetArray_C", (Mat), (mat)); 1769 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 1770 PetscFunctionReturn(PETSC_SUCCESS); 1771 } 1772 1773 /*@ 1774 MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an 1775 array provided by the user. This is useful to avoid copying an array 1776 into a matrix 1777 1778 Not Collective 1779 1780 Input Parameters: 1781 + mat - the matrix 1782 - array - the array in column major order 1783 1784 Level: developer 1785 1786 Note: 1787 The memory passed in MUST be obtained with `PetscMalloc()` and CANNOT be 1788 freed by the user. It will be freed when the matrix is destroyed. 1789 1790 .seealso: [](ch_matrices), `Mat`, `MatDensePlaceArray()`, `MatDenseGetArray()`, `VecReplaceArray()` 1791 @*/ 1792 PetscErrorCode MatDenseReplaceArray(Mat mat, const PetscScalar *array) 1793 { 1794 PetscFunctionBegin; 1795 PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 1796 PetscUseMethod(mat, "MatDenseReplaceArray_C", (Mat, const PetscScalar *), (mat, array)); 1797 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 1798 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 1799 mat->offloadmask = PETSC_OFFLOAD_CPU; 1800 #endif 1801 PetscFunctionReturn(PETSC_SUCCESS); 1802 } 1803 1804 /*@C 1805 MatCreateDense - Creates a matrix in `MATDENSE` format. 1806 1807 Collective 1808 1809 Input Parameters: 1810 + comm - MPI communicator 1811 . m - number of local rows (or `PETSC_DECIDE` to have calculated if `M` is given) 1812 . n - number of local columns (or `PETSC_DECIDE` to have calculated if `N` is given) 1813 . M - number of global rows (or `PETSC_DECIDE` to have calculated if `m` is given) 1814 . N - number of global columns (or `PETSC_DECIDE` to have calculated if `n` is given) 1815 - data - optional location of matrix data. Set data to `NULL` (`PETSC_NULL_SCALAR` for Fortran users) for PETSc 1816 to control all matrix memory allocation. 1817 1818 Output Parameter: 1819 . A - the matrix 1820 1821 Level: intermediate 1822 1823 Notes: 1824 The dense format is fully compatible with standard Fortran 1825 storage by columns. 1826 1827 Although local portions of the matrix are stored in column-major 1828 order, the matrix is partitioned across MPI ranks by row. 1829 1830 The data input variable is intended primarily for Fortran programmers 1831 who wish to allocate their own matrix memory space. Most users should 1832 set `data` to `NULL` (`PETSC_NULL_SCALAR` for Fortran users). 1833 1834 The user MUST specify either the local or global matrix dimensions 1835 (possibly both). 1836 1837 .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()` 1838 @*/ 1839 PetscErrorCode MatCreateDense(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar *data, Mat *A) 1840 { 1841 PetscFunctionBegin; 1842 PetscCall(MatCreate(comm, A)); 1843 PetscCall(MatSetSizes(*A, m, n, M, N)); 1844 PetscCall(MatSetType(*A, MATDENSE)); 1845 PetscCall(MatSeqDenseSetPreallocation(*A, data)); 1846 PetscCall(MatMPIDenseSetPreallocation(*A, data)); 1847 PetscFunctionReturn(PETSC_SUCCESS); 1848 } 1849 1850 static PetscErrorCode MatDuplicate_MPIDense(Mat A, MatDuplicateOption cpvalues, Mat *newmat) 1851 { 1852 Mat mat; 1853 Mat_MPIDense *a, *oldmat = (Mat_MPIDense *)A->data; 1854 1855 PetscFunctionBegin; 1856 *newmat = NULL; 1857 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat)); 1858 PetscCall(MatSetSizes(mat, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 1859 PetscCall(MatSetType(mat, ((PetscObject)A)->type_name)); 1860 a = (Mat_MPIDense *)mat->data; 1861 1862 mat->factortype = A->factortype; 1863 mat->assembled = PETSC_TRUE; 1864 mat->preallocated = PETSC_TRUE; 1865 1866 mat->insertmode = NOT_SET_VALUES; 1867 a->donotstash = oldmat->donotstash; 1868 1869 PetscCall(PetscLayoutReference(A->rmap, &mat->rmap)); 1870 PetscCall(PetscLayoutReference(A->cmap, &mat->cmap)); 1871 1872 PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A)); 1873 1874 *newmat = mat; 1875 PetscFunctionReturn(PETSC_SUCCESS); 1876 } 1877 1878 static PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer) 1879 { 1880 PetscBool isbinary; 1881 #if defined(PETSC_HAVE_HDF5) 1882 PetscBool ishdf5; 1883 #endif 1884 1885 PetscFunctionBegin; 1886 PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1); 1887 PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2); 1888 /* force binary viewer to load .info file if it has not yet done so */ 1889 PetscCall(PetscViewerSetUp(viewer)); 1890 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1891 #if defined(PETSC_HAVE_HDF5) 1892 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5)); 1893 #endif 1894 if (isbinary) { 1895 PetscCall(MatLoad_Dense_Binary(newMat, viewer)); 1896 #if defined(PETSC_HAVE_HDF5) 1897 } else if (ishdf5) { 1898 PetscCall(MatLoad_Dense_HDF5(newMat, viewer)); 1899 #endif 1900 } else SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name); 1901 PetscFunctionReturn(PETSC_SUCCESS); 1902 } 1903 1904 static PetscErrorCode MatEqual_MPIDense(Mat A, Mat B, PetscBool *flag) 1905 { 1906 Mat_MPIDense *matB = (Mat_MPIDense *)B->data, *matA = (Mat_MPIDense *)A->data; 1907 Mat a, b; 1908 1909 PetscFunctionBegin; 1910 a = matA->A; 1911 b = matB->A; 1912 PetscCall(MatEqual(a, b, flag)); 1913 PetscCall(MPIU_Allreduce(MPI_IN_PLACE, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A))); 1914 PetscFunctionReturn(PETSC_SUCCESS); 1915 } 1916 1917 static PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data) 1918 { 1919 Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data; 1920 1921 PetscFunctionBegin; 1922 PetscCall(PetscFree2(atb->sendbuf, atb->recvcounts)); 1923 PetscCall(MatDestroy(&atb->atb)); 1924 PetscCall(PetscFree(atb)); 1925 PetscFunctionReturn(PETSC_SUCCESS); 1926 } 1927 1928 static PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data) 1929 { 1930 Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data; 1931 1932 PetscFunctionBegin; 1933 PetscCall(PetscFree2(abt->buf[0], abt->buf[1])); 1934 PetscCall(PetscFree2(abt->recvcounts, abt->recvdispls)); 1935 PetscCall(PetscFree(abt)); 1936 PetscFunctionReturn(PETSC_SUCCESS); 1937 } 1938 1939 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) 1940 { 1941 Mat_MPIDense *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data; 1942 Mat_TransMatMultDense *atb; 1943 MPI_Comm comm; 1944 PetscMPIInt size, *recvcounts; 1945 PetscScalar *carray, *sendbuf; 1946 const PetscScalar *atbarray; 1947 PetscInt i, cN = C->cmap->N, proc, k, j, lda; 1948 const PetscInt *ranges; 1949 1950 PetscFunctionBegin; 1951 MatCheckProduct(C, 3); 1952 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 1953 atb = (Mat_TransMatMultDense *)C->product->data; 1954 recvcounts = atb->recvcounts; 1955 sendbuf = atb->sendbuf; 1956 1957 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 1958 PetscCallMPI(MPI_Comm_size(comm, &size)); 1959 1960 /* compute atbarray = aseq^T * bseq */ 1961 PetscCall(MatTransposeMatMult(a->A, b->A, atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX, PETSC_DEFAULT, &atb->atb)); 1962 1963 PetscCall(MatGetOwnershipRanges(C, &ranges)); 1964 1965 /* arrange atbarray into sendbuf */ 1966 PetscCall(MatDenseGetArrayRead(atb->atb, &atbarray)); 1967 PetscCall(MatDenseGetLDA(atb->atb, &lda)); 1968 for (proc = 0, k = 0; proc < size; proc++) { 1969 for (j = 0; j < cN; j++) { 1970 for (i = ranges[proc]; i < ranges[proc + 1]; i++) sendbuf[k++] = atbarray[i + j * lda]; 1971 } 1972 } 1973 PetscCall(MatDenseRestoreArrayRead(atb->atb, &atbarray)); 1974 1975 /* sum all atbarray to local values of C */ 1976 PetscCall(MatDenseGetArrayWrite(c->A, &carray)); 1977 PetscCallMPI(MPI_Reduce_scatter(sendbuf, carray, recvcounts, MPIU_SCALAR, MPIU_SUM, comm)); 1978 PetscCall(MatDenseRestoreArrayWrite(c->A, &carray)); 1979 PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 1980 PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 1981 PetscFunctionReturn(PETSC_SUCCESS); 1982 } 1983 1984 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C) 1985 { 1986 MPI_Comm comm; 1987 PetscMPIInt size; 1988 PetscInt cm = A->cmap->n, cM, cN = B->cmap->N; 1989 Mat_TransMatMultDense *atb; 1990 PetscBool cisdense = PETSC_FALSE; 1991 PetscInt i; 1992 const PetscInt *ranges; 1993 1994 PetscFunctionBegin; 1995 MatCheckProduct(C, 4); 1996 PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 1997 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 1998 if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) { 1999 SETERRQ(comm, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend); 2000 } 2001 2002 /* create matrix product C */ 2003 PetscCall(MatSetSizes(C, cm, B->cmap->n, A->cmap->N, B->cmap->N)); 2004 #if defined(PETSC_HAVE_CUDA) 2005 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, "")); 2006 #endif 2007 #if defined(PETSC_HAVE_HIP) 2008 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSEHIP, "")); 2009 #endif 2010 if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 2011 PetscCall(MatSetUp(C)); 2012 2013 /* create data structure for reuse C */ 2014 PetscCallMPI(MPI_Comm_size(comm, &size)); 2015 PetscCall(PetscNew(&atb)); 2016 cM = C->rmap->N; 2017 PetscCall(PetscMalloc2(cM * cN, &atb->sendbuf, size, &atb->recvcounts)); 2018 PetscCall(MatGetOwnershipRanges(C, &ranges)); 2019 for (i = 0; i < size; i++) atb->recvcounts[i] = (ranges[i + 1] - ranges[i]) * cN; 2020 2021 C->product->data = atb; 2022 C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense; 2023 PetscFunctionReturn(PETSC_SUCCESS); 2024 } 2025 2026 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C) 2027 { 2028 MPI_Comm comm; 2029 PetscMPIInt i, size; 2030 PetscInt maxRows, bufsiz; 2031 PetscMPIInt tag; 2032 PetscInt alg; 2033 Mat_MatTransMultDense *abt; 2034 Mat_Product *product = C->product; 2035 PetscBool flg; 2036 2037 PetscFunctionBegin; 2038 MatCheckProduct(C, 4); 2039 PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 2040 /* check local size of A and B */ 2041 PetscCheck(A->cmap->n == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local column dimensions are incompatible, A (%" PetscInt_FMT ") != B (%" PetscInt_FMT ")", A->cmap->n, B->cmap->n); 2042 2043 PetscCall(PetscStrcmp(product->alg, "allgatherv", &flg)); 2044 alg = flg ? 0 : 1; 2045 2046 /* setup matrix product C */ 2047 PetscCall(MatSetSizes(C, A->rmap->n, B->rmap->n, A->rmap->N, B->rmap->N)); 2048 PetscCall(MatSetType(C, MATMPIDENSE)); 2049 PetscCall(MatSetUp(C)); 2050 PetscCall(PetscObjectGetNewTag((PetscObject)C, &tag)); 2051 2052 /* create data structure for reuse C */ 2053 PetscCall(PetscObjectGetComm((PetscObject)C, &comm)); 2054 PetscCallMPI(MPI_Comm_size(comm, &size)); 2055 PetscCall(PetscNew(&abt)); 2056 abt->tag = tag; 2057 abt->alg = alg; 2058 switch (alg) { 2059 case 1: /* alg: "cyclic" */ 2060 for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i])); 2061 bufsiz = A->cmap->N * maxRows; 2062 PetscCall(PetscMalloc2(bufsiz, &(abt->buf[0]), bufsiz, &(abt->buf[1]))); 2063 break; 2064 default: /* alg: "allgatherv" */ 2065 PetscCall(PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1]))); 2066 PetscCall(PetscMalloc2(size, &(abt->recvcounts), size + 1, &(abt->recvdispls))); 2067 for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N; 2068 for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i]; 2069 break; 2070 } 2071 2072 C->product->data = abt; 2073 C->product->destroy = MatDestroy_MatMatTransMult_MPIDense_MPIDense; 2074 C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense; 2075 PetscFunctionReturn(PETSC_SUCCESS); 2076 } 2077 2078 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C) 2079 { 2080 Mat_MPIDense *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data; 2081 Mat_MatTransMultDense *abt; 2082 MPI_Comm comm; 2083 PetscMPIInt rank, size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom; 2084 PetscScalar *sendbuf, *recvbuf = NULL, *cv; 2085 PetscInt i, cK = A->cmap->N, k, j, bn; 2086 PetscScalar _DOne = 1.0, _DZero = 0.0; 2087 const PetscScalar *av, *bv; 2088 PetscBLASInt cm, cn, ck, alda, blda = 0, clda; 2089 MPI_Request reqs[2]; 2090 const PetscInt *ranges; 2091 2092 PetscFunctionBegin; 2093 MatCheckProduct(C, 3); 2094 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 2095 abt = (Mat_MatTransMultDense *)C->product->data; 2096 PetscCall(PetscObjectGetComm((PetscObject)C, &comm)); 2097 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 2098 PetscCallMPI(MPI_Comm_size(comm, &size)); 2099 PetscCall(MatDenseGetArrayRead(a->A, &av)); 2100 PetscCall(MatDenseGetArrayRead(b->A, &bv)); 2101 PetscCall(MatDenseGetArrayWrite(c->A, &cv)); 2102 PetscCall(MatDenseGetLDA(a->A, &i)); 2103 PetscCall(PetscBLASIntCast(i, &alda)); 2104 PetscCall(MatDenseGetLDA(b->A, &i)); 2105 PetscCall(PetscBLASIntCast(i, &blda)); 2106 PetscCall(MatDenseGetLDA(c->A, &i)); 2107 PetscCall(PetscBLASIntCast(i, &clda)); 2108 PetscCall(MatGetOwnershipRanges(B, &ranges)); 2109 bn = B->rmap->n; 2110 if (blda == bn) { 2111 sendbuf = (PetscScalar *)bv; 2112 } else { 2113 sendbuf = abt->buf[0]; 2114 for (k = 0, i = 0; i < cK; i++) { 2115 for (j = 0; j < bn; j++, k++) sendbuf[k] = bv[i * blda + j]; 2116 } 2117 } 2118 if (size > 1) { 2119 sendto = (rank + size - 1) % size; 2120 recvfrom = (rank + size + 1) % size; 2121 } else { 2122 sendto = recvfrom = 0; 2123 } 2124 PetscCall(PetscBLASIntCast(cK, &ck)); 2125 PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm)); 2126 recvisfrom = rank; 2127 for (i = 0; i < size; i++) { 2128 /* we have finished receiving in sending, bufs can be read/modified */ 2129 PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */ 2130 PetscInt nextbn = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom]; 2131 2132 if (nextrecvisfrom != rank) { 2133 /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */ 2134 sendsiz = cK * bn; 2135 recvsiz = cK * nextbn; 2136 recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1]; 2137 PetscCallMPI(MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0])); 2138 PetscCallMPI(MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1])); 2139 } 2140 2141 /* local aseq * sendbuf^T */ 2142 PetscCall(PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn)); 2143 if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &cm, &cn, &ck, &_DOne, av, &alda, sendbuf, &cn, &_DZero, cv + clda * ranges[recvisfrom], &clda)); 2144 2145 if (nextrecvisfrom != rank) { 2146 /* wait for the sends and receives to complete, swap sendbuf and recvbuf */ 2147 PetscCallMPI(MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE)); 2148 } 2149 bn = nextbn; 2150 recvisfrom = nextrecvisfrom; 2151 sendbuf = recvbuf; 2152 } 2153 PetscCall(MatDenseRestoreArrayRead(a->A, &av)); 2154 PetscCall(MatDenseRestoreArrayRead(b->A, &bv)); 2155 PetscCall(MatDenseRestoreArrayWrite(c->A, &cv)); 2156 PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 2157 PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 2158 PetscFunctionReturn(PETSC_SUCCESS); 2159 } 2160 2161 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C) 2162 { 2163 Mat_MPIDense *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data; 2164 Mat_MatTransMultDense *abt; 2165 MPI_Comm comm; 2166 PetscMPIInt size; 2167 PetscScalar *cv, *sendbuf, *recvbuf; 2168 const PetscScalar *av, *bv; 2169 PetscInt blda, i, cK = A->cmap->N, k, j, bn; 2170 PetscScalar _DOne = 1.0, _DZero = 0.0; 2171 PetscBLASInt cm, cn, ck, alda, clda; 2172 2173 PetscFunctionBegin; 2174 MatCheckProduct(C, 3); 2175 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 2176 abt = (Mat_MatTransMultDense *)C->product->data; 2177 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 2178 PetscCallMPI(MPI_Comm_size(comm, &size)); 2179 PetscCall(MatDenseGetArrayRead(a->A, &av)); 2180 PetscCall(MatDenseGetArrayRead(b->A, &bv)); 2181 PetscCall(MatDenseGetArrayWrite(c->A, &cv)); 2182 PetscCall(MatDenseGetLDA(a->A, &i)); 2183 PetscCall(PetscBLASIntCast(i, &alda)); 2184 PetscCall(MatDenseGetLDA(b->A, &blda)); 2185 PetscCall(MatDenseGetLDA(c->A, &i)); 2186 PetscCall(PetscBLASIntCast(i, &clda)); 2187 /* copy transpose of B into buf[0] */ 2188 bn = B->rmap->n; 2189 sendbuf = abt->buf[0]; 2190 recvbuf = abt->buf[1]; 2191 for (k = 0, j = 0; j < bn; j++) { 2192 for (i = 0; i < cK; i++, k++) sendbuf[k] = bv[i * blda + j]; 2193 } 2194 PetscCall(MatDenseRestoreArrayRead(b->A, &bv)); 2195 PetscCallMPI(MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm)); 2196 PetscCall(PetscBLASIntCast(cK, &ck)); 2197 PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm)); 2198 PetscCall(PetscBLASIntCast(c->A->cmap->n, &cn)); 2199 if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &cm, &cn, &ck, &_DOne, av, &alda, recvbuf, &ck, &_DZero, cv, &clda)); 2200 PetscCall(MatDenseRestoreArrayRead(a->A, &av)); 2201 PetscCall(MatDenseRestoreArrayRead(b->A, &bv)); 2202 PetscCall(MatDenseRestoreArrayWrite(c->A, &cv)); 2203 PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 2204 PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 2205 PetscFunctionReturn(PETSC_SUCCESS); 2206 } 2207 2208 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) 2209 { 2210 Mat_MatTransMultDense *abt; 2211 2212 PetscFunctionBegin; 2213 MatCheckProduct(C, 3); 2214 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 2215 abt = (Mat_MatTransMultDense *)C->product->data; 2216 switch (abt->alg) { 2217 case 1: 2218 PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C)); 2219 break; 2220 default: 2221 PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C)); 2222 break; 2223 } 2224 PetscFunctionReturn(PETSC_SUCCESS); 2225 } 2226 2227 #if defined(PETSC_HAVE_ELEMENTAL) 2228 static PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data) 2229 { 2230 Mat_MatMultDense *ab = (Mat_MatMultDense *)data; 2231 2232 PetscFunctionBegin; 2233 PetscCall(MatDestroy(&ab->Ce)); 2234 PetscCall(MatDestroy(&ab->Ae)); 2235 PetscCall(MatDestroy(&ab->Be)); 2236 PetscCall(PetscFree(ab)); 2237 PetscFunctionReturn(PETSC_SUCCESS); 2238 } 2239 2240 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) 2241 { 2242 Mat_MatMultDense *ab; 2243 2244 PetscFunctionBegin; 2245 MatCheckProduct(C, 3); 2246 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Missing product data"); 2247 ab = (Mat_MatMultDense *)C->product->data; 2248 PetscCall(MatConvert_MPIDense_Elemental(A, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Ae)); 2249 PetscCall(MatConvert_MPIDense_Elemental(B, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Be)); 2250 PetscCall(MatMatMultNumeric_Elemental(ab->Ae, ab->Be, ab->Ce)); 2251 PetscCall(MatConvert(ab->Ce, MATMPIDENSE, MAT_REUSE_MATRIX, &C)); 2252 PetscFunctionReturn(PETSC_SUCCESS); 2253 } 2254 2255 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C) 2256 { 2257 Mat Ae, Be, Ce; 2258 Mat_MatMultDense *ab; 2259 2260 PetscFunctionBegin; 2261 MatCheckProduct(C, 4); 2262 PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 2263 /* check local size of A and B */ 2264 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 2265 SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend); 2266 } 2267 2268 /* create elemental matrices Ae and Be */ 2269 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &Ae)); 2270 PetscCall(MatSetSizes(Ae, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N)); 2271 PetscCall(MatSetType(Ae, MATELEMENTAL)); 2272 PetscCall(MatSetUp(Ae)); 2273 PetscCall(MatSetOption(Ae, MAT_ROW_ORIENTED, PETSC_FALSE)); 2274 2275 PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &Be)); 2276 PetscCall(MatSetSizes(Be, PETSC_DECIDE, PETSC_DECIDE, B->rmap->N, B->cmap->N)); 2277 PetscCall(MatSetType(Be, MATELEMENTAL)); 2278 PetscCall(MatSetUp(Be)); 2279 PetscCall(MatSetOption(Be, MAT_ROW_ORIENTED, PETSC_FALSE)); 2280 2281 /* compute symbolic Ce = Ae*Be */ 2282 PetscCall(MatCreate(PetscObjectComm((PetscObject)C), &Ce)); 2283 PetscCall(MatMatMultSymbolic_Elemental(Ae, Be, fill, Ce)); 2284 2285 /* setup C */ 2286 PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, PETSC_DECIDE, PETSC_DECIDE)); 2287 PetscCall(MatSetType(C, MATDENSE)); 2288 PetscCall(MatSetUp(C)); 2289 2290 /* create data structure for reuse Cdense */ 2291 PetscCall(PetscNew(&ab)); 2292 ab->Ae = Ae; 2293 ab->Be = Be; 2294 ab->Ce = Ce; 2295 2296 C->product->data = ab; 2297 C->product->destroy = MatDestroy_MatMatMult_MPIDense_MPIDense; 2298 C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense; 2299 PetscFunctionReturn(PETSC_SUCCESS); 2300 } 2301 #endif 2302 2303 #if defined(PETSC_HAVE_ELEMENTAL) 2304 static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C) 2305 { 2306 PetscFunctionBegin; 2307 C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense; 2308 C->ops->productsymbolic = MatProductSymbolic_AB; 2309 PetscFunctionReturn(PETSC_SUCCESS); 2310 } 2311 #endif 2312 2313 static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C) 2314 { 2315 Mat_Product *product = C->product; 2316 Mat A = product->A, B = product->B; 2317 2318 PetscFunctionBegin; 2319 if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) 2320 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend); 2321 C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense; 2322 C->ops->productsymbolic = MatProductSymbolic_AtB; 2323 PetscFunctionReturn(PETSC_SUCCESS); 2324 } 2325 2326 static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C) 2327 { 2328 Mat_Product *product = C->product; 2329 const char *algTypes[2] = {"allgatherv", "cyclic"}; 2330 PetscInt alg, nalg = 2; 2331 PetscBool flg = PETSC_FALSE; 2332 2333 PetscFunctionBegin; 2334 /* Set default algorithm */ 2335 alg = 0; /* default is allgatherv */ 2336 PetscCall(PetscStrcmp(product->alg, "default", &flg)); 2337 if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg])); 2338 2339 /* Get runtime option */ 2340 if (product->api_user) { 2341 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatTransposeMult", "Mat"); 2342 PetscCall(PetscOptionsEList("-matmattransmult_mpidense_mpidense_via", "Algorithmic approach", "MatMatTransposeMult", algTypes, nalg, algTypes[alg], &alg, &flg)); 2343 PetscOptionsEnd(); 2344 } else { 2345 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_ABt", "Mat"); 2346 PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_ABt", algTypes, nalg, algTypes[alg], &alg, &flg)); 2347 PetscOptionsEnd(); 2348 } 2349 if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg])); 2350 2351 C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense; 2352 C->ops->productsymbolic = MatProductSymbolic_ABt; 2353 PetscFunctionReturn(PETSC_SUCCESS); 2354 } 2355 2356 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C) 2357 { 2358 Mat_Product *product = C->product; 2359 2360 PetscFunctionBegin; 2361 switch (product->type) { 2362 #if defined(PETSC_HAVE_ELEMENTAL) 2363 case MATPRODUCT_AB: 2364 PetscCall(MatProductSetFromOptions_MPIDense_AB(C)); 2365 break; 2366 #endif 2367 case MATPRODUCT_AtB: 2368 PetscCall(MatProductSetFromOptions_MPIDense_AtB(C)); 2369 break; 2370 case MATPRODUCT_ABt: 2371 PetscCall(MatProductSetFromOptions_MPIDense_ABt(C)); 2372 break; 2373 default: 2374 break; 2375 } 2376 PetscFunctionReturn(PETSC_SUCCESS); 2377 } 2378