/*
   Basic functions for basic parallel dense matrices.
   Portions of this code are under:
   Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
*/

#include <../src/mat/impls/dense/mpi/mpidense.h> /*I "petscmat.h" I*/
#include <../src/mat/impls/aij/mpi/mpiaij.h>
#include <petscblaslapack.h>

/*@
  MatDenseGetLocalMatrix - For a `MATMPIDENSE` or `MATSEQDENSE` matrix returns the sequential
  matrix that represents the operator. For sequential matrices it returns itself.

  Input Parameter:
. A - the sequential or MPI `MATDENSE` matrix

  Output Parameter:
. B - the inner matrix

  Level: intermediate

.seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATMPIDENSE`, `MATSEQDENSE`
@*/
PetscErrorCode MatDenseGetLocalMatrix(Mat A, Mat *B)
{
  Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
  PetscBool     flg;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscAssertPointer(B, 2);
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &flg));
  if (flg) *B = mat->A; /* MPI case: hand back the local sequential block */
  else {
    /* otherwise A must itself be sequential dense; any other type is unsupported */
    PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQDENSE, &flg));
    PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)A)->type_name);
    *B = A;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Copy the locally owned dense block of A into the corresponding block of B */
static PetscErrorCode MatCopy_MPIDense(Mat A, Mat B, MatStructure s)
{
  Mat_MPIDense *Amat = (Mat_MPIDense *)A->data;
  Mat_MPIDense *Bmat = (Mat_MPIDense *)B->data;

  PetscFunctionBegin;
  PetscCall(MatCopy(Amat->A, Bmat->A, s));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Add alpha to every locally owned diagonal entry; each process touches only its own rows */
PetscErrorCode MatShift_MPIDense(Mat A, PetscScalar alpha)
{
  Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
  PetscInt      j, lda, rstart = A->rmap->rstart, rend = A->rmap->rend, rend2;
  PetscScalar  *v;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArray(mat->A, &v));
  PetscCall(MatDenseGetLDA(mat->A, &lda));
  /* for rectangular matrices there is no diagonal entry past the last global column */
  rend2 = PetscMin(rend, A->cmap->N);
  if (rend2 > rstart) {
    /* entry (j, j) lives at local row j - rstart of global column j (column-major, stride lda) */
    for (j = rstart; j < rend2; j++) v[j - rstart + j * lda] += alpha;
    PetscCall(PetscLogFlops(rend2 - rstart));
  }
  PetscCall(MatDenseRestoreArray(mat->A, &v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Access one locally owned row (global index); delegates to the sequential block */
static PetscErrorCode MatGetRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
  PetscInt      lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;

  PetscFunctionBegin;
  PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows");
  lrow = row - rstart;
  PetscCall(MatGetRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Release a row obtained with MatGetRow_MPIDense() */
static PetscErrorCode MatRestoreRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
  PetscInt      lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;

  PetscFunctionBegin;
  PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows");
  lrow = row - rstart;
  PetscCall(MatRestoreRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Return (and cache on A as "DiagonalBlock") a sequential dense matrix aliasing the
   locally owned diagonal block; only square parallel layouts are supported */
static PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A, Mat *a)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)A->data;
  PetscInt      m = A->rmap->n, rstart = A->rmap->rstart;
  PetscScalar  *array;
  MPI_Comm      comm;
  PetscBool     flg;
  Mat           B;

  PetscFunctionBegin;
  PetscCall(MatHasCongruentLayouts(A, &flg));
  PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only square matrices supported.");
  PetscCall(PetscObjectQuery((PetscObject)A, "DiagonalBlock", (PetscObject *)&B));
  if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */
#if PetscDefined(HAVE_CUDA)
    PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSECUDA, &flg));
    PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSECUDA);
#elif PetscDefined(HAVE_HIP)
    PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSEHIP, &flg));
    PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSEHIP);
#endif
    PetscCall(PetscObjectGetComm((PetscObject)(mdn->A), &comm));
    PetscCall(MatCreate(comm, &B));
    PetscCall(MatSetSizes(B, m, m, m, m));
    PetscCall(MatSetType(B, ((PetscObject)mdn->A)->type_name));
    PetscCall(MatDenseGetArrayRead(mdn->A, (const PetscScalar **)&array));
    /* NOTE(review): the column offset m * rstart assumes the local lda equals m — confirm no padded lda reaches here */
    PetscCall(MatSeqDenseSetPreallocation(B, array + m * rstart));
    PetscCall(MatDenseRestoreArrayRead(mdn->A, (const PetscScalar **)&array));
    PetscCall(PetscObjectCompose((PetscObject)A, "DiagonalBlock", (PetscObject)B));
    *a = B;
    PetscCall(MatDestroy(&B)); /* composition above holds a reference; drop ours */
  } else *a = B;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Insert/add values: locally owned rows go straight into the sequential block,
   off-process rows are stashed for communication at assembly time */
static PetscErrorCode MatSetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIDense *A = (Mat_MPIDense *)mat->data;
  PetscInt      i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;
  PetscBool     roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row index: skip */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        PetscCall(MatSetValues(A->A, 1, &row, n, idxn, v + i * n, addv));
      } else {
        /* column-oriented input: value (i, j) lives at v[i + j*m] */
        for (j = 0; j < n; j++) {
          if (idxn[j] < 0) continue;
          PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large");
          PetscCall(MatSetValues(A->A, 1, &row, 1, &idxn[j], v + i + j * m, addv));
        }
      }
    } else if (!A->donotstash) {
      mat->assembled = PETSC_FALSE; /* off-process entry: assembly must run again */
      if (roworiented) {
        PetscCall(MatStashValuesRow_Private(&mat->stash, idxm[i], n, idxn, v + i * n, PETSC_FALSE));
      } else {
        PetscCall(MatStashValuesCol_Private(&mat->stash, idxm[i], n, idxn, v + i, m, PETSC_FALSE));
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Retrieve values; only locally owned rows are supported (no communication here) */
static PetscErrorCode MatGetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
  PetscInt      i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j = 0; j < n; j++) {
        if (idxn[j] < 0) continue; /* negative column */
        PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large");
        PetscCall(MatGetValues(mdn->A, 1, &row, 1, &idxn[j], v + i * n + j));
      }
    } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Leading dimension of the local dense storage */
static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A, PetscInt *lda)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseGetLDA(a->A, lda));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Set the leading dimension, creating the inner sequential matrix first if needed */
static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A, PetscInt lda)
{
  Mat_MPIDense *a     = (Mat_MPIDense *)A->data;
  MatType       mtype = MATSEQDENSE;

  PetscFunctionBegin;
  if (!a->A) {
    PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
    PetscCall(PetscLayoutSetUp(A->rmap));
    PetscCall(PetscLayoutSetUp(A->cmap));
    PetscCall(MatCreate(PETSC_COMM_SELF, &a->A));
    /* inner block stores all N columns of the locally owned rows */
    PetscCall(MatSetSizes(a->A, A->rmap->n, A->cmap->N, A->rmap->n, A->cmap->N));
#if PetscDefined(HAVE_CUDA)
    PetscBool iscuda;
    PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSECUDA, &iscuda));
    if (iscuda) mtype = MATSEQDENSECUDA;
#elif PetscDefined(HAVE_HIP)
    PetscBool iship;
    PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSEHIP, &iship));
    if (iship) mtype = MATSEQDENSEHIP;
#endif
    PetscCall(MatSetType(a->A, mtype));
  }
  PetscCall(MatDenseSetLDA(a->A, lda));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Read/write access to the raw local array */
static PetscErrorCode MatDenseGetArray_MPIDense(Mat A, PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseGetArray(a->A, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Read-only access to the raw local array */
static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A, const PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseGetArrayRead(a->A, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Write-only access to the raw local array (previous contents need not be valid) */
static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A, PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseGetArrayWrite(a->A, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Temporarily substitute a user-provided array for the local storage */
static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A, const PetscScalar *array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDensePlaceArray(a->A, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Undo MatDensePlaceArray(), restoring the original storage */
static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseResetArray(a->A));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Permanently replace the local storage with a user-provided array */
static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A, const PetscScalar *array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseReplaceArray(a->A, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Extract a (row, column) submatrix; the row distribution of the result must
   match A's — no parallel redistribution is performed */
static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A, IS isrow, IS iscol, MatReuse scall, Mat *B)
{
  Mat_MPIDense      *mat = (Mat_MPIDense *)A->data, *newmatd;
  PetscInt           lda, i, j, rstart, rend, nrows, ncols, Ncols, nlrows, nlcols;
  const PetscInt    *irow, *icol;
  const PetscScalar *v;
  PetscScalar       *bv;
  Mat                newmat;
  IS                 iscol_local;
  MPI_Comm           comm_is, comm_mat;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm_mat));
  PetscCall(PetscObjectGetComm((PetscObject)iscol, &comm_is));
  PetscCheck(comm_mat == comm_is, PETSC_COMM_SELF, PETSC_ERR_ARG_NOTSAMECOMM, "IS communicator must match matrix communicator");

  /* every process needs the full column selection since it stores all columns of its rows */
  PetscCall(ISAllGather(iscol, &iscol_local));
  PetscCall(ISGetIndices(isrow, &irow));
  PetscCall(ISGetIndices(iscol_local, &icol));
  PetscCall(ISGetLocalSize(isrow, &nrows));
  PetscCall(ISGetLocalSize(iscol, &ncols));
  PetscCall(ISGetSize(iscol, &Ncols)); /* global number of columns, size of iscol_local */

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK. ... Currently supports only submatrix same partitioning as
     original matrix! */

  PetscCall(MatGetLocalSize(A, &nlrows, &nlcols));
  PetscCall(MatGetOwnershipRange(A, &rstart, &rend));

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &newmat));
    PetscCall(MatSetSizes(newmat, nrows, ncols, PETSC_DECIDE, Ncols));
    PetscCall(MatSetType(newmat, ((PetscObject)A)->type_name));
    PetscCall(MatMPIDenseSetPreallocation(newmat, NULL));
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense *)newmat->data;
  PetscCall(MatDenseGetArray(newmatd->A, &bv));
  PetscCall(MatDenseGetArrayRead(mat->A, &v));
  PetscCall(MatDenseGetLDA(mat->A, &lda));
  for (i = 0; i < Ncols; i++) {
    const PetscScalar *av = v + lda * icol[i]; /* start of selected source column */
    for (j = 0; j < nrows; j++) *bv++ = av[irow[j] - rstart];
  }
  PetscCall(MatDenseRestoreArrayRead(mat->A, &v));
  PetscCall(MatDenseRestoreArray(newmatd->A, &bv));

  /* Assemble the matrices so that the correct flags are set */
  PetscCall(MatAssemblyBegin(newmat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(newmat, MAT_FINAL_ASSEMBLY));

  /* Free work space */
  PetscCall(ISRestoreIndices(isrow, &irow));
  PetscCall(ISRestoreIndices(iscol_local, &icol));
  PetscCall(ISDestroy(&iscol_local));
  *B = newmat;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Return an array obtained with MatDenseGetArray_MPIDense() */
static PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A, PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseRestoreArray(a->A, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Return an array obtained with MatDenseGetArrayRead_MPIDense() */
static PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A, const PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseRestoreArrayRead(a->A, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Return an array obtained with MatDenseGetArrayWrite_MPIDense() */
static PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A, PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseRestoreArrayWrite(a->A, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Begin assembly: start scattering stashed off-process entries to their owners */
static PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat, MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
  PetscInt      nstash, reallocs;

  PetscFunctionBegin;
  if (mdn->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
  PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
  PetscCall(PetscInfo(mdn->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Finish assembly: receive stashed entries, insert them locally, then assemble the inner block */
static PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat, MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
  PetscInt      i, *row, *col, flg, j, rstart, ncols;
  PetscMPIInt   n;
  PetscScalar  *val;

  PetscFunctionBegin;
  if (!mdn->donotstash && !mat->nooffprocentries) {
    /* wait on receives */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIDense(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
  }

  PetscCall(MatAssemblyBegin(mdn->A, mode));
  PetscCall(MatAssemblyEnd(mdn->A, mode));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Zero all entries of the local block */
static PetscErrorCode MatZeroEntries_MPIDense(Mat A)
{
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  PetscCall(MatZeroEntries(l->A));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Zero the listed (global) rows, optionally fixing b so x stays a solution,
   and optionally placing diag on the zeroed diagonal entries */
static PetscErrorCode MatZeroRows_MPIDense(Mat A, PetscInt n, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;
  PetscInt      i, len, *lrows;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(PetscLayoutMapLocal(A->rmap, n, rows, &len, &lrows, NULL));
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArrayWrite(b, &bb));
    for (i = 0; i < len; ++i) bb[lrows[i]] = diag * xx[lrows[i]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArrayWrite(b, &bb));
  }
  PetscCall(MatZeroRows(l->A, len, lrows, 0.0, NULL, NULL));
  if (diag != 0.0) {
    Vec d;

    PetscCall(MatCreateVecs(A, NULL, &d));
    PetscCall(VecSet(d, diag));
    PetscCall(MatDiagonalSet(A, d, INSERT_VALUES));
    PetscCall(VecDestroy(&d));
  }
  PetscCall(PetscFree(lrows));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat, Vec, Vec);
PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat, Vec, Vec, Vec);
PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat, Vec, Vec);
PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat, Vec, Vec, Vec);

/* y = A x: broadcast the full x into lvec via the SF, then apply the local block */
static PetscErrorCode MatMult_MPIDense(Mat mat, Vec xx, Vec yy)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
  PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
  PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
  PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
  PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
  PetscCall((*mdn->A->ops->mult)(mdn->A, mdn->lvec, yy));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* z = y + A x: same broadcast as MatMult, then the local multadd */
static PetscErrorCode MatMultAdd_MPIDense(Mat mat, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
  PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
  PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
  PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
  PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
  PetscCall((*mdn->A->ops->multadd)(mdn->A, mdn->lvec, yy, zz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* y = A^T x: local transpose multiply into lvec, then reduce (sum) into y via the SF */
static PetscErrorCode MatMultTranspose_MPIDense(Mat A, Vec xx, Vec yy)
{
  Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
  PetscCall(VecSet(yy, 0.0)); /* reduction adds into y, so it must start at zero */
  PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec));
  PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(yy, &ay, &aymtype));
  PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
  PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
  PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
  PetscCall(VecRestoreArrayAndMemType(yy, &ay));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* z = y + A^T x: copy y into z first, then reduce the local transpose product into z */
static PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  PetscFunctionBegin;
  if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
  PetscCall(VecCopy(yy, zz));
  PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec));
  PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
  PetscCall(VecGetArrayAndMemType(zz, &ay, &aymtype));
  PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
  PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
  PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
  PetscCall(VecRestoreArrayAndMemType(zz, &ay));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Extract the locally owned diagonal entries into v, zero-padding any local
   rows past the last diagonal entry */
PetscErrorCode MatGetDiagonal_MPIDense(Mat A, Vec v)
{
  Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
  PetscInt           lda, len, i, nl, ng, m = A->rmap->n, radd;
  PetscScalar       *x;
  const PetscScalar *av;

  PetscFunctionBegin;
  PetscCall(VecGetArray(v, &x));
  PetscCall(VecGetSize(v, &ng));
  PetscCheck(ng == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming mat and vec");
  PetscCall(VecGetLocalSize(v, &nl));
  len  = PetscMin(a->A->rmap->n, a->A->cmap->n);
  /* NOTE(review): radd = rstart * m mixes with the i * lda stride below; this is
     only consistent when lda == m — confirm no padded lda reaches this routine */
  radd = A->rmap->rstart * m;
  PetscCall(MatDenseGetArrayRead(a->A, &av));
  PetscCall(MatDenseGetLDA(a->A, &lda));
  for (i = 0; i < len; i++) x[i] = av[radd + i * lda + i];
  PetscCall(MatDenseRestoreArrayRead(a->A, &av));
  PetscCall(PetscArrayzero(x + i, nl - i));
  PetscCall(VecRestoreArray(v, &x));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Destroy the parallel dense matrix: free inner objects and detach every
   composed method/conversion so the object can be retyped safely */
static PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;

  PetscFunctionBegin;
  PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
  PetscCall(MatStashDestroy_Private(&mat->stash));
  PetscCheck(!mdn->vecinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!mdn->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDestroy(&mdn->A));
  PetscCall(VecDestroy(&mdn->lvec));
  PetscCall(PetscSFDestroy(&mdn->Mvctx));
  PetscCall(VecDestroy(&mdn->cvec));
  PetscCall(MatDestroy(&mdn->cmat));

  PetscCall(PetscFree(mat->data));
  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));

  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", NULL));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", NULL));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", NULL));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensecuda_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAPlaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAResetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAReplaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDASetPreallocation_C", NULL));
#endif
#if defined(PETSC_HAVE_HIP)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensehip_mpidense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensehip_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidensehip_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaijhipsparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPPlaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPResetArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPReplaceArray_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPSetPreallocation_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", NULL));

  /* drop the cached diagonal block, if MatGetDiagonalBlock_MPIDense() created one */
  PetscCall(PetscObjectCompose((PetscObject)mat, "DiagonalBlock", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PETSC_INTERN PetscErrorCode MatView_SeqDense(Mat, PetscViewer);

#include <petscdraw.h>
/* View by gathering the entire matrix onto rank 0 and viewing it there; INFO
   formats are handled per-rank without gathering */
static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
{
  Mat_MPIDense     *mdn = (Mat_MPIDense *)mat->data;
  PetscMPIInt       rank;
  PetscViewerType   vtype;
  PetscBool         iascii, isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  if (iascii) {
    PetscCall(PetscViewerGetType(viewer, &vtype));
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "  [%d] local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %" PetscInt_FMT " \n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                   (PetscInt)info.memory));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      if (mdn->Mvctx) PetscCall(PetscSFView(mdn->Mvctx, viewer));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(PETSC_SUCCESS);
    }
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;

    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawIsNull(draw, &isnull));
    if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat          A;
    PetscInt     M = mat->rmap->N, N = mat->cmap->N, m, row, i, nz;
    PetscInt    *cols;
    PetscScalar *vals;

    PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
    if (rank == 0) {
      PetscCall(MatSetSizes(A, M, N, M, N));
    } else {
      PetscCall(MatSetSizes(A, 0, 0, M, N));
    }
    /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
    PetscCall(MatSetType(A, MATMPIDENSE));
    PetscCall(MatMPIDenseSetPreallocation(A, NULL));

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    A->insertmode = INSERT_VALUES;

    row = mat->rmap->rstart;
    m   = mdn->A->rmap->n;
    for (i = 0; i < m; i++) {
      PetscCall(MatGetRow_MPIDense(mat, row, &nz, &cols, &vals));
      PetscCall(MatSetValues_MPIDense(A, 1, &row, nz, cols, vals, INSERT_VALUES));
      PetscCall(MatRestoreRow_MPIDense(mat, row, &nz, &cols, &vals));
      row++;
    }

    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    if (rank == 0) {
      PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIDense *)(A->data))->A, ((PetscObject)mat)->name));
      PetscCall(MatView_SeqDense(((Mat_MPIDense *)(A->data))->A, sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(PetscViewerFlush(viewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Dispatch viewing by viewer type */
static PetscErrorCode MatView_MPIDense(Mat mat, PetscViewer viewer)
{
  PetscBool iascii, isbinary, isdraw, issocket;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));

  if (iascii || issocket || isdraw) {
    PetscCall(MatView_MPIDense_ASCIIorDraworSocket(mat, viewer));
  } else if (isbinary) PetscCall(MatView_Dense_Binary(mat, viewer));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Gather matrix statistics: local, global max, or global sum over all ranks */
static PetscErrorCode MatGetInfo_MPIDense(Mat A, MatInfoType flag, MatInfo *info)
{
  Mat_MPIDense  *mat = (Mat_MPIDense *)A->data;
  Mat            mdn = mat->A;
  PetscLogDouble isend[5], irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;

  PetscCall(MatGetInfo(mdn, MAT_LOCAL, info));

  isend[0] = info->nz_used;
  isend[1] = info->nz_allocated;
  isend[2] = info->nz_unneeded;
  isend[3] = info->memory;
  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)A)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)A)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Apply a MatOption, forwarding to the local block where meaningful
   (continues past the end of this chunk) */
static PetscErrorCode MatSetOption_MPIDense(Mat A, MatOption op, PetscBool flg)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A, 1);
    a->roworiented = flg;
    PetscCall(MatSetOption(a->A, op, flg));
    break;
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_USE_HASH_TABLE:
  case MAT_SORTED_FULL:
    PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
    break;
  case
MAT_IGNORE_OFF_PROC_ENTRIES: 838 a->donotstash = flg; 839 break; 840 case MAT_SYMMETRIC: 841 case MAT_STRUCTURALLY_SYMMETRIC: 842 case MAT_HERMITIAN: 843 case MAT_SYMMETRY_ETERNAL: 844 case MAT_STRUCTURAL_SYMMETRY_ETERNAL: 845 case MAT_SPD: 846 case MAT_IGNORE_LOWER_TRIANGULAR: 847 case MAT_IGNORE_ZERO_ENTRIES: 848 case MAT_SPD_ETERNAL: 849 /* if the diagonal matrix is square it inherits some of the properties above */ 850 PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); 851 break; 852 default: 853 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %s", MatOptions[op]); 854 } 855 PetscFunctionReturn(PETSC_SUCCESS); 856 } 857 858 static PetscErrorCode MatDiagonalScale_MPIDense(Mat A, Vec ll, Vec rr) 859 { 860 Mat_MPIDense *mdn = (Mat_MPIDense *)A->data; 861 const PetscScalar *l; 862 PetscScalar x, *v, *vv, *r; 863 PetscInt i, j, s2a, s3a, s2, s3, m = mdn->A->rmap->n, n = mdn->A->cmap->n, lda; 864 865 PetscFunctionBegin; 866 PetscCall(MatDenseGetArray(mdn->A, &vv)); 867 PetscCall(MatDenseGetLDA(mdn->A, &lda)); 868 PetscCall(MatGetLocalSize(A, &s2, &s3)); 869 if (ll) { 870 PetscCall(VecGetLocalSize(ll, &s2a)); 871 PetscCheck(s2a == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left scaling vector non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT, s2a, s2); 872 PetscCall(VecGetArrayRead(ll, &l)); 873 for (i = 0; i < m; i++) { 874 x = l[i]; 875 v = vv + i; 876 for (j = 0; j < n; j++) { 877 (*v) *= x; 878 v += lda; 879 } 880 } 881 PetscCall(VecRestoreArrayRead(ll, &l)); 882 PetscCall(PetscLogFlops(1.0 * n * m)); 883 } 884 if (rr) { 885 const PetscScalar *ar; 886 887 PetscCall(VecGetLocalSize(rr, &s3a)); 888 PetscCheck(s3a == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Right scaling vec non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT ".", s3a, s3); 889 PetscCall(VecGetArrayRead(rr, &ar)); 890 if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); 891 PetscCall(VecGetArray(mdn->lvec, &r)); 892 
PetscCall(PetscSFBcastBegin(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE)); 893 PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE)); 894 PetscCall(VecRestoreArrayRead(rr, &ar)); 895 for (i = 0; i < n; i++) { 896 x = r[i]; 897 v = vv + i * lda; 898 for (j = 0; j < m; j++) (*v++) *= x; 899 } 900 PetscCall(VecRestoreArray(mdn->lvec, &r)); 901 PetscCall(PetscLogFlops(1.0 * n * m)); 902 } 903 PetscCall(MatDenseRestoreArray(mdn->A, &vv)); 904 PetscFunctionReturn(PETSC_SUCCESS); 905 } 906 907 static PetscErrorCode MatNorm_MPIDense(Mat A, NormType type, PetscReal *nrm) 908 { 909 Mat_MPIDense *mdn = (Mat_MPIDense *)A->data; 910 PetscInt i, j; 911 PetscMPIInt size; 912 PetscReal sum = 0.0; 913 const PetscScalar *av, *v; 914 915 PetscFunctionBegin; 916 PetscCall(MatDenseGetArrayRead(mdn->A, &av)); 917 v = av; 918 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 919 if (size == 1) { 920 PetscCall(MatNorm(mdn->A, type, nrm)); 921 } else { 922 if (type == NORM_FROBENIUS) { 923 for (i = 0; i < mdn->A->cmap->n * mdn->A->rmap->n; i++) { 924 sum += PetscRealPart(PetscConj(*v) * (*v)); 925 v++; 926 } 927 PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A))); 928 *nrm = PetscSqrtReal(*nrm); 929 PetscCall(PetscLogFlops(2.0 * mdn->A->cmap->n * mdn->A->rmap->n)); 930 } else if (type == NORM_1) { 931 PetscReal *tmp, *tmp2; 932 PetscCall(PetscCalloc2(A->cmap->N, &tmp, A->cmap->N, &tmp2)); 933 *nrm = 0.0; 934 v = av; 935 for (j = 0; j < mdn->A->cmap->n; j++) { 936 for (i = 0; i < mdn->A->rmap->n; i++) { 937 tmp[j] += PetscAbsScalar(*v); 938 v++; 939 } 940 } 941 PetscCall(MPIU_Allreduce(tmp, tmp2, A->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A))); 942 for (j = 0; j < A->cmap->N; j++) { 943 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 944 } 945 PetscCall(PetscFree2(tmp, tmp2)); 946 PetscCall(PetscLogFlops(A->cmap->n * A->rmap->n)); 947 } else if (type == NORM_INFINITY) { /* max row norm */ 948 PetscReal 
ntemp; 949 PetscCall(MatNorm(mdn->A, type, &ntemp)); 950 PetscCall(MPIU_Allreduce(&ntemp, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A))); 951 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for two norm"); 952 } 953 PetscCall(MatDenseRestoreArrayRead(mdn->A, &av)); 954 PetscFunctionReturn(PETSC_SUCCESS); 955 } 956 957 static PetscErrorCode MatTranspose_MPIDense(Mat A, MatReuse reuse, Mat *matout) 958 { 959 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 960 Mat B; 961 PetscInt M = A->rmap->N, N = A->cmap->N, m, n, *rwork, rstart = A->rmap->rstart; 962 PetscInt j, i, lda; 963 PetscScalar *v; 964 965 PetscFunctionBegin; 966 if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout)); 967 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 968 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 969 PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M)); 970 PetscCall(MatSetType(B, ((PetscObject)A)->type_name)); 971 PetscCall(MatMPIDenseSetPreallocation(B, NULL)); 972 } else B = *matout; 973 974 m = a->A->rmap->n; 975 n = a->A->cmap->n; 976 PetscCall(MatDenseGetArrayRead(a->A, (const PetscScalar **)&v)); 977 PetscCall(MatDenseGetLDA(a->A, &lda)); 978 PetscCall(PetscMalloc1(m, &rwork)); 979 for (i = 0; i < m; i++) rwork[i] = rstart + i; 980 for (j = 0; j < n; j++) { 981 PetscCall(MatSetValues(B, 1, &j, m, rwork, v, INSERT_VALUES)); 982 v += lda; 983 } 984 PetscCall(MatDenseRestoreArrayRead(a->A, (const PetscScalar **)&v)); 985 PetscCall(PetscFree(rwork)); 986 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 987 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 988 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 989 *matout = B; 990 } else { 991 PetscCall(MatHeaderMerge(A, &B)); 992 } 993 PetscFunctionReturn(PETSC_SUCCESS); 994 } 995 996 static PetscErrorCode MatDuplicate_MPIDense(Mat, MatDuplicateOption, Mat *); 997 PETSC_INTERN PetscErrorCode 
MatScale_MPIDense(Mat, PetscScalar); 998 999 static PetscErrorCode MatSetUp_MPIDense(Mat A) 1000 { 1001 PetscFunctionBegin; 1002 PetscCall(PetscLayoutSetUp(A->rmap)); 1003 PetscCall(PetscLayoutSetUp(A->cmap)); 1004 if (!A->preallocated) PetscCall(MatMPIDenseSetPreallocation(A, NULL)); 1005 PetscFunctionReturn(PETSC_SUCCESS); 1006 } 1007 1008 static PetscErrorCode MatAXPY_MPIDense(Mat Y, PetscScalar alpha, Mat X, MatStructure str) 1009 { 1010 Mat_MPIDense *A = (Mat_MPIDense *)Y->data, *B = (Mat_MPIDense *)X->data; 1011 1012 PetscFunctionBegin; 1013 PetscCall(MatAXPY(A->A, alpha, B->A, str)); 1014 PetscFunctionReturn(PETSC_SUCCESS); 1015 } 1016 1017 static PetscErrorCode MatConjugate_MPIDense(Mat mat) 1018 { 1019 Mat_MPIDense *a = (Mat_MPIDense *)mat->data; 1020 1021 PetscFunctionBegin; 1022 PetscCall(MatConjugate(a->A)); 1023 PetscFunctionReturn(PETSC_SUCCESS); 1024 } 1025 1026 static PetscErrorCode MatRealPart_MPIDense(Mat A) 1027 { 1028 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1029 1030 PetscFunctionBegin; 1031 PetscCall(MatRealPart(a->A)); 1032 PetscFunctionReturn(PETSC_SUCCESS); 1033 } 1034 1035 static PetscErrorCode MatImaginaryPart_MPIDense(Mat A) 1036 { 1037 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1038 1039 PetscFunctionBegin; 1040 PetscCall(MatImaginaryPart(a->A)); 1041 PetscFunctionReturn(PETSC_SUCCESS); 1042 } 1043 1044 static PetscErrorCode MatGetColumnVector_MPIDense(Mat A, Vec v, PetscInt col) 1045 { 1046 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1047 1048 PetscFunctionBegin; 1049 PetscCheck(a->A, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing local matrix"); 1050 PetscCheck(a->A->ops->getcolumnvector, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing get column operation"); 1051 PetscCall((*a->A->ops->getcolumnvector)(a->A, v, col)); 1052 PetscFunctionReturn(PETSC_SUCCESS); 1053 } 1054 1055 PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat, PetscInt, PetscReal *); 1056 1057 static PetscErrorCode 
MatGetColumnReductions_MPIDense(Mat A, PetscInt type, PetscReal *reductions) 1058 { 1059 PetscInt i, m, n; 1060 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1061 PetscReal *work; 1062 1063 PetscFunctionBegin; 1064 PetscCall(MatGetSize(A, &m, &n)); 1065 PetscCall(PetscMalloc1(n, &work)); 1066 if (type == REDUCTION_MEAN_REALPART) { 1067 PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_REALPART, work)); 1068 } else if (type == REDUCTION_MEAN_IMAGINARYPART) { 1069 PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_IMAGINARYPART, work)); 1070 } else { 1071 PetscCall(MatGetColumnReductions_SeqDense(a->A, type, work)); 1072 } 1073 if (type == NORM_2) { 1074 for (i = 0; i < n; i++) work[i] *= work[i]; 1075 } 1076 if (type == NORM_INFINITY) { 1077 PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, A->hdr.comm)); 1078 } else { 1079 PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, A->hdr.comm)); 1080 } 1081 PetscCall(PetscFree(work)); 1082 if (type == NORM_2) { 1083 for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]); 1084 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 1085 for (i = 0; i < n; i++) reductions[i] /= m; 1086 } 1087 PetscFunctionReturn(PETSC_SUCCESS); 1088 } 1089 1090 static PetscErrorCode MatSetRandom_MPIDense(Mat x, PetscRandom rctx) 1091 { 1092 Mat_MPIDense *d = (Mat_MPIDense *)x->data; 1093 1094 PetscFunctionBegin; 1095 PetscCall(MatSetRandom(d->A, rctx)); 1096 #if defined(PETSC_HAVE_DEVICE) 1097 x->offloadmask = d->A->offloadmask; 1098 #endif 1099 PetscFunctionReturn(PETSC_SUCCESS); 1100 } 1101 1102 static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A, PetscBool *missing, PetscInt *d) 1103 { 1104 PetscFunctionBegin; 1105 *missing = PETSC_FALSE; 1106 PetscFunctionReturn(PETSC_SUCCESS); 1107 } 1108 1109 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat); 1110 static PetscErrorCode 
MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat); 1111 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat); 1112 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat); 1113 static PetscErrorCode MatEqual_MPIDense(Mat, Mat, PetscBool *); 1114 static PetscErrorCode MatLoad_MPIDense(Mat, PetscViewer); 1115 1116 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense, 1117 MatGetRow_MPIDense, 1118 MatRestoreRow_MPIDense, 1119 MatMult_MPIDense, 1120 /* 4*/ MatMultAdd_MPIDense, 1121 MatMultTranspose_MPIDense, 1122 MatMultTransposeAdd_MPIDense, 1123 NULL, 1124 NULL, 1125 NULL, 1126 /* 10*/ NULL, 1127 NULL, 1128 NULL, 1129 NULL, 1130 MatTranspose_MPIDense, 1131 /* 15*/ MatGetInfo_MPIDense, 1132 MatEqual_MPIDense, 1133 MatGetDiagonal_MPIDense, 1134 MatDiagonalScale_MPIDense, 1135 MatNorm_MPIDense, 1136 /* 20*/ MatAssemblyBegin_MPIDense, 1137 MatAssemblyEnd_MPIDense, 1138 MatSetOption_MPIDense, 1139 MatZeroEntries_MPIDense, 1140 /* 24*/ MatZeroRows_MPIDense, 1141 NULL, 1142 NULL, 1143 NULL, 1144 NULL, 1145 /* 29*/ MatSetUp_MPIDense, 1146 NULL, 1147 NULL, 1148 MatGetDiagonalBlock_MPIDense, 1149 NULL, 1150 /* 34*/ MatDuplicate_MPIDense, 1151 NULL, 1152 NULL, 1153 NULL, 1154 NULL, 1155 /* 39*/ MatAXPY_MPIDense, 1156 MatCreateSubMatrices_MPIDense, 1157 NULL, 1158 MatGetValues_MPIDense, 1159 MatCopy_MPIDense, 1160 /* 44*/ NULL, 1161 MatScale_MPIDense, 1162 MatShift_MPIDense, 1163 NULL, 1164 NULL, 1165 /* 49*/ MatSetRandom_MPIDense, 1166 NULL, 1167 NULL, 1168 NULL, 1169 NULL, 1170 /* 54*/ NULL, 1171 NULL, 1172 NULL, 1173 NULL, 1174 NULL, 1175 /* 59*/ MatCreateSubMatrix_MPIDense, 1176 MatDestroy_MPIDense, 1177 MatView_MPIDense, 1178 NULL, 1179 NULL, 1180 /* 64*/ NULL, 1181 NULL, 1182 NULL, 1183 NULL, 1184 NULL, 1185 /* 69*/ NULL, 1186 NULL, 1187 NULL, 1188 NULL, 1189 NULL, 1190 /* 74*/ NULL, 1191 NULL, 1192 NULL, 1193 NULL, 1194 NULL, 1195 /* 79*/ NULL, 1196 NULL, 1197 NULL, 1198 NULL, 1199 /* 
83*/ MatLoad_MPIDense, 1200 NULL, 1201 NULL, 1202 NULL, 1203 NULL, 1204 NULL, 1205 /* 89*/ NULL, 1206 NULL, 1207 NULL, 1208 NULL, 1209 NULL, 1210 /* 94*/ NULL, 1211 NULL, 1212 MatMatTransposeMultSymbolic_MPIDense_MPIDense, 1213 MatMatTransposeMultNumeric_MPIDense_MPIDense, 1214 NULL, 1215 /* 99*/ MatProductSetFromOptions_MPIDense, 1216 NULL, 1217 NULL, 1218 MatConjugate_MPIDense, 1219 NULL, 1220 /*104*/ NULL, 1221 MatRealPart_MPIDense, 1222 MatImaginaryPart_MPIDense, 1223 NULL, 1224 NULL, 1225 /*109*/ NULL, 1226 NULL, 1227 NULL, 1228 MatGetColumnVector_MPIDense, 1229 MatMissingDiagonal_MPIDense, 1230 /*114*/ NULL, 1231 NULL, 1232 NULL, 1233 NULL, 1234 NULL, 1235 /*119*/ NULL, 1236 NULL, 1237 NULL, 1238 NULL, 1239 NULL, 1240 /*124*/ NULL, 1241 MatGetColumnReductions_MPIDense, 1242 NULL, 1243 NULL, 1244 NULL, 1245 /*129*/ NULL, 1246 NULL, 1247 MatTransposeMatMultSymbolic_MPIDense_MPIDense, 1248 MatTransposeMatMultNumeric_MPIDense_MPIDense, 1249 NULL, 1250 /*134*/ NULL, 1251 NULL, 1252 NULL, 1253 NULL, 1254 NULL, 1255 /*139*/ NULL, 1256 NULL, 1257 NULL, 1258 NULL, 1259 NULL, 1260 MatCreateMPIMatConcatenateSeqMat_MPIDense, 1261 /*145*/ NULL, 1262 NULL, 1263 NULL, 1264 NULL, 1265 NULL, 1266 /*150*/ NULL, 1267 NULL}; 1268 1269 static PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat, PetscScalar *data) 1270 { 1271 Mat_MPIDense *a = (Mat_MPIDense *)mat->data; 1272 MatType mtype = MATSEQDENSE; 1273 1274 PetscFunctionBegin; 1275 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1276 PetscCall(PetscLayoutSetUp(mat->rmap)); 1277 PetscCall(PetscLayoutSetUp(mat->cmap)); 1278 if (!a->A) { 1279 PetscCall(MatCreate(PETSC_COMM_SELF, &a->A)); 1280 PetscCall(MatSetSizes(a->A, mat->rmap->n, mat->cmap->N, mat->rmap->n, mat->cmap->N)); 1281 } 1282 #if defined(PETSC_HAVE_CUDA) 1283 PetscBool iscuda; 1284 PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSECUDA, &iscuda)); 1285 if (iscuda) 
mtype = MATSEQDENSECUDA; 1286 #endif 1287 #if defined(PETSC_HAVE_HIP) 1288 PetscBool iship; 1289 PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSEHIP, &iship)); 1290 if (iship) mtype = MATSEQDENSEHIP; 1291 #endif 1292 PetscCall(MatSetType(a->A, mtype)); 1293 PetscCall(MatSeqDenseSetPreallocation(a->A, data)); 1294 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) 1295 mat->offloadmask = a->A->offloadmask; 1296 #endif 1297 mat->preallocated = PETSC_TRUE; 1298 mat->assembled = PETSC_TRUE; 1299 PetscFunctionReturn(PETSC_SUCCESS); 1300 } 1301 1302 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIDense(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 1303 { 1304 Mat B, C; 1305 1306 PetscFunctionBegin; 1307 PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &C)); 1308 PetscCall(MatConvert_SeqAIJ_SeqDense(C, MATSEQDENSE, MAT_INITIAL_MATRIX, &B)); 1309 PetscCall(MatDestroy(&C)); 1310 if (reuse == MAT_REUSE_MATRIX) { 1311 C = *newmat; 1312 } else C = NULL; 1313 PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C)); 1314 PetscCall(MatDestroy(&B)); 1315 if (reuse == MAT_INPLACE_MATRIX) { 1316 PetscCall(MatHeaderReplace(A, &C)); 1317 } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C; 1318 PetscFunctionReturn(PETSC_SUCCESS); 1319 } 1320 1321 static PetscErrorCode MatConvert_MPIDense_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 1322 { 1323 Mat B, C; 1324 1325 PetscFunctionBegin; 1326 PetscCall(MatDenseGetLocalMatrix(A, &C)); 1327 PetscCall(MatConvert_SeqDense_SeqAIJ(C, MATSEQAIJ, MAT_INITIAL_MATRIX, &B)); 1328 if (reuse == MAT_REUSE_MATRIX) { 1329 C = *newmat; 1330 } else C = NULL; 1331 PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? 
MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C)); 1332 PetscCall(MatDestroy(&B)); 1333 if (reuse == MAT_INPLACE_MATRIX) { 1334 PetscCall(MatHeaderReplace(A, &C)); 1335 } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C; 1336 PetscFunctionReturn(PETSC_SUCCESS); 1337 } 1338 1339 #if defined(PETSC_HAVE_ELEMENTAL) 1340 PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 1341 { 1342 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1343 Mat mat_elemental; 1344 PetscScalar *v; 1345 PetscInt m = A->rmap->n, N = A->cmap->N, rstart = A->rmap->rstart, i, *rows, *cols, lda; 1346 1347 PetscFunctionBegin; 1348 if (reuse == MAT_REUSE_MATRIX) { 1349 mat_elemental = *newmat; 1350 PetscCall(MatZeroEntries(*newmat)); 1351 } else { 1352 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental)); 1353 PetscCall(MatSetSizes(mat_elemental, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N)); 1354 PetscCall(MatSetType(mat_elemental, MATELEMENTAL)); 1355 PetscCall(MatSetUp(mat_elemental)); 1356 PetscCall(MatSetOption(mat_elemental, MAT_ROW_ORIENTED, PETSC_FALSE)); 1357 } 1358 1359 PetscCall(PetscMalloc2(m, &rows, N, &cols)); 1360 for (i = 0; i < N; i++) cols[i] = i; 1361 for (i = 0; i < m; i++) rows[i] = rstart + i; 1362 1363 /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */ 1364 PetscCall(MatDenseGetArray(A, &v)); 1365 PetscCall(MatDenseGetLDA(a->A, &lda)); 1366 if (lda == m) PetscCall(MatSetValues(mat_elemental, m, rows, N, cols, v, ADD_VALUES)); 1367 else { 1368 for (i = 0; i < N; i++) PetscCall(MatSetValues(mat_elemental, m, rows, 1, &i, v + lda * i, ADD_VALUES)); 1369 } 1370 PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY)); 1371 PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY)); 1372 PetscCall(MatDenseRestoreArray(A, &v)); 1373 PetscCall(PetscFree2(rows, cols)); 1374 1375 if (reuse == MAT_INPLACE_MATRIX) { 1376 PetscCall(MatHeaderReplace(A, 
&mat_elemental)); 1377 } else { 1378 *newmat = mat_elemental; 1379 } 1380 PetscFunctionReturn(PETSC_SUCCESS); 1381 } 1382 #endif 1383 1384 static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A, PetscInt col, PetscScalar **vals) 1385 { 1386 Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 1387 1388 PetscFunctionBegin; 1389 PetscCall(MatDenseGetColumn(mat->A, col, vals)); 1390 PetscFunctionReturn(PETSC_SUCCESS); 1391 } 1392 1393 static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A, PetscScalar **vals) 1394 { 1395 Mat_MPIDense *mat = (Mat_MPIDense *)A->data; 1396 1397 PetscFunctionBegin; 1398 PetscCall(MatDenseRestoreColumn(mat->A, vals)); 1399 PetscFunctionReturn(PETSC_SUCCESS); 1400 } 1401 1402 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 1403 { 1404 Mat_MPIDense *mat; 1405 PetscInt m, nloc, N; 1406 1407 PetscFunctionBegin; 1408 PetscCall(MatGetSize(inmat, &m, &N)); 1409 PetscCall(MatGetLocalSize(inmat, NULL, &nloc)); 1410 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 1411 PetscInt sum; 1412 1413 if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N)); 1414 /* Check sum(n) = N */ 1415 PetscCall(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm)); 1416 PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N); 1417 1418 PetscCall(MatCreateDense(comm, m, n, PETSC_DETERMINE, N, NULL, outmat)); 1419 PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 1420 } 1421 1422 /* numeric phase */ 1423 mat = (Mat_MPIDense *)(*outmat)->data; 1424 PetscCall(MatCopy(inmat, mat->A, SAME_NONZERO_PATTERN)); 1425 PetscFunctionReturn(PETSC_SUCCESS); 1426 } 1427 1428 PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A, PetscInt col, Vec *v) 1429 { 1430 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1431 PetscInt lda; 1432 1433 PetscFunctionBegin; 1434 PetscCheck(!a->vecinuse, 
PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1435 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1436 if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec)); 1437 a->vecinuse = col + 1; 1438 PetscCall(MatDenseGetLDA(a->A, &lda)); 1439 PetscCall(MatDenseGetArray(a->A, (PetscScalar **)&a->ptrinuse)); 1440 PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda)); 1441 *v = a->cvec; 1442 PetscFunctionReturn(PETSC_SUCCESS); 1443 } 1444 1445 PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A, PetscInt col, Vec *v) 1446 { 1447 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1448 1449 PetscFunctionBegin; 1450 PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first"); 1451 PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector"); 1452 a->vecinuse = 0; 1453 PetscCall(MatDenseRestoreArray(a->A, (PetscScalar **)&a->ptrinuse)); 1454 PetscCall(VecResetArray(a->cvec)); 1455 if (v) *v = NULL; 1456 PetscFunctionReturn(PETSC_SUCCESS); 1457 } 1458 1459 PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v) 1460 { 1461 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1462 PetscInt lda; 1463 1464 PetscFunctionBegin; 1465 PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1466 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1467 if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec)); 1468 a->vecinuse = col + 1; 1469 PetscCall(MatDenseGetLDA(a->A, &lda)); 1470 PetscCall(MatDenseGetArrayRead(a->A, &a->ptrinuse)); 1471 PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda)); 1472 PetscCall(VecLockReadPush(a->cvec)); 1473 *v = a->cvec; 1474 
PetscFunctionReturn(PETSC_SUCCESS); 1475 } 1476 1477 PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v) 1478 { 1479 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1480 1481 PetscFunctionBegin; 1482 PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first"); 1483 PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector"); 1484 a->vecinuse = 0; 1485 PetscCall(MatDenseRestoreArrayRead(a->A, &a->ptrinuse)); 1486 PetscCall(VecLockReadPop(a->cvec)); 1487 PetscCall(VecResetArray(a->cvec)); 1488 if (v) *v = NULL; 1489 PetscFunctionReturn(PETSC_SUCCESS); 1490 } 1491 1492 PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v) 1493 { 1494 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1495 PetscInt lda; 1496 1497 PetscFunctionBegin; 1498 PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1499 PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1500 if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec)); 1501 a->vecinuse = col + 1; 1502 PetscCall(MatDenseGetLDA(a->A, &lda)); 1503 PetscCall(MatDenseGetArrayWrite(a->A, (PetscScalar **)&a->ptrinuse)); 1504 PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda)); 1505 *v = a->cvec; 1506 PetscFunctionReturn(PETSC_SUCCESS); 1507 } 1508 1509 PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v) 1510 { 1511 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1512 1513 PetscFunctionBegin; 1514 PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first"); 1515 PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector"); 1516 a->vecinuse = 0; 1517 PetscCall(MatDenseRestoreArrayWrite(a->A, 
(PetscScalar **)&a->ptrinuse)); 1518 PetscCall(VecResetArray(a->cvec)); 1519 if (v) *v = NULL; 1520 PetscFunctionReturn(PETSC_SUCCESS); 1521 } 1522 1523 static PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A, PetscInt rbegin, PetscInt rend, PetscInt cbegin, PetscInt cend, Mat *v) 1524 { 1525 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1526 Mat_MPIDense *c; 1527 MPI_Comm comm; 1528 PetscInt pbegin, pend; 1529 1530 PetscFunctionBegin; 1531 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 1532 PetscCheck(!a->vecinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first"); 1533 PetscCheck(!a->matinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1534 pbegin = PetscMax(0, PetscMin(A->rmap->rend, rbegin) - A->rmap->rstart); 1535 pend = PetscMin(A->rmap->n, PetscMax(0, rend - A->rmap->rstart)); 1536 if (!a->cmat) { 1537 PetscCall(MatCreate(comm, &a->cmat)); 1538 PetscCall(MatSetType(a->cmat, ((PetscObject)A)->type_name)); 1539 if (rend - rbegin == A->rmap->N) PetscCall(PetscLayoutReference(A->rmap, &a->cmat->rmap)); 1540 else { 1541 PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin)); 1542 PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin)); 1543 PetscCall(PetscLayoutSetUp(a->cmat->rmap)); 1544 } 1545 PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin)); 1546 PetscCall(PetscLayoutSetUp(a->cmat->cmap)); 1547 } else { 1548 PetscBool same = (PetscBool)(rend - rbegin == a->cmat->rmap->N); 1549 if (same && a->cmat->rmap->N != A->rmap->N) { 1550 same = (PetscBool)(pend - pbegin == a->cmat->rmap->n); 1551 PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &same, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A))); 1552 } 1553 if (!same) { 1554 PetscCall(PetscLayoutDestroy(&a->cmat->rmap)); 1555 PetscCall(PetscLayoutCreate(comm, &a->cmat->rmap)); 1556 PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin)); 1557 PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin)); 1558 
PetscCall(PetscLayoutSetUp(a->cmat->rmap)); 1559 } 1560 if (cend - cbegin != a->cmat->cmap->N) { 1561 PetscCall(PetscLayoutDestroy(&a->cmat->cmap)); 1562 PetscCall(PetscLayoutCreate(comm, &a->cmat->cmap)); 1563 PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin)); 1564 PetscCall(PetscLayoutSetUp(a->cmat->cmap)); 1565 } 1566 } 1567 c = (Mat_MPIDense *)a->cmat->data; 1568 PetscCheck(!c->A, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first"); 1569 PetscCall(MatDenseGetSubMatrix(a->A, pbegin, pend, cbegin, cend, &c->A)); 1570 1571 a->cmat->preallocated = PETSC_TRUE; 1572 a->cmat->assembled = PETSC_TRUE; 1573 #if defined(PETSC_HAVE_DEVICE) 1574 a->cmat->offloadmask = c->A->offloadmask; 1575 #endif 1576 a->matinuse = cbegin + 1; 1577 *v = a->cmat; 1578 PetscFunctionReturn(PETSC_SUCCESS); 1579 } 1580 1581 static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A, Mat *v) 1582 { 1583 Mat_MPIDense *a = (Mat_MPIDense *)A->data; 1584 Mat_MPIDense *c; 1585 1586 PetscFunctionBegin; 1587 PetscCheck(a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetSubMatrix() first"); 1588 PetscCheck(a->cmat, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal matrix"); 1589 PetscCheck(*v == a->cmat, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Not the matrix obtained from MatDenseGetSubMatrix()"); 1590 a->matinuse = 0; 1591 c = (Mat_MPIDense *)a->cmat->data; 1592 PetscCall(MatDenseRestoreSubMatrix(a->A, &c->A)); 1593 if (v) *v = NULL; 1594 #if defined(PETSC_HAVE_DEVICE) 1595 A->offloadmask = a->A->offloadmask; 1596 #endif 1597 PetscFunctionReturn(PETSC_SUCCESS); 1598 } 1599 1600 /*MC 1601 MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices. 1602 1603 Options Database Key: 1604 . 
-mat_type mpidense - sets the matrix type to `MATMPIDENSE` during a call to `MatSetFromOptions()`

   Level: beginner

.seealso: [](ch_matrices), `Mat`, `MatCreateDense()`, `MATSEQDENSE`, `MATDENSE`
M*/
/* Type constructor for MATMPIDENSE: allocates the per-type data, installs the
   function table, creates the stash used for off-process MatSetValues(), and
   composes the dense-specific access/conversion/product methods that are
   dispatched elsewhere by string name. */
PetscErrorCode MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense *a;

  PetscFunctionBegin;
  PetscCall(PetscNew(&a));
  mat->data   = (void *)a;
  mat->ops[0] = MatOps_Values;

  mat->insertmode = NOT_SET_VALUES;

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;

  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)mat), 1, &mat->stash));

  /* stuff used for matrix vector multiply */
  a->lvec        = NULL;
  a->Mvctx       = NULL;
  a->roworiented = PETSC_TRUE;

  /* dense array access methods (dispatched via PetscUseMethod/PetscTryMethod) */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", MatDenseGetLDA_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", MatDenseSetLDA_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", MatDenseGetArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", MatDenseRestoreArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", MatDenseGetArrayRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", MatDenseRestoreArrayRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", MatDenseGetArrayWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", MatDenseRestoreArrayWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", MatDensePlaceArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", MatDenseResetArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", MatDenseReplaceArray_MPIDense));
  /* column-vector and submatrix views */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", MatDenseGetSubMatrix_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", MatDenseRestoreSubMatrix_MPIDense));
  /* type conversions, keyed by "MatConvert_<from>_<to>_C" */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", MatConvert_MPIDense_MPIAIJ));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", MatConvert_MPIDense_Elemental));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", MatConvert_Dense_ScaLAPACK));
#endif
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", MatConvert_MPIDense_MPIDenseCUDA));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", MatMPIDenseSetPreallocation_MPIDense));
  /* matrix-product hooks */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
#endif
#if defined(PETSC_HAVE_HIP)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", MatConvert_MPIDense_MPIDenseHIP));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", MatDenseGetColumn_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", MatDenseRestoreColumn_MPIDense));
  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, MATMPIDENSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*MC
   MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.

   This matrix type is identical to `MATSEQDENSE` when constructed with a single process communicator,
   and `MATMPIDENSE` otherwise.

   Options Database Key:
.
-mat_type dense - sets the matrix type to `MATDENSE` during a call to `MatSetFromOptions()`

   Level: beginner

.seealso: [](ch_matrices), `Mat`, `MATSEQDENSE`, `MATMPIDENSE`, `MATDENSECUDA`, `MATDENSEHIP`
M*/

/*@C
   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

   Collective

   Input Parameters:
+  B - the matrix
-  data - optional location of matrix data. Set to `NULL` for PETSc
          to control all matrix memory allocation.

   Level: intermediate

   Notes:
   The dense format is fully compatible with standard Fortran
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space. Most users should
   set `data` to `NULL`.

.seealso: [](ch_matrices), `Mat`, `MATMPIDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
@*/
PetscErrorCode MatMPIDenseSetPreallocation(Mat B, PetscScalar *data)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  /* PetscTryMethod: silently a no-op when B's type does not compose
     "MatMPIDenseSetPreallocation_C" (e.g. a seq dense matrix) */
  PetscTryMethod(B, "MatMPIDenseSetPreallocation_C", (Mat, PetscScalar *), (B, data));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
   MatDensePlaceArray - Allows one to replace the array in a `MATDENSE` matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Level: developer

   Note:
   You can return to the original array with a call to `MatDenseResetArray()`. The user is responsible for freeing this array; it will not be
   freed when the matrix is destroyed.

.seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDenseResetArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`,
          `MatDenseReplaceArray()`
@*/
PetscErrorCode MatDensePlaceArray(Mat mat, const PetscScalar *array)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  PetscUseMethod(mat, "MatDensePlaceArray_C", (Mat, const PetscScalar *), (mat, array));
  /* the matrix values changed identity, so bump the object state */
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
  /* the placed array is host memory, so the CPU copy is now authoritative */
  mat->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
   MatDenseResetArray - Resets the matrix array to that it previously had before the call to `MatDensePlaceArray()`

   Not Collective

   Input Parameter:
.  mat - the matrix

   Level: developer

   Note:
   You can only call this after a call to `MatDensePlaceArray()`

.seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDensePlaceArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`
@*/
PetscErrorCode MatDenseResetArray(Mat mat)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  PetscUseMethod(mat, "MatDenseResetArray_C", (Mat), (mat));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
   MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an
   array provided by the user.
This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Level: developer

   Note:
   The memory passed in MUST be obtained with `PetscMalloc()` and CANNOT be
   freed by the user. It will be freed when the matrix is destroyed.

.seealso: [](ch_matrices), `Mat`, `MatDensePlaceArray()`, `MatDenseGetArray()`, `VecReplaceArray()`
@*/
PetscErrorCode MatDenseReplaceArray(Mat mat, const PetscScalar *array)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  PetscUseMethod(mat, "MatDenseReplaceArray_C", (Mat, const PetscScalar *), (mat, array));
  /* values changed: bump state so cached norms/factorizations are invalidated */
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
  /* replacement array is host memory; mark the CPU copy authoritative */
  mat->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
   MatCreateDense - Creates a matrix in `MATDENSE` format.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or `PETSC_DECIDE` to have calculated if `M` is given)
.  n - number of local columns (or `PETSC_DECIDE` to have calculated if `N` is given)
.  M - number of global rows (or `PETSC_DECIDE` to have calculated if `m` is given)
.  N - number of global columns (or `PETSC_DECIDE` to have calculated if `n` is given)
-  data - optional location of matrix data. Set data to `NULL` (`PETSC_NULL_SCALAR` for Fortran users) for PETSc
          to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Level: intermediate

   Notes:
   The dense format is fully compatible with standard Fortran
   storage by columns.

   Although local portions of the matrix are stored in column-major
   order, the matrix is partitioned across MPI ranks by row.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space. Most users should
   set `data` to `NULL` (`PETSC_NULL_SCALAR` for Fortran users).

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

.seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
@*/
PetscErrorCode MatCreateDense(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar *data, Mat *A)
{
  PetscFunctionBegin;
  PetscCall(MatCreate(comm, A));
  PetscCall(MatSetSizes(*A, m, n, M, N));
  PetscCall(MatSetType(*A, MATDENSE));
  /* exactly one of the two preallocation calls takes effect: each is a
     Try-method that is a no-op for the other (seq vs mpi) instantiation */
  PetscCall(MatSeqDenseSetPreallocation(*A, data));
  PetscCall(MatMPIDenseSetPreallocation(*A, data));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Duplicate an MPI dense matrix: mirror the layout/flags of A, share its
   row/column PetscLayouts by reference, and duplicate the local dense block. */
static PetscErrorCode MatDuplicate_MPIDense(Mat A, MatDuplicateOption cpvalues, Mat *newmat)
{
  Mat           mat;
  Mat_MPIDense *a, *oldmat = (Mat_MPIDense *)A->data;

  PetscFunctionBegin;
  *newmat = NULL;
  PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat));
  PetscCall(MatSetSizes(mat, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
  PetscCall(MatSetType(mat, ((PetscObject)A)->type_name));
  a = (Mat_MPIDense *)mat->data;

  mat->factortype   = A->factortype;
  mat->assembled    = PETSC_TRUE;
  mat->preallocated = PETSC_TRUE;

  mat->insertmode = NOT_SET_VALUES;
  a->donotstash   = oldmat->donotstash;

  /* layouts are reference-counted, not copied */
  PetscCall(PetscLayoutReference(A->rmap, &mat->rmap));
  PetscCall(PetscLayoutReference(A->cmap, &mat->cmap));

  /* cpvalues decides whether the local values are copied or left uninitialized */
  PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));

  *newmat = mat;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Load an MPI dense matrix from a binary or (optionally) HDF5 viewer. */
static PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
{
  PetscBool isbinary;
#if defined(PETSC_HAVE_HDF5)
  PetscBool ishdf5;
#endif

  PetscFunctionBegin;
  PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
  PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
  /* force binary viewer to load .info file if it has not yet done so */
  PetscCall(PetscViewerSetUp(viewer));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
#if defined(PETSC_HAVE_HDF5)
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
#endif
  if (isbinary) {
    PetscCall(MatLoad_Dense_Binary(newMat, viewer));
#if defined(PETSC_HAVE_HDF5)
  } else if (ishdf5) {
    PetscCall(MatLoad_Dense_HDF5(newMat, viewer));
#endif
  } else SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Equality test: compare the local blocks, then AND the result across all
   ranks so every process returns the same answer. */
static PetscErrorCode MatEqual_MPIDense(Mat A, Mat B, PetscBool *flag)
{
  Mat_MPIDense *matB = (Mat_MPIDense *)B->data, *matA = (Mat_MPIDense *)A->data;
  Mat           a, b;

  PetscFunctionBegin;
  a = matA->A;
  b = matB->A;
  PetscCall(MatEqual(a, b, flag));
  PetscCall(MPIU_Allreduce(MPI_IN_PLACE, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Destructor for the A^T*B product cache (Mat_TransMatMultDense). */
static PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data)
{
  Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data;

  PetscFunctionBegin;
  PetscCall(PetscFree2(atb->sendbuf, atb->recvcounts));
  PetscCall(MatDestroy(&atb->atb));
  PetscCall(PetscFree(atb));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Destructor for the A*B^T product cache (Mat_MatTransMultDense). */
static PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data)
{
  Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data;

  PetscFunctionBegin;
  PetscCall(PetscFree2(abt->buf[0],
abt->buf[1]));
  PetscCall(PetscFree2(abt->recvcounts, abt->recvdispls));
  PetscCall(PetscFree(abt));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Numeric phase of C = A^T * B for MPI dense matrices: each rank computes its
   local contribution aseq^T * bseq, packs it by destination rank, and a
   Reduce_scatter sums/delivers each rank's rows of C. */
static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
{
  Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
  Mat_TransMatMultDense *atb;
  MPI_Comm               comm;
  PetscMPIInt            size, *recvcounts;
  PetscScalar           *carray, *sendbuf;
  const PetscScalar     *atbarray;
  PetscInt               i, cN = C->cmap->N, proc, k, j, lda;
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C, 3);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
  atb        = (Mat_TransMatMultDense *)C->product->data;
  recvcounts = atb->recvcounts;
  sendbuf    = atb->sendbuf;

  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));

  /* compute atbarray = aseq^T * bseq (reuse the cached result matrix if present) */
  PetscCall(MatTransposeMatMult(a->A, b->A, atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX, PETSC_DEFAULT, &atb->atb));

  PetscCall(MatGetOwnershipRanges(C, &ranges));

  /* arrange atbarray into sendbuf: destination-rank major, then column, then
     row -- the layout Reduce_scatter expects for recvcounts */
  PetscCall(MatDenseGetArrayRead(atb->atb, &atbarray));
  PetscCall(MatDenseGetLDA(atb->atb, &lda));
  for (proc = 0, k = 0; proc < size; proc++) {
    for (j = 0; j < cN; j++) {
      for (i = ranges[proc]; i < ranges[proc + 1]; i++) sendbuf[k++] = atbarray[i + j * lda];
    }
  }
  PetscCall(MatDenseRestoreArrayRead(atb->atb, &atbarray));

  /* sum all atbarray to local values of C */
  PetscCall(MatDenseGetArrayWrite(c->A, &carray));
  PetscCallMPI(MPI_Reduce_scatter(sendbuf, carray, recvcounts, MPIU_SCALAR, MPIU_SUM, comm));
  PetscCall(MatDenseRestoreArrayWrite(c->A, &carray));
  PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Symbolic phase of C = A^T * B: checks compatible row distributions, sizes C,
   and allocates the pack buffer and Reduce_scatter counts kept for reuse. */
static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
{
  MPI_Comm               comm;
  PetscMPIInt            size;
  PetscInt               cm = A->cmap->n, cM, cN = B->cmap->N;
  Mat_TransMatMultDense *atb;
  PetscBool              cisdense = PETSC_FALSE;
  PetscInt               i;
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C, 4);
  PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  /* A and B must share the same row ownership for the local A^T*B to be valid */
  if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
    SETERRQ(comm, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
  }

  /* create matrix product C */
  PetscCall(MatSetSizes(C, cm, B->cmap->n, A->cmap->N, B->cmap->N));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, ""));
#endif
#if defined(PETSC_HAVE_HIP)
  /* NOTE(review): when both CUDA and HIP are enabled this second check
     overwrites the CUDA result -- confirm that is intended */
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSEHIP, ""));
#endif
  if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
  PetscCall(MatSetUp(C));

  /* create data structure for reuse C */
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCall(PetscNew(&atb));
  cM = C->rmap->N;
  /* sendbuf holds the full cM x cN product; recvcounts drives Reduce_scatter */
  PetscCall(PetscMalloc2(cM * cN, &atb->sendbuf, size, &atb->recvcounts));
  PetscCall(MatGetOwnershipRanges(C, &ranges));
  for (i = 0; i < size; i++) atb->recvcounts[i] = (ranges[i + 1] - ranges[i]) * cN;

  C->product->data    = atb;
  C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Symbolic phase of C = A * B^T: picks the communication algorithm
   ("allgatherv" or "cyclic"), sizes C, and allocates the reusable buffers. */
static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
{
  MPI_Comm               comm;
  PetscMPIInt            i, size;
  PetscInt               maxRows, bufsiz;
  PetscMPIInt            tag;
  PetscInt               alg;
  Mat_MatTransMultDense *abt;
  Mat_Product           *product = C->product;
  PetscBool              flg;

  PetscFunctionBegin;
  MatCheckProduct(C, 4);
  PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
  /* check local size of A and B */
  PetscCheck(A->cmap->n == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local column dimensions are incompatible, A (%" PetscInt_FMT ") != B (%" PetscInt_FMT ")", A->cmap->n, B->cmap->n);

  /* alg 0 = "allgatherv", anything else = "cyclic" */
  PetscCall(PetscStrcmp(product->alg, "allgatherv", &flg));
  alg = flg ? 0 : 1;

  /* setup matrix product C */
  PetscCall(MatSetSizes(C, A->rmap->n, B->rmap->n, A->rmap->N, B->rmap->N));
  PetscCall(MatSetType(C, MATMPIDENSE));
  PetscCall(MatSetUp(C));
  /* private tag so the cyclic Isend/Irecv traffic cannot collide with others */
  PetscCall(PetscObjectGetNewTag((PetscObject)C, &tag));

  /* create data structure for reuse C */
  PetscCall(PetscObjectGetComm((PetscObject)C, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCall(PetscNew(&abt));
  abt->tag = tag;
  abt->alg = alg;
  switch (alg) {
  case 1: /* alg: "cyclic" -- two ping-pong buffers sized for the largest block of B rows */
    for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i]));
    bufsiz = A->cmap->N * maxRows;
    PetscCall(PetscMalloc2(bufsiz, &(abt->buf[0]), bufsiz, &(abt->buf[1])));
    break;
  default: /* alg: "allgatherv" -- local B^T pack plus the gathered global B^T */
    PetscCall(PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1])));
    PetscCall(PetscMalloc2(size, &(abt->recvcounts), size + 1, &(abt->recvdispls)));
    for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N;
    for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i];
    break;
  }

  C->product->data                = abt;
  C->product->destroy             = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
  C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Numeric phase of C = A * B^T, "cyclic" algorithm: the local rows of B are
   passed around the ring of ranks while each rank multiplies its local A
   against the block currently in hand. */
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
{
  Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
  Mat_MatTransMultDense *abt;
  MPI_Comm               comm;
  PetscMPIInt            rank, size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom;
  PetscScalar           *sendbuf, *recvbuf = NULL, *cv;
  PetscInt               i, cK = A->cmap->N, k, j, bn;
  PetscScalar            _DOne = 1.0, _DZero = 0.0;
  const PetscScalar     *av, *bv;
  PetscBLASInt    cm, cn, ck, alda, blda = 0, clda;
  MPI_Request     reqs[2];
  const PetscInt *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C, 3);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
  abt = (Mat_MatTransMultDense *)C->product->data;
  PetscCall(PetscObjectGetComm((PetscObject)C, &comm));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCall(MatDenseGetArrayRead(a->A, &av));
  PetscCall(MatDenseGetArrayRead(b->A, &bv));
  PetscCall(MatDenseGetArrayWrite(c->A, &cv));
  PetscCall(MatDenseGetLDA(a->A, &i));
  PetscCall(PetscBLASIntCast(i, &alda));
  PetscCall(MatDenseGetLDA(b->A, &i));
  PetscCall(PetscBLASIntCast(i, &blda));
  PetscCall(MatDenseGetLDA(c->A, &i));
  PetscCall(PetscBLASIntCast(i, &clda));
  PetscCall(MatGetOwnershipRanges(B, &ranges));
  bn = B->rmap->n;
  if (blda == bn) {
    /* B's local block is contiguous: send it in place without a copy */
    sendbuf = (PetscScalar *)bv;
  } else {
    /* compact B's local block (strip the LDA padding) into buf[0] */
    sendbuf = abt->buf[0];
    for (k = 0, i = 0; i < cK; i++) {
      for (j = 0; j < bn; j++, k++) sendbuf[k] = bv[i * blda + j];
    }
  }
  if (size > 1) {
    /* ring neighbors: send "down", receive from "up" */
    sendto   = (rank + size - 1) % size;
    recvfrom = (rank + size + 1) % size;
  } else {
    sendto = recvfrom = 0;
  }
  PetscCall(PetscBLASIntCast(cK, &ck));
  PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm));
  recvisfrom = rank;
  for (i = 0; i < size; i++) {
    /* we have finished receiving in sending, bufs can be read/modified */
    PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */
    PetscInt nextbn         = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];

    if (nextrecvisfrom != rank) {
      /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */
      sendsiz = cK * bn;
      recvsiz = cK * nextbn;
      recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1];
      PetscCallMPI(MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]));
      PetscCallMPI(MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]));
    }

    /* local aseq * sendbuf^T, written into the column block of C owned by
       the rank the current buffer originated on */
    PetscCall(PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn));
    if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &cm, &cn, &ck, &_DOne, av, &alda, sendbuf, &cn, &_DZero, cv + clda * ranges[recvisfrom], &clda));

    if (nextrecvisfrom != rank) {
      /* wait for the sends and receives to complete, swap sendbuf and recvbuf */
      PetscCallMPI(MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE));
    }
    bn         = nextbn;
    recvisfrom = nextrecvisfrom;
    sendbuf    = recvbuf;
  }
  PetscCall(MatDenseRestoreArrayRead(a->A, &av));
  PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
  PetscCall(MatDenseRestoreArrayWrite(c->A, &cv));
  PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Numeric phase of C = A * B^T, "allgatherv" algorithm: every rank gathers the
   whole of B^T and performs a single local GEMM. */
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C)
{
  Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
  Mat_MatTransMultDense *abt;
  MPI_Comm               comm;
  PetscMPIInt            size;
  PetscScalar           *cv, *sendbuf, *recvbuf;
  const PetscScalar     *av, *bv;
  PetscInt               blda, i, cK = A->cmap->N, k, j, bn;
  PetscScalar            _DOne = 1.0, _DZero = 0.0;
  PetscBLASInt           cm, cn, ck, alda, clda;

  PetscFunctionBegin;
  MatCheckProduct(C, 3);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
  abt = (Mat_MatTransMultDense *)C->product->data;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCall(MatDenseGetArrayRead(a->A, &av));
PetscCall(MatDenseGetArrayRead(b->A, &bv)); 2187 PetscCall(MatDenseGetArrayWrite(c->A, &cv)); 2188 PetscCall(MatDenseGetLDA(a->A, &i)); 2189 PetscCall(PetscBLASIntCast(i, &alda)); 2190 PetscCall(MatDenseGetLDA(b->A, &blda)); 2191 PetscCall(MatDenseGetLDA(c->A, &i)); 2192 PetscCall(PetscBLASIntCast(i, &clda)); 2193 /* copy transpose of B into buf[0] */ 2194 bn = B->rmap->n; 2195 sendbuf = abt->buf[0]; 2196 recvbuf = abt->buf[1]; 2197 for (k = 0, j = 0; j < bn; j++) { 2198 for (i = 0; i < cK; i++, k++) sendbuf[k] = bv[i * blda + j]; 2199 } 2200 PetscCall(MatDenseRestoreArrayRead(b->A, &bv)); 2201 PetscCallMPI(MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm)); 2202 PetscCall(PetscBLASIntCast(cK, &ck)); 2203 PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm)); 2204 PetscCall(PetscBLASIntCast(c->A->cmap->n, &cn)); 2205 if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &cm, &cn, &ck, &_DOne, av, &alda, recvbuf, &ck, &_DZero, cv, &clda)); 2206 PetscCall(MatDenseRestoreArrayRead(a->A, &av)); 2207 PetscCall(MatDenseRestoreArrayRead(b->A, &bv)); 2208 PetscCall(MatDenseRestoreArrayWrite(c->A, &cv)); 2209 PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 2210 PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 2211 PetscFunctionReturn(PETSC_SUCCESS); 2212 } 2213 2214 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) 2215 { 2216 Mat_MatTransMultDense *abt; 2217 2218 PetscFunctionBegin; 2219 MatCheckProduct(C, 3); 2220 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 2221 abt = (Mat_MatTransMultDense *)C->product->data; 2222 switch (abt->alg) { 2223 case 1: 2224 PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C)); 2225 break; 2226 default: 2227 PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C)); 2228 break; 2229 } 2230 PetscFunctionReturn(PETSC_SUCCESS); 2231 } 2232 2233 #if 
defined(PETSC_HAVE_ELEMENTAL)
/* Destructor for the Elemental-backed A*B product cache (Mat_MatMultDense). */
static PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data)
{
  Mat_MatMultDense *ab = (Mat_MatMultDense *)data;

  PetscFunctionBegin;
  PetscCall(MatDestroy(&ab->Ce));
  PetscCall(MatDestroy(&ab->Ae));
  PetscCall(MatDestroy(&ab->Be));
  PetscCall(PetscFree(ab));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Numeric phase of C = A * B via Elemental: refresh the cached Elemental
   copies of A and B, multiply there, and convert the result back into C. */
PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
{
  Mat_MatMultDense *ab;

  PetscFunctionBegin;
  MatCheckProduct(C, 3);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Missing product data");
  ab = (Mat_MatMultDense *)C->product->data;
  PetscCall(MatConvert_MPIDense_Elemental(A, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Ae));
  PetscCall(MatConvert_MPIDense_Elemental(B, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Be));
  PetscCall(MatMatMultNumeric_Elemental(ab->Ae, ab->Be, ab->Ce));
  PetscCall(MatConvert(ab->Ce, MATMPIDENSE, MAT_REUSE_MATRIX, &C));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Symbolic phase of C = A * B via Elemental: builds the Elemental mirrors of
   A and B, the symbolic Elemental product Ce, and sizes C. */
static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
{
  Mat               Ae, Be, Ce;
  Mat_MatMultDense *ab;

  PetscFunctionBegin;
  MatCheckProduct(C, 4);
  PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
  /* check local size of A and B */
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
    SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
  }

  /* create elemental matrices Ae and Be */
  PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &Ae));
  PetscCall(MatSetSizes(Ae, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N));
  PetscCall(MatSetType(Ae, MATELEMENTAL));
  PetscCall(MatSetUp(Ae));
  /* column-oriented insertion matches the dense storage order */
  PetscCall(MatSetOption(Ae, MAT_ROW_ORIENTED, PETSC_FALSE));

  PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &Be));
  PetscCall(MatSetSizes(Be, PETSC_DECIDE, PETSC_DECIDE, B->rmap->N, B->cmap->N));
  PetscCall(MatSetType(Be, MATELEMENTAL));
  PetscCall(MatSetUp(Be));
  PetscCall(MatSetOption(Be, MAT_ROW_ORIENTED, PETSC_FALSE));

  /* compute symbolic Ce = Ae*Be */
  PetscCall(MatCreate(PetscObjectComm((PetscObject)C), &Ce));
  PetscCall(MatMatMultSymbolic_Elemental(Ae, Be, fill, Ce));

  /* setup C */
  PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, PETSC_DECIDE, PETSC_DECIDE));
  PetscCall(MatSetType(C, MATDENSE));
  PetscCall(MatSetUp(C));

  /* create data structure for reuse Cdense */
  PetscCall(PetscNew(&ab));
  ab->Ae = Ae;
  ab->Be = Be;
  ab->Ce = Ce;

  C->product->data       = ab;
  C->product->destroy    = MatDestroy_MatMatMult_MPIDense_MPIDense;
  C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif

#if defined(PETSC_HAVE_ELEMENTAL)
/* Install the AB (plain multiply) hooks; only available through Elemental. */
static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C)
{
  PetscFunctionBegin;
  C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense;
  C->ops->productsymbolic = MatProductSymbolic_AB;
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif

/* Install the AtB (A^T * B) hooks after validating row-compatible layouts. */
static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C)
{
  Mat_Product *product = C->product;
  Mat          A = product->A, B = product->B;

  PetscFunctionBegin;
  if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend)
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart,
B->rmap->rend);
  C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense;
  C->ops->productsymbolic          = MatProductSymbolic_AtB;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Install the ABt (A * B^T) hooks and resolve the algorithm choice
   ("allgatherv" default, or "cyclic") from the options database. */
static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C)
{
  Mat_Product *product     = C->product;
  const char  *algTypes[2] = {"allgatherv", "cyclic"};
  PetscInt     alg, nalg = 2;
  PetscBool    flg = PETSC_FALSE;

  PetscFunctionBegin;
  /* Set default algorithm */
  alg = 0; /* default is allgatherv */
  PetscCall(PetscStrcmp(product->alg, "default", &flg));
  if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));

  /* Get runtime option; the option name depends on which API the user called */
  if (product->api_user) {
    PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatTransposeMult", "Mat");
    PetscCall(PetscOptionsEList("-matmattransmult_mpidense_mpidense_via", "Algorithmic approach", "MatMatTransposeMult", algTypes, nalg, algTypes[alg], &alg, &flg));
    PetscOptionsEnd();
  } else {
    PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_ABt", "Mat");
    PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_ABt", algTypes, nalg, algTypes[alg], &alg, &flg));
    PetscOptionsEnd();
  }
  if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));

  C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense;
  C->ops->productsymbolic          = MatProductSymbolic_ABt;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Entry point for MatProductSetFromOptions on MPI dense matrices: dispatch on
   the product type; unsupported types fall through untouched so the generic
   layer can report them. */
PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C)
{
  Mat_Product *product = C->product;

  PetscFunctionBegin;
  switch (product->type) {
#if defined(PETSC_HAVE_ELEMENTAL)
  case MATPRODUCT_AB:
    PetscCall(MatProductSetFromOptions_MPIDense_AB(C));
    break;
#endif
  case MATPRODUCT_AtB:
    PetscCall(MatProductSetFromOptions_MPIDense_AtB(C));
    break;
  case MATPRODUCT_ABt:
    PetscCall(MatProductSetFromOptions_MPIDense_ABt(C));
    break;
  default:
    break;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}