1735d7f90SBarry Smith static char help[] = "Demonstrates the use of the COO interface to PETSc matrices for finite element computations\n\n"; 2735d7f90SBarry Smith 3735d7f90SBarry Smith /* 4735d7f90SBarry Smith The COO interface for PETSc matrices provides a convenient way to provide finite element element stiffness matrices to PETSc matrix that should work 5735d7f90SBarry Smith well on both CPUs and GPUs. It is an alternative to using MatSetValues() 6735d7f90SBarry Smith 7735d7f90SBarry Smith This example is intended for people who are NOT using DMPLEX or libCEED or any other higher-level infrastructure for finite elements; 8735d7f90SBarry Smith it is only to demonstrate the concepts in a simple way for those people who are interested and for those people who are using PETSc for 9735d7f90SBarry Smith linear algebra solvers but are managing their own finite element process. 10735d7f90SBarry Smith 11735d7f90SBarry Smith Please do NOT use this example as a starting point to writing your own finite element code from scratch! 12735d7f90SBarry Smith 13735d7f90SBarry Smith Each element in this example has three vertices; hence the the usage below needs to be adjusted for elements of a different number of vertices. 14735d7f90SBarry Smith */ 15735d7f90SBarry Smith 16735d7f90SBarry Smith #include <petscmat.h> 17735d7f90SBarry Smith #include "ex18.h" 18735d7f90SBarry Smith 19d71ae5a4SJacob Faibussowitsch static PetscErrorCode CreateFEStruct(FEStruct *fe) 20d71ae5a4SJacob Faibussowitsch { 21735d7f90SBarry Smith PetscFunctionBeginUser; 22735d7f90SBarry Smith fe->Nv = 5; 23735d7f90SBarry Smith fe->Ne = 3; 249566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(3 * fe->Ne, &fe->vertices)); 25735d7f90SBarry Smith /* the three vertices associated with each element in order of element */ 26735d7f90SBarry Smith fe->vertices[0 + 0] = 0; 27735d7f90SBarry Smith fe->vertices[0 + 1] = 1; 28735d7f90SBarry Smith fe->vertices[0 + 2] = 2; 29735d7f90SBarry Smith fe->vertices[3 + 0] = 2; 30735d7f90SBarry Smith fe->vertices[3 + 1] = 1; 31735d7f90SBarry Smith fe->vertices[3 + 2] = 3; 32735d7f90SBarry Smith fe->vertices[6 + 0] = 2; 33735d7f90SBarry Smith fe->vertices[6 + 1] = 4; 34735d7f90SBarry Smith fe->vertices[6 + 2] = 3; 35735d7f90SBarry Smith fe->n = 5; 36735d7f90SBarry Smith PetscFunctionReturn(0); 37735d7f90SBarry Smith } 38735d7f90SBarry Smith 39d71ae5a4SJacob Faibussowitsch static PetscErrorCode DestroyFEStruct(FEStruct *fe) 40d71ae5a4SJacob Faibussowitsch { 41735d7f90SBarry Smith PetscFunctionBeginUser; 429566063dSJacob Faibussowitsch PetscCall(PetscFree(fe->vertices)); 439566063dSJacob Faibussowitsch PetscCall(PetscFree(fe->coo)); 44735d7f90SBarry Smith PetscFunctionReturn(0); 45735d7f90SBarry Smith } 46735d7f90SBarry Smith 47d71ae5a4SJacob Faibussowitsch static PetscErrorCode CreateMatrix(FEStruct *fe, Mat *A) 48d71ae5a4SJacob Faibussowitsch { 49735d7f90SBarry Smith PetscInt *oor, *ooc, cnt = 0; 50735d7f90SBarry Smith 51735d7f90SBarry Smith PetscFunctionBeginUser; 529566063dSJacob Faibussowitsch PetscCall(MatCreate(PETSC_COMM_WORLD, A)); 539566063dSJacob Faibussowitsch PetscCall(MatSetSizes(*A, fe->n, fe->n, PETSC_DECIDE, PETSC_DECIDE)); 549566063dSJacob Faibussowitsch PetscCall(MatSetFromOptions(*A)); 55735d7f90SBarry Smith 56735d7f90SBarry Smith /* determine for each entry in each element stiffness matrix the global row and colum */ 57735d7f90SBarry Smith /* since the element is triangular with piecewise linear basis functions there are three degrees of freedom per element, one for each vertex */ 589566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(3 * 3 * fe->Ne, &oor, 3 * 3 * fe->Ne, &ooc)); 59735d7f90SBarry Smith for (PetscInt e = 0; e < fe->Ne; e++) { 60735d7f90SBarry Smith for (PetscInt vi = 0; vi < 3; vi++) { 61735d7f90SBarry Smith for (PetscInt vj = 0; vj < 3; vj++) { 62735d7f90SBarry Smith oor[cnt] = fe->vertices[3 * e + vi]; 63735d7f90SBarry Smith ooc[cnt++] = fe->vertices[3 * e + vj]; 64735d7f90SBarry Smith } 65735d7f90SBarry Smith } 66735d7f90SBarry Smith } 679566063dSJacob Faibussowitsch PetscCall(MatSetPreallocationCOO(*A, 3 * 3 * fe->Ne, oor, ooc)); 689566063dSJacob Faibussowitsch PetscCall(PetscFree2(oor, ooc)); 69735d7f90SBarry Smith 70735d7f90SBarry Smith /* determine the offset into the COO value array the offset of each element stiffness; there are 9 = 3*3 entries for each element stiffness */ 71*35cb6cd3SPierre Jolivet /* for lists of elements with different numbers of degrees of freedom associated with each element the offsets will not be uniform */ 729566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(fe->Ne, &fe->coo)); 73735d7f90SBarry Smith fe->coo[0] = 0; 74ad540459SPierre Jolivet for (PetscInt e = 1; e < fe->Ne; e++) fe->coo[e] = fe->coo[e - 1] + 3 * 3; 75735d7f90SBarry Smith PetscFunctionReturn(0); 76735d7f90SBarry Smith } 77735d7f90SBarry Smith 78d71ae5a4SJacob Faibussowitsch static PetscErrorCode FillMatrixCPU(FEStruct *fe, Mat A) 79d71ae5a4SJacob Faibussowitsch { 80735d7f90SBarry Smith PetscScalar s[9]; 81735d7f90SBarry Smith 82735d7f90SBarry Smith PetscFunctionBeginUser; 83735d7f90SBarry Smith /* simulation of traditional PETSc CPU based finite assembly process */ 84735d7f90SBarry Smith for (PetscInt e = 0; e < fe->Ne; e++) { 85735d7f90SBarry Smith for (PetscInt vi = 0; vi < 3; vi++) { 86ad540459SPierre Jolivet for (PetscInt vj = 0; vj < 3; vj++) s[3 * vi + vj] = vi + 2 * vj; 87735d7f90SBarry Smith } 889566063dSJacob Faibussowitsch PetscCall(MatSetValues(A, 3, fe->vertices + 3 * e, 3, fe->vertices + 3 * e, s, ADD_VALUES)); 89735d7f90SBarry Smith } 909566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 919566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 92735d7f90SBarry Smith PetscFunctionReturn(0); 93735d7f90SBarry Smith } 94735d7f90SBarry Smith 95735d7f90SBarry Smith /* 96735d7f90SBarry Smith Shows an example of tracking element offsets explicitly, which allows for 97735d7f90SBarry Smith mixed-topology meshes and combining both volume and surface parts into the weak form. 98735d7f90SBarry Smith */ 99d71ae5a4SJacob Faibussowitsch static PetscErrorCode FillMatrixCPUCOO(FEStruct *fe, Mat A) 100d71ae5a4SJacob Faibussowitsch { 101735d7f90SBarry Smith PetscScalar *v, *s; 102735d7f90SBarry Smith 103735d7f90SBarry Smith PetscFunctionBeginUser; 104735d7f90SBarry Smith /* simulation of CPU based finite assembly process with COO */ 1059566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(3 * 3 * fe->Ne, &v)); 106735d7f90SBarry Smith for (PetscInt e = 0; e < fe->Ne; e++) { 107735d7f90SBarry Smith s = v + fe->coo[e]; /* point to location in COO of current element stiffness */ 108735d7f90SBarry Smith for (PetscInt vi = 0; vi < 3; vi++) { 109ad540459SPierre Jolivet for (PetscInt vj = 0; vj < 3; vj++) s[3 * vi + vj] = vi + 2 * vj; 110735d7f90SBarry Smith } 111735d7f90SBarry Smith } 1129566063dSJacob Faibussowitsch PetscCall(MatSetValuesCOO(A, v, ADD_VALUES)); 1139566063dSJacob Faibussowitsch PetscCall(PetscFree(v)); 114735d7f90SBarry Smith PetscFunctionReturn(0); 115735d7f90SBarry Smith } 116735d7f90SBarry Smith 117735d7f90SBarry Smith /* 118735d7f90SBarry Smith Uses a multi-dimensional indexing technique that works for homogeneous meshes 119735d7f90SBarry Smith such as single-topology with volume integral only. 120735d7f90SBarry Smith */ 121d71ae5a4SJacob Faibussowitsch static PetscErrorCode FillMatrixCPUCOO3d(FEStruct *fe, Mat A) 122d71ae5a4SJacob Faibussowitsch { 123735d7f90SBarry Smith PetscScalar(*s)[3][3]; 124735d7f90SBarry Smith 125735d7f90SBarry Smith PetscFunctionBeginUser; 126735d7f90SBarry Smith /* simulation of CPU based finite assembly process with COO */ 1279566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(fe->Ne, &s)); 128735d7f90SBarry Smith for (PetscInt e = 0; e < fe->Ne; e++) { 129735d7f90SBarry Smith for (PetscInt vi = 0; vi < 3; vi++) { 130ad540459SPierre Jolivet for (PetscInt vj = 0; vj < 3; vj++) s[e][vi][vj] = vi + 2 * vj; 131735d7f90SBarry Smith } 132735d7f90SBarry Smith } 1339566063dSJacob Faibussowitsch PetscCall(MatSetValuesCOO(A, (PetscScalar *)s, INSERT_VALUES)); 1349566063dSJacob Faibussowitsch PetscCall(PetscFree(s)); 135735d7f90SBarry Smith PetscFunctionReturn(0); 136735d7f90SBarry Smith } 137735d7f90SBarry Smith 138d71ae5a4SJacob Faibussowitsch int main(int argc, char **args) 139d71ae5a4SJacob Faibussowitsch { 140735d7f90SBarry Smith Mat A; 141735d7f90SBarry Smith FEStruct fe; 142735d7f90SBarry Smith PetscMPIInt size; 143735d7f90SBarry Smith PetscBool is_kokkos, is_cuda; 144735d7f90SBarry Smith 145327415f7SBarry Smith PetscFunctionBeginUser; 1469566063dSJacob Faibussowitsch PetscCall(PetscInitialize(&argc, &args, (char *)0, help)); 1479566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size)); 14854c59aa7SJacob Faibussowitsch PetscCheck(size <= 1, PETSC_COMM_WORLD, PETSC_ERR_WRONG_MPI_SIZE, "Demonstration is only for sequential runs"); 149735d7f90SBarry Smith 1509566063dSJacob Faibussowitsch PetscCall(CreateFEStruct(&fe)); 1519566063dSJacob Faibussowitsch PetscCall(CreateMatrix(&fe, &A)); 152735d7f90SBarry Smith 1539566063dSJacob Faibussowitsch PetscCall(FillMatrixCPU(&fe, A)); 1549566063dSJacob Faibussowitsch PetscCall(MatView(A, PETSC_VIEWER_STDOUT_WORLD)); 155735d7f90SBarry Smith 1569566063dSJacob Faibussowitsch PetscCall(MatZeroEntries(A)); 1579566063dSJacob Faibussowitsch PetscCall(FillMatrixCPUCOO(&fe, A)); 1589566063dSJacob Faibussowitsch PetscCall(MatView(A, PETSC_VIEWER_STDOUT_WORLD)); 159735d7f90SBarry Smith 1609566063dSJacob Faibussowitsch PetscCall(MatZeroEntries(A)); 1619566063dSJacob Faibussowitsch PetscCall(FillMatrixCPUCOO3d(&fe, A)); 1629566063dSJacob Faibussowitsch PetscCall(MatView(A, PETSC_VIEWER_STDOUT_WORLD)); 163735d7f90SBarry Smith 1649566063dSJacob Faibussowitsch PetscCall(MatZeroEntries(A)); 1659566063dSJacob Faibussowitsch PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJKOKKOS, &is_kokkos)); 1669566063dSJacob Faibussowitsch PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJCUSPARSE, &is_cuda)); 167735d7f90SBarry Smith #if defined(PETSC_HAVE_KOKKOS) 1689566063dSJacob Faibussowitsch if (is_kokkos) PetscCall(FillMatrixKokkosCOO(&fe, A)); 169735d7f90SBarry Smith #endif 170735d7f90SBarry Smith #if defined(PETSC_HAVE_CUDA) 1719566063dSJacob Faibussowitsch if (is_cuda) PetscCall(FillMatrixCUDACOO(&fe, A)); 172735d7f90SBarry Smith #endif 1739566063dSJacob Faibussowitsch PetscCall(MatView(A, PETSC_VIEWER_STDOUT_WORLD)); 174735d7f90SBarry Smith 1759566063dSJacob Faibussowitsch PetscCall(MatDestroy(&A)); 1769566063dSJacob Faibussowitsch PetscCall(DestroyFEStruct(&fe)); 1779566063dSJacob Faibussowitsch PetscCall(PetscFinalize()); 178b122ec5aSJacob Faibussowitsch return 0; 179735d7f90SBarry Smith } 180735d7f90SBarry Smith 181735d7f90SBarry Smith /*TEST 182735d7f90SBarry Smith build: 183735d7f90SBarry Smith requires: cuda kokkos_kernels 184735d7f90SBarry Smith depends: ex18cu.cu ex18kok.kokkos.cxx 185735d7f90SBarry Smith 186735d7f90SBarry Smith testset: 187735d7f90SBarry Smith filter: grep -v "type" 188735d7f90SBarry Smith output_file: output/ex18_1.out 189735d7f90SBarry Smith 190735d7f90SBarry Smith test: 191735d7f90SBarry Smith suffix: kok 192735d7f90SBarry Smith requires: kokkos_kernels 193735d7f90SBarry Smith args: -mat_type aijkokkos 194735d7f90SBarry Smith 195735d7f90SBarry Smith test: 196735d7f90SBarry Smith suffix: cuda 197735d7f90SBarry Smith requires: cuda 198735d7f90SBarry Smith args: -mat_type aijcusparse 199735d7f90SBarry Smith 200735d7f90SBarry Smith TEST*/ 201