xref: /petsc/src/mat/tutorials/ex18.c (revision b122ec5aa1bd4469eb4e0673542fb7de3f411254)
1735d7f90SBarry Smith static char help[] = "Demonstrates the use of the COO interface to PETSc matrices for finite element computations\n\n";
2735d7f90SBarry Smith 
3735d7f90SBarry Smith /*
4735d7f90SBarry Smith      The COO interface for PETSc matrices provides a convenient way to provide finite element element stiffness matrices to PETSc matrix that should work
5735d7f90SBarry Smith    well on both CPUs and GPUs. It is an alternative to using MatSetValues()
6735d7f90SBarry Smith 
7735d7f90SBarry Smith      This example is intended for people who are NOT using DMPLEX or libCEED or any other higher-level infrastructure for finite elements;
8735d7f90SBarry Smith    it is only to demonstrate the concepts in a simple way for those people who are interested and for those people who are using PETSc for
9735d7f90SBarry Smith    linear algebra solvers but are managing their own finite element process.
10735d7f90SBarry Smith 
11735d7f90SBarry Smith      Please do NOT use this example as a starting point to writing your own finite element code from scratch!
12735d7f90SBarry Smith 
13735d7f90SBarry Smith      Each element in this example has three vertices; hence the the usage below needs to be adjusted for elements of a different number of vertices.
14735d7f90SBarry Smith */
15735d7f90SBarry Smith 
16735d7f90SBarry Smith #include <petscmat.h>
17735d7f90SBarry Smith #include "ex18.h"
18735d7f90SBarry Smith 
19735d7f90SBarry Smith static PetscErrorCode CreateFEStruct(FEStruct *fe)
20735d7f90SBarry Smith {
21735d7f90SBarry Smith   PetscFunctionBeginUser;
22735d7f90SBarry Smith   fe->Nv = 5;
23735d7f90SBarry Smith   fe->Ne = 3;
245f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscMalloc1(3*fe->Ne,&fe->vertices));
25735d7f90SBarry Smith   /* the three vertices associated with each element in order of element */
26735d7f90SBarry Smith   fe->vertices[0 + 0] = 0;
27735d7f90SBarry Smith   fe->vertices[0 + 1] = 1;
28735d7f90SBarry Smith   fe->vertices[0 + 2] = 2;
29735d7f90SBarry Smith   fe->vertices[3 + 0] = 2;
30735d7f90SBarry Smith   fe->vertices[3 + 1] = 1;
31735d7f90SBarry Smith   fe->vertices[3 + 2] = 3;
32735d7f90SBarry Smith   fe->vertices[6 + 0] = 2;
33735d7f90SBarry Smith   fe->vertices[6 + 1] = 4;
34735d7f90SBarry Smith   fe->vertices[6 + 2] = 3;
35735d7f90SBarry Smith   fe->n  = 5;
36735d7f90SBarry Smith   PetscFunctionReturn(0);
37735d7f90SBarry Smith }
38735d7f90SBarry Smith 
39735d7f90SBarry Smith static PetscErrorCode DestroyFEStruct(FEStruct *fe)
40735d7f90SBarry Smith {
41735d7f90SBarry Smith   PetscFunctionBeginUser;
425f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscFree(fe->vertices));
435f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscFree(fe->coo));
44735d7f90SBarry Smith   PetscFunctionReturn(0);
45735d7f90SBarry Smith }
46735d7f90SBarry Smith 
47735d7f90SBarry Smith static PetscErrorCode CreateMatrix(FEStruct *fe,Mat *A)
48735d7f90SBarry Smith {
49735d7f90SBarry Smith   PetscInt *oor,*ooc,cnt = 0;
50735d7f90SBarry Smith 
51735d7f90SBarry Smith   PetscFunctionBeginUser;
525f80ce2aSJacob Faibussowitsch   CHKERRQ(MatCreate(PETSC_COMM_WORLD,A));
535f80ce2aSJacob Faibussowitsch   CHKERRQ(MatSetSizes(*A,fe->n,fe->n,PETSC_DECIDE,PETSC_DECIDE));
545f80ce2aSJacob Faibussowitsch   CHKERRQ(MatSetFromOptions(*A));
55735d7f90SBarry Smith 
56735d7f90SBarry Smith   /* determine for each entry in each element stiffness matrix the global row and colum */
57735d7f90SBarry Smith   /* since the element is triangular with piecewise linear basis functions there are three degrees of freedom per element, one for each vertex */
585f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscMalloc2(3*3*fe->Ne,&oor,3*3*fe->Ne,&ooc));
59735d7f90SBarry Smith   for (PetscInt e=0; e<fe->Ne; e++) {
60735d7f90SBarry Smith     for (PetscInt vi=0; vi<3; vi++) {
61735d7f90SBarry Smith       for (PetscInt vj=0; vj<3; vj++) {
62735d7f90SBarry Smith         oor[cnt]   = fe->vertices[3*e+vi];
63735d7f90SBarry Smith         ooc[cnt++] = fe->vertices[3*e+vj];
64735d7f90SBarry Smith       }
65735d7f90SBarry Smith     }
66735d7f90SBarry Smith   }
675f80ce2aSJacob Faibussowitsch   CHKERRQ(MatSetPreallocationCOO(*A,3*3*fe->Ne,oor,ooc));
685f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscFree2(oor,ooc));
69735d7f90SBarry Smith 
70735d7f90SBarry Smith   /* determine the offset into the COO value array the offset of each element stiffness; there are 9 = 3*3 entries for each element stiffness */
71735d7f90SBarry Smith   /* for lists of elements with different numbers of degrees of freedom assocated with each element the offsets will not be uniform */
725f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscMalloc1(fe->Ne,&fe->coo));
73735d7f90SBarry Smith   fe->coo[0] = 0;
74735d7f90SBarry Smith   for (PetscInt e=1; e<fe->Ne; e++) {
75735d7f90SBarry Smith     fe->coo[e] = fe->coo[e-1] + 3*3;
76735d7f90SBarry Smith   }
77735d7f90SBarry Smith   PetscFunctionReturn(0);
78735d7f90SBarry Smith }
79735d7f90SBarry Smith 
80735d7f90SBarry Smith static PetscErrorCode FillMatrixCPU(FEStruct *fe,Mat A)
81735d7f90SBarry Smith {
82735d7f90SBarry Smith   PetscScalar s[9];
83735d7f90SBarry Smith 
84735d7f90SBarry Smith   PetscFunctionBeginUser;
85735d7f90SBarry Smith   /* simulation of traditional PETSc CPU based finite assembly process */
86735d7f90SBarry Smith   for (PetscInt e=0; e<fe->Ne; e++) {
87735d7f90SBarry Smith     for (PetscInt vi=0; vi<3; vi++) {
88735d7f90SBarry Smith       for (PetscInt vj=0; vj<3; vj++) {
89735d7f90SBarry Smith         s[3*vi+vj] = vi+2*vj;
90735d7f90SBarry Smith       }
91735d7f90SBarry Smith     }
925f80ce2aSJacob Faibussowitsch     CHKERRQ(MatSetValues(A,3,fe->vertices + 3*e,3, fe->vertices + 3*e,s,ADD_VALUES));
93735d7f90SBarry Smith   }
945f80ce2aSJacob Faibussowitsch   CHKERRQ(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY));
955f80ce2aSJacob Faibussowitsch   CHKERRQ(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY));
96735d7f90SBarry Smith   PetscFunctionReturn(0);
97735d7f90SBarry Smith }
98735d7f90SBarry Smith 
99735d7f90SBarry Smith /*
100735d7f90SBarry Smith    Shows an example of tracking element offsets explicitly, which allows for
101735d7f90SBarry Smith    mixed-topology meshes and combining both volume and surface parts into the weak form.
102735d7f90SBarry Smith */
103735d7f90SBarry Smith static PetscErrorCode FillMatrixCPUCOO(FEStruct *fe,Mat A)
104735d7f90SBarry Smith {
105735d7f90SBarry Smith   PetscScalar *v,*s;
106735d7f90SBarry Smith 
107735d7f90SBarry Smith   PetscFunctionBeginUser;
108735d7f90SBarry Smith   /* simulation of CPU based finite assembly process with COO */
1095f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscMalloc1(3*3*fe->Ne,&v));
110735d7f90SBarry Smith   for (PetscInt e=0; e<fe->Ne; e++) {
111735d7f90SBarry Smith     s = v + fe->coo[e]; /* point to location in COO of current element stiffness */
112735d7f90SBarry Smith     for (PetscInt vi=0; vi<3; vi++) {
113735d7f90SBarry Smith       for (PetscInt vj=0; vj<3; vj++) {
114735d7f90SBarry Smith         s[3*vi+vj] = vi+2*vj;
115735d7f90SBarry Smith       }
116735d7f90SBarry Smith     }
117735d7f90SBarry Smith   }
1185f80ce2aSJacob Faibussowitsch   CHKERRQ(MatSetValuesCOO(A,v,ADD_VALUES));
1195f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscFree(v));
120735d7f90SBarry Smith   PetscFunctionReturn(0);
121735d7f90SBarry Smith }
122735d7f90SBarry Smith 
123735d7f90SBarry Smith /*
124735d7f90SBarry Smith   Uses a multi-dimensional indexing technique that works for homogeneous meshes
125735d7f90SBarry Smith   such as single-topology with volume integral only.
126735d7f90SBarry Smith */
127735d7f90SBarry Smith static PetscErrorCode FillMatrixCPUCOO3d(FEStruct *fe,Mat A)
128735d7f90SBarry Smith {
129735d7f90SBarry Smith   PetscScalar (*s)[3][3];
130735d7f90SBarry Smith 
131735d7f90SBarry Smith   PetscFunctionBeginUser;
132735d7f90SBarry Smith   /* simulation of CPU based finite assembly process with COO */
1335f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscMalloc1(fe->Ne,&s));
134735d7f90SBarry Smith   for (PetscInt e=0; e<fe->Ne; e++) {
135735d7f90SBarry Smith     for (PetscInt vi=0; vi<3; vi++) {
136735d7f90SBarry Smith       for (PetscInt vj=0; vj<3; vj++) {
137735d7f90SBarry Smith         s[e][vi][vj] = vi+2*vj;
138735d7f90SBarry Smith       }
139735d7f90SBarry Smith     }
140735d7f90SBarry Smith   }
1415f80ce2aSJacob Faibussowitsch   CHKERRQ(MatSetValuesCOO(A,(PetscScalar*)s,INSERT_VALUES));
1425f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscFree(s));
143735d7f90SBarry Smith   PetscFunctionReturn(0);
144735d7f90SBarry Smith }
145735d7f90SBarry Smith 
146735d7f90SBarry Smith int main(int argc, char **args)
147735d7f90SBarry Smith {
148735d7f90SBarry Smith   Mat             A;
149735d7f90SBarry Smith   FEStruct        fe;
150735d7f90SBarry Smith   PetscMPIInt     size;
151735d7f90SBarry Smith   PetscBool       is_kokkos,is_cuda;
152735d7f90SBarry Smith 
153*b122ec5aSJacob Faibussowitsch   CHKERRQ(PetscInitialize(&argc,&args,(char*)0,help));
1545f80ce2aSJacob Faibussowitsch   CHKERRMPI(MPI_Comm_size(PETSC_COMM_WORLD,&size));
15554c59aa7SJacob Faibussowitsch   PetscCheck(size <= 1,PETSC_COMM_WORLD,PETSC_ERR_WRONG_MPI_SIZE,"Demonstration is only for sequential runs");
156735d7f90SBarry Smith 
1575f80ce2aSJacob Faibussowitsch   CHKERRQ(CreateFEStruct(&fe));
1585f80ce2aSJacob Faibussowitsch   CHKERRQ(CreateMatrix(&fe,&A));
159735d7f90SBarry Smith 
1605f80ce2aSJacob Faibussowitsch   CHKERRQ(FillMatrixCPU(&fe,A));
1615f80ce2aSJacob Faibussowitsch   CHKERRQ(MatView(A,PETSC_VIEWER_STDOUT_WORLD));
162735d7f90SBarry Smith 
1635f80ce2aSJacob Faibussowitsch   CHKERRQ(MatZeroEntries(A));
1645f80ce2aSJacob Faibussowitsch   CHKERRQ(FillMatrixCPUCOO(&fe,A));
1655f80ce2aSJacob Faibussowitsch   CHKERRQ(MatView(A,PETSC_VIEWER_STDOUT_WORLD));
166735d7f90SBarry Smith 
1675f80ce2aSJacob Faibussowitsch   CHKERRQ(MatZeroEntries(A));
1685f80ce2aSJacob Faibussowitsch   CHKERRQ(FillMatrixCPUCOO3d(&fe,A));
1695f80ce2aSJacob Faibussowitsch   CHKERRQ(MatView(A,PETSC_VIEWER_STDOUT_WORLD));
170735d7f90SBarry Smith 
1715f80ce2aSJacob Faibussowitsch   CHKERRQ(MatZeroEntries(A));
1725f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscObjectBaseTypeCompare((PetscObject)A,MATSEQAIJKOKKOS,&is_kokkos));
1735f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscObjectBaseTypeCompare((PetscObject)A,MATSEQAIJCUSPARSE,&is_cuda));
174735d7f90SBarry Smith  #if defined(PETSC_HAVE_KOKKOS)
1755f80ce2aSJacob Faibussowitsch   if (is_kokkos) CHKERRQ(FillMatrixKokkosCOO(&fe,A));
176735d7f90SBarry Smith  #endif
177735d7f90SBarry Smith  #if defined(PETSC_HAVE_CUDA)
1785f80ce2aSJacob Faibussowitsch   if (is_cuda) CHKERRQ(FillMatrixCUDACOO(&fe,A));
179735d7f90SBarry Smith  #endif
1805f80ce2aSJacob Faibussowitsch   CHKERRQ(MatView(A,PETSC_VIEWER_STDOUT_WORLD));
181735d7f90SBarry Smith 
1825f80ce2aSJacob Faibussowitsch   CHKERRQ(MatDestroy(&A));
1835f80ce2aSJacob Faibussowitsch   CHKERRQ(DestroyFEStruct(&fe));
184*b122ec5aSJacob Faibussowitsch   CHKERRQ(PetscFinalize());
185*b122ec5aSJacob Faibussowitsch   return 0;
186735d7f90SBarry Smith }
187735d7f90SBarry Smith 
188735d7f90SBarry Smith /*TEST
189735d7f90SBarry Smith   build:
190735d7f90SBarry Smith     requires: cuda kokkos_kernels
191735d7f90SBarry Smith     depends: ex18cu.cu ex18kok.kokkos.cxx
192735d7f90SBarry Smith 
193735d7f90SBarry Smith   testset:
194735d7f90SBarry Smith     filter: grep -v "type"
195735d7f90SBarry Smith     output_file: output/ex18_1.out
196735d7f90SBarry Smith 
197735d7f90SBarry Smith     test:
198735d7f90SBarry Smith       suffix: kok
199735d7f90SBarry Smith       requires: kokkos_kernels
200735d7f90SBarry Smith       args: -mat_type aijkokkos
201735d7f90SBarry Smith 
202735d7f90SBarry Smith     test:
203735d7f90SBarry Smith       suffix: cuda
204735d7f90SBarry Smith       requires: cuda
205735d7f90SBarry Smith       args: -mat_type aijcusparse
206735d7f90SBarry Smith 
207735d7f90SBarry Smith TEST*/
208