1 static char help[] = "Demonstrates the use of the COO interface to PETSc matrices for finite element computations\n\n";
2
/*
   The COO interface for PETSc matrices provides a convenient way to provide finite element stiffness matrices to a PETSc matrix, and should work
   well on both CPUs and GPUs. It is an alternative to using MatSetValues()

   This example is intended for people who are NOT using DMPLEX or libCEED or any other higher-level infrastructure for finite elements;
   it is only to demonstrate the concepts in a simple way for those people who are interested and for those people who are using PETSc for
   linear algebra solvers but are managing their own finite element process.

   Please do NOT use this example as a starting point for writing your own finite element code from scratch!

   Each element in this example has three vertices; hence the usage below needs to be adjusted for elements with a different number of vertices.
*/
15
16 #include <petscmat.h>
17 #include "ex18.h"
18
CreateFEStruct(FEStruct * fe)19 static PetscErrorCode CreateFEStruct(FEStruct *fe)
20 {
21 PetscFunctionBeginUser;
22 fe->Nv = 5;
23 fe->Ne = 3;
24 PetscCall(PetscMalloc1(3 * fe->Ne, &fe->vertices));
25 /* the three vertices associated with each element in order of element */
26 fe->vertices[0 + 0] = 0;
27 fe->vertices[0 + 1] = 1;
28 fe->vertices[0 + 2] = 2;
29 fe->vertices[3 + 0] = 2;
30 fe->vertices[3 + 1] = 1;
31 fe->vertices[3 + 2] = 3;
32 fe->vertices[6 + 0] = 2;
33 fe->vertices[6 + 1] = 4;
34 fe->vertices[6 + 2] = 3;
35 fe->n = 5;
36 PetscFunctionReturn(PETSC_SUCCESS);
37 }
38
DestroyFEStruct(FEStruct * fe)39 static PetscErrorCode DestroyFEStruct(FEStruct *fe)
40 {
41 PetscFunctionBeginUser;
42 PetscCall(PetscFree(fe->vertices));
43 PetscCall(PetscFree(fe->coo));
44 PetscFunctionReturn(PETSC_SUCCESS);
45 }
46
CreateMatrix(FEStruct * fe,Mat * A)47 static PetscErrorCode CreateMatrix(FEStruct *fe, Mat *A)
48 {
49 PetscInt *oor, *ooc, cnt = 0;
50
51 PetscFunctionBeginUser;
52 PetscCall(MatCreate(PETSC_COMM_WORLD, A));
53 PetscCall(MatSetSizes(*A, fe->n, fe->n, PETSC_DECIDE, PETSC_DECIDE));
54 PetscCall(MatSetFromOptions(*A));
55
56 /* determine for each entry in each element stiffness matrix the global row and column */
57 /* since the element is triangular with piecewise linear basis functions there are three degrees of freedom per element, one for each vertex */
58 PetscCall(PetscMalloc2(3 * 3 * fe->Ne, &oor, 3 * 3 * fe->Ne, &ooc));
59 for (PetscInt e = 0; e < fe->Ne; e++) {
60 for (PetscInt vi = 0; vi < 3; vi++) {
61 for (PetscInt vj = 0; vj < 3; vj++) {
62 oor[cnt] = fe->vertices[3 * e + vi];
63 ooc[cnt++] = fe->vertices[3 * e + vj];
64 }
65 }
66 }
67 PetscCall(MatSetPreallocationCOO(*A, 3 * 3 * fe->Ne, oor, ooc));
68 PetscCall(PetscFree2(oor, ooc));
69
70 /* determine the offset into the COO value array the offset of each element stiffness; there are 9 = 3*3 entries for each element stiffness */
71 /* for lists of elements with different numbers of degrees of freedom associated with each element the offsets will not be uniform */
72 PetscCall(PetscMalloc1(fe->Ne, &fe->coo));
73 fe->coo[0] = 0;
74 for (PetscInt e = 1; e < fe->Ne; e++) fe->coo[e] = fe->coo[e - 1] + 3 * 3;
75 PetscFunctionReturn(PETSC_SUCCESS);
76 }
77
FillMatrixCPU(FEStruct * fe,Mat A)78 static PetscErrorCode FillMatrixCPU(FEStruct *fe, Mat A)
79 {
80 PetscScalar s[9];
81
82 PetscFunctionBeginUser;
83 /* simulation of traditional PETSc CPU based finite assembly process */
84 for (PetscInt e = 0; e < fe->Ne; e++) {
85 for (PetscInt vi = 0; vi < 3; vi++) {
86 for (PetscInt vj = 0; vj < 3; vj++) s[3 * vi + vj] = vi + 2 * vj;
87 }
88 PetscCall(MatSetValues(A, 3, fe->vertices + 3 * e, 3, fe->vertices + 3 * e, s, ADD_VALUES));
89 }
90 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
91 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
92 PetscFunctionReturn(PETSC_SUCCESS);
93 }
94
95 /*
96 Shows an example of tracking element offsets explicitly, which allows for
97 mixed-topology meshes and combining both volume and surface parts into the weak form.
98 */
FillMatrixCPUCOO(FEStruct * fe,Mat A)99 static PetscErrorCode FillMatrixCPUCOO(FEStruct *fe, Mat A)
100 {
101 PetscScalar *v, *s;
102
103 PetscFunctionBeginUser;
104 /* simulation of CPU based finite assembly process with COO */
105 PetscCall(PetscMalloc1(3 * 3 * fe->Ne, &v));
106 for (PetscInt e = 0; e < fe->Ne; e++) {
107 s = v + fe->coo[e]; /* point to location in COO of current element stiffness */
108 for (PetscInt vi = 0; vi < 3; vi++) {
109 for (PetscInt vj = 0; vj < 3; vj++) s[3 * vi + vj] = vi + 2 * vj;
110 }
111 }
112 PetscCall(MatSetValuesCOO(A, v, ADD_VALUES));
113 PetscCall(PetscFree(v));
114 PetscFunctionReturn(PETSC_SUCCESS);
115 }
116
117 /*
118 Uses a multi-dimensional indexing technique that works for homogeneous meshes
119 such as single-topology with volume integral only.
120 */
FillMatrixCPUCOO3d(FEStruct * fe,Mat A)121 static PetscErrorCode FillMatrixCPUCOO3d(FEStruct *fe, Mat A)
122 {
123 PetscScalar (*s)[3][3];
124
125 PetscFunctionBeginUser;
126 /* simulation of CPU based finite assembly process with COO */
127 PetscCall(PetscMalloc1(fe->Ne, &s));
128 for (PetscInt e = 0; e < fe->Ne; e++) {
129 for (PetscInt vi = 0; vi < 3; vi++) {
130 for (PetscInt vj = 0; vj < 3; vj++) s[e][vi][vj] = vi + 2 * vj;
131 }
132 }
133 PetscCall(MatSetValuesCOO(A, (PetscScalar *)s, INSERT_VALUES));
134 PetscCall(PetscFree(s));
135 PetscFunctionReturn(PETSC_SUCCESS);
136 }
137
/* Builds one small FE mesh, then assembles and views the same matrix four ways:
   MatSetValues(), COO with explicit offsets, COO with 3-D indexing, and (when available) a device COO fill */
int main(int argc, char **args)
{
  Mat A, B;
  FEStruct fe;
  PetscMPIInt size;
  PetscBool is_kokkos, is_cuda;

  PetscFunctionBeginUser;
  PetscCall(PetscInitialize(&argc, &args, NULL, help));
  PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size));
  PetscCheck(size <= 1, PETSC_COMM_WORLD, PETSC_ERR_WRONG_MPI_SIZE, "Demonstration is only for sequential runs");

  PetscCall(CreateFEStruct(&fe));

  /* create via B then duplicate into A; B itself is discarded immediately */
  PetscCall(CreateMatrix(&fe, &B));
  PetscCall(MatDuplicate(B, MAT_DO_NOT_COPY_VALUES, &A));
  PetscCall(MatDestroy(&B));

  /* traditional MatSetValues()-based assembly */
  PetscCall(FillMatrixCPU(&fe, A));
  PetscCall(MatView(A, PETSC_VIEWER_STDOUT_WORLD));

  /* COO assembly with per-element offsets; matrix is zeroed first so the views should match */
  PetscCall(MatZeroEntries(A));
  PetscCall(FillMatrixCPUCOO(&fe, A));
  PetscCall(MatView(A, PETSC_VIEWER_STDOUT_WORLD));

  /* COO assembly using multi-dimensional indexing */
  PetscCall(MatZeroEntries(A));
  PetscCall(FillMatrixCPUCOO3d(&fe, A));
  PetscCall(MatView(A, PETSC_VIEWER_STDOUT_WORLD));

  /* device-side COO assembly, selected by the run-time matrix type (-mat_type) and compile-time support */
  PetscCall(MatZeroEntries(A));
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJKOKKOS, &is_kokkos));
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJCUSPARSE, &is_cuda));
#if defined(PETSC_HAVE_KOKKOS)
  if (is_kokkos) PetscCall(FillMatrixKokkosCOO(&fe, A));
#endif
#if defined(PETSC_HAVE_CUDA)
  if (is_cuda) PetscCall(FillMatrixCUDACOO(&fe, A));
#endif
  PetscCall(MatView(A, PETSC_VIEWER_STDOUT_WORLD));

  PetscCall(MatDestroy(&A));
  PetscCall(DestroyFEStruct(&fe));
  PetscCall(PetscFinalize());
  return 0;
}
183
184 /*TEST
185 build:
186 requires: cuda kokkos_kernels
187 depends: ex18cu.cu ex18k.kokkos.cxx
188
189 testset:
190 filter: grep -v "type"
191 output_file: output/ex18_1.out
192
193 test:
194 suffix: kok
195 requires: kokkos_kernels
196 args: -mat_type aijkokkos
197
198 test:
199 suffix: cuda
200 requires: cuda
201 args: -mat_type aijcusparse
202
203 test:
204 suffix: hip
205 requires: hip
206 args: -mat_type aijhipsparse
207
208 TEST*/
209