1 static char help[] = "Demonstrates the use of the COO interface to PETSc matrices for finite element computations\n\n"; 2 3 /* 4 The COO interface for PETSc matrices provides a convenient way to provide finite element stiffness matrices to PETSc matrix that should work 5 well on both CPUs and GPUs. It is an alternative to using MatSetValues() 6 7 This example is intended for people who are NOT using DMPLEX or libCEED or any other higher-level infrastructure for finite elements; 8 it is only to demonstrate the concepts in a simple way for those people who are interested and for those people who are using PETSc for 9 linear algebra solvers but are managing their own finite element process. 10 11 Please do NOT use this example as a starting point to writing your own finite element code from scratch! 12 13 Each element in this example has three vertices; hence the usage below needs to be adjusted for elements of a different number of vertices. 14 */ 15 16 #include <petscmat.h> 17 #include "ex18.h" 18 19 static PetscErrorCode CreateFEStruct(FEStruct *fe) 20 { 21 PetscFunctionBeginUser; 22 fe->Nv = 5; 23 fe->Ne = 3; 24 PetscCall(PetscMalloc1(3 * fe->Ne, &fe->vertices)); 25 /* the three vertices associated with each element in order of element */ 26 fe->vertices[0 + 0] = 0; 27 fe->vertices[0 + 1] = 1; 28 fe->vertices[0 + 2] = 2; 29 fe->vertices[3 + 0] = 2; 30 fe->vertices[3 + 1] = 1; 31 fe->vertices[3 + 2] = 3; 32 fe->vertices[6 + 0] = 2; 33 fe->vertices[6 + 1] = 4; 34 fe->vertices[6 + 2] = 3; 35 fe->n = 5; 36 PetscFunctionReturn(PETSC_SUCCESS); 37 } 38 39 static PetscErrorCode DestroyFEStruct(FEStruct *fe) 40 { 41 PetscFunctionBeginUser; 42 PetscCall(PetscFree(fe->vertices)); 43 PetscCall(PetscFree(fe->coo)); 44 PetscFunctionReturn(PETSC_SUCCESS); 45 } 46 47 static PetscErrorCode CreateMatrix(FEStruct *fe, Mat *A) 48 { 49 PetscInt *oor, *ooc, cnt = 0; 50 51 PetscFunctionBeginUser; 52 PetscCall(MatCreate(PETSC_COMM_WORLD, A)); 53 PetscCall(MatSetSizes(*A, fe->n, fe->n, PETSC_DECIDE, PETSC_DECIDE)); 54 PetscCall(MatSetFromOptions(*A)); 55 56 /* determine for each entry in each element stiffness matrix the global row and column */ 57 /* since the element is triangular with piecewise linear basis functions there are three degrees of freedom per element, one for each vertex */ 58 PetscCall(PetscMalloc2(3 * 3 * fe->Ne, &oor, 3 * 3 * fe->Ne, &ooc)); 59 for (PetscInt e = 0; e < fe->Ne; e++) { 60 for (PetscInt vi = 0; vi < 3; vi++) { 61 for (PetscInt vj = 0; vj < 3; vj++) { 62 oor[cnt] = fe->vertices[3 * e + vi]; 63 ooc[cnt++] = fe->vertices[3 * e + vj]; 64 } 65 } 66 } 67 PetscCall(MatSetPreallocationCOO(*A, 3 * 3 * fe->Ne, oor, ooc)); 68 PetscCall(PetscFree2(oor, ooc)); 69 70 /* determine the offset into the COO value array the offset of each element stiffness; there are 9 = 3*3 entries for each element stiffness */ 71 /* for lists of elements with different numbers of degrees of freedom associated with each element the offsets will not be uniform */ 72 PetscCall(PetscMalloc1(fe->Ne, &fe->coo)); 73 fe->coo[0] = 0; 74 for (PetscInt e = 1; e < fe->Ne; e++) fe->coo[e] = fe->coo[e - 1] + 3 * 3; 75 PetscFunctionReturn(PETSC_SUCCESS); 76 } 77 78 static PetscErrorCode FillMatrixCPU(FEStruct *fe, Mat A) 79 { 80 PetscScalar s[9]; 81 82 PetscFunctionBeginUser; 83 /* simulation of traditional PETSc CPU based finite assembly process */ 84 for (PetscInt e = 0; e < fe->Ne; e++) { 85 for (PetscInt vi = 0; vi < 3; vi++) { 86 for (PetscInt vj = 0; vj < 3; vj++) s[3 * vi + vj] = vi + 2 * vj; 87 } 88 PetscCall(MatSetValues(A, 3, fe->vertices + 3 * e, 3, fe->vertices + 3 * e, s, ADD_VALUES)); 89 } 90 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 91 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 92 PetscFunctionReturn(PETSC_SUCCESS); 93 } 94 95 /* 96 Shows an example of tracking element offsets explicitly, which allows for 97 mixed-topology meshes and combining both volume and surface parts into the weak form. 98 */ 99 static PetscErrorCode FillMatrixCPUCOO(FEStruct *fe, Mat A) 100 { 101 PetscScalar *v, *s; 102 103 PetscFunctionBeginUser; 104 /* simulation of CPU based finite assembly process with COO */ 105 PetscCall(PetscMalloc1(3 * 3 * fe->Ne, &v)); 106 for (PetscInt e = 0; e < fe->Ne; e++) { 107 s = v + fe->coo[e]; /* point to location in COO of current element stiffness */ 108 for (PetscInt vi = 0; vi < 3; vi++) { 109 for (PetscInt vj = 0; vj < 3; vj++) s[3 * vi + vj] = vi + 2 * vj; 110 } 111 } 112 PetscCall(MatSetValuesCOO(A, v, ADD_VALUES)); 113 PetscCall(PetscFree(v)); 114 PetscFunctionReturn(PETSC_SUCCESS); 115 } 116 117 /* 118 Uses a multi-dimensional indexing technique that works for homogeneous meshes 119 such as single-topology with volume integral only. 120 */ 121 static PetscErrorCode FillMatrixCPUCOO3d(FEStruct *fe, Mat A) 122 { 123 PetscScalar (*s)[3][3]; 124 125 PetscFunctionBeginUser; 126 /* simulation of CPU based finite assembly process with COO */ 127 PetscCall(PetscMalloc1(fe->Ne, &s)); 128 for (PetscInt e = 0; e < fe->Ne; e++) { 129 for (PetscInt vi = 0; vi < 3; vi++) { 130 for (PetscInt vj = 0; vj < 3; vj++) s[e][vi][vj] = vi + 2 * vj; 131 } 132 } 133 PetscCall(MatSetValuesCOO(A, (PetscScalar *)s, INSERT_VALUES)); 134 PetscCall(PetscFree(s)); 135 PetscFunctionReturn(PETSC_SUCCESS); 136 } 137 138 int main(int argc, char **args) 139 { 140 Mat A, B; 141 FEStruct fe; 142 PetscMPIInt size; 143 PetscBool is_kokkos, is_cuda; 144 145 PetscFunctionBeginUser; 146 PetscCall(PetscInitialize(&argc, &args, NULL, help)); 147 PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size)); 148 PetscCheck(size <= 1, PETSC_COMM_WORLD, PETSC_ERR_WRONG_MPI_SIZE, "Demonstration is only for sequential runs"); 149 150 PetscCall(CreateFEStruct(&fe)); 151 152 PetscCall(CreateMatrix(&fe, &B)); 153 PetscCall(MatDuplicate(B, MAT_DO_NOT_COPY_VALUES, &A)); 154 PetscCall(MatDestroy(&B)); 155 156 PetscCall(FillMatrixCPU(&fe, A)); 157 PetscCall(MatView(A, PETSC_VIEWER_STDOUT_WORLD)); 158 159 PetscCall(MatZeroEntries(A)); 160 PetscCall(FillMatrixCPUCOO(&fe, A)); 161 PetscCall(MatView(A, PETSC_VIEWER_STDOUT_WORLD)); 162 163 PetscCall(MatZeroEntries(A)); 164 PetscCall(FillMatrixCPUCOO3d(&fe, A)); 165 PetscCall(MatView(A, PETSC_VIEWER_STDOUT_WORLD)); 166 167 PetscCall(MatZeroEntries(A)); 168 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJKOKKOS, &is_kokkos)); 169 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJCUSPARSE, &is_cuda)); 170 #if defined(PETSC_HAVE_KOKKOS) 171 if (is_kokkos) PetscCall(FillMatrixKokkosCOO(&fe, A)); 172 #endif 173 #if defined(PETSC_HAVE_CUDA) 174 if (is_cuda) PetscCall(FillMatrixCUDACOO(&fe, A)); 175 #endif 176 PetscCall(MatView(A, PETSC_VIEWER_STDOUT_WORLD)); 177 178 PetscCall(MatDestroy(&A)); 179 PetscCall(DestroyFEStruct(&fe)); 180 PetscCall(PetscFinalize()); 181 return 0; 182 } 183 184 /*TEST 185 build: 186 requires: cuda kokkos_kernels 187 depends: ex18cu.cu ex18kok.kokkos.cxx 188 189 testset: 190 filter: grep -v "type" 191 output_file: output/ex18_1.out 192 193 test: 194 suffix: kok 195 requires: kokkos_kernels 196 args: -mat_type aijkokkos 197 198 test: 199 suffix: cuda 200 requires: cuda 201 args: -mat_type aijcusparse 202 203 test: 204 suffix: hip 205 requires: hip 206 args: -mat_type aijhipsparse 207 208 TEST*/ 209