1 static char help[] = "Demonstrates the use of the COO interface to PETSc matrices for finite element computations\n\n"; 2 3 /* 4 The COO interface for PETSc matrices provides a convenient way to provide finite element element stiffness matrices to PETSc matrix that should work 5 well on both CPUs and GPUs. It is an alternative to using MatSetValues() 6 7 This example is intended for people who are NOT using DMPLEX or libCEED or any other higher-level infrastructure for finite elements; 8 it is only to demonstrate the concepts in a simple way for those people who are interested and for those people who are using PETSc for 9 linear algebra solvers but are managing their own finite element process. 10 11 Please do NOT use this example as a starting point to writing your own finite element code from scratch! 12 13 Each element in this example has three vertices; hence the the usage below needs to be adjusted for elements of a different number of vertices. 14 */ 15 16 #include <petscmat.h> 17 #include "ex18.h" 18 19 static PetscErrorCode CreateFEStruct(FEStruct *fe) 20 { 21 PetscErrorCode ierr; 22 23 PetscFunctionBeginUser; 24 fe->Nv = 5; 25 fe->Ne = 3; 26 ierr = PetscMalloc1(3*fe->Ne,&fe->vertices);CHKERRQ(ierr); 27 /* the three vertices associated with each element in order of element */ 28 fe->vertices[0 + 0] = 0; 29 fe->vertices[0 + 1] = 1; 30 fe->vertices[0 + 2] = 2; 31 fe->vertices[3 + 0] = 2; 32 fe->vertices[3 + 1] = 1; 33 fe->vertices[3 + 2] = 3; 34 fe->vertices[6 + 0] = 2; 35 fe->vertices[6 + 1] = 4; 36 fe->vertices[6 + 2] = 3; 37 fe->n = 5; 38 PetscFunctionReturn(0); 39 } 40 41 static PetscErrorCode DestroyFEStruct(FEStruct *fe) 42 { 43 PetscErrorCode ierr; 44 45 PetscFunctionBeginUser; 46 ierr = PetscFree(fe->vertices);CHKERRQ(ierr); 47 ierr = PetscFree(fe->coo);CHKERRQ(ierr); 48 PetscFunctionReturn(0); 49 } 50 51 static PetscErrorCode CreateMatrix(FEStruct *fe,Mat *A) 52 { 53 PetscErrorCode ierr; 54 PetscInt *oor,*ooc,cnt = 0; 55 56 PetscFunctionBeginUser; 57 ierr = MatCreate(PETSC_COMM_WORLD,A);CHKERRQ(ierr); 58 ierr = MatSetSizes(*A,fe->n,fe->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 59 ierr = MatSetFromOptions(*A);CHKERRQ(ierr); 60 61 /* determine for each entry in each element stiffness matrix the global row and colum */ 62 /* since the element is triangular with piecewise linear basis functions there are three degrees of freedom per element, one for each vertex */ 63 ierr = PetscMalloc2(3*3*fe->Ne,&oor,3*3*fe->Ne,&ooc);CHKERRQ(ierr); 64 for (PetscInt e=0; e<fe->Ne; e++) { 65 for (PetscInt vi=0; vi<3; vi++) { 66 for (PetscInt vj=0; vj<3; vj++) { 67 oor[cnt] = fe->vertices[3*e+vi]; 68 ooc[cnt++] = fe->vertices[3*e+vj]; 69 } 70 } 71 } 72 ierr = MatSetPreallocationCOO(*A,3*3*fe->Ne,oor,ooc);CHKERRQ(ierr); 73 ierr = PetscFree2(oor,ooc);CHKERRQ(ierr); 74 75 /* determine the offset into the COO value array the offset of each element stiffness; there are 9 = 3*3 entries for each element stiffness */ 76 /* for lists of elements with different numbers of degrees of freedom assocated with each element the offsets will not be uniform */ 77 ierr = PetscMalloc1(fe->Ne,&fe->coo);CHKERRQ(ierr); 78 fe->coo[0] = 0; 79 for (PetscInt e=1; e<fe->Ne; e++) { 80 fe->coo[e] = fe->coo[e-1] + 3*3; 81 } 82 PetscFunctionReturn(0); 83 } 84 85 static PetscErrorCode FillMatrixCPU(FEStruct *fe,Mat A) 86 { 87 PetscErrorCode ierr; 88 PetscScalar s[9]; 89 90 PetscFunctionBeginUser; 91 /* simulation of traditional PETSc CPU based finite assembly process */ 92 for (PetscInt e=0; e<fe->Ne; e++) { 93 for (PetscInt vi=0; vi<3; vi++) { 94 for (PetscInt vj=0; vj<3; vj++) { 95 s[3*vi+vj] = vi+2*vj; 96 } 97 } 98 ierr = MatSetValues(A,3,fe->vertices + 3*e,3, fe->vertices + 3*e,s,ADD_VALUES);CHKERRQ(ierr); 99 } 100 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 101 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 102 PetscFunctionReturn(0); 103 } 104 105 /* 106 Shows an example of tracking element offsets explicitly, which allows for 107 mixed-topology meshes and combining both volume and surface parts into the weak form. 108 */ 109 static PetscErrorCode FillMatrixCPUCOO(FEStruct *fe,Mat A) 110 { 111 PetscErrorCode ierr; 112 PetscScalar *v,*s; 113 114 PetscFunctionBeginUser; 115 /* simulation of CPU based finite assembly process with COO */ 116 ierr = PetscMalloc1(3*3*fe->Ne,&v);CHKERRQ(ierr); 117 for (PetscInt e=0; e<fe->Ne; e++) { 118 s = v + fe->coo[e]; /* point to location in COO of current element stiffness */ 119 for (PetscInt vi=0; vi<3; vi++) { 120 for (PetscInt vj=0; vj<3; vj++) { 121 s[3*vi+vj] = vi+2*vj; 122 } 123 } 124 } 125 ierr = MatSetValuesCOO(A,v,ADD_VALUES);CHKERRQ(ierr); 126 ierr = PetscFree(v);CHKERRQ(ierr); 127 PetscFunctionReturn(0); 128 } 129 130 /* 131 Uses a multi-dimensional indexing technique that works for homogeneous meshes 132 such as single-topology with volume integral only. 133 */ 134 static PetscErrorCode FillMatrixCPUCOO3d(FEStruct *fe,Mat A) 135 { 136 PetscErrorCode ierr; 137 PetscScalar (*s)[3][3]; 138 139 PetscFunctionBeginUser; 140 /* simulation of CPU based finite assembly process with COO */ 141 ierr = PetscMalloc1(fe->Ne,&s);CHKERRQ(ierr); 142 for (PetscInt e=0; e<fe->Ne; e++) { 143 for (PetscInt vi=0; vi<3; vi++) { 144 for (PetscInt vj=0; vj<3; vj++) { 145 s[e][vi][vj] = vi+2*vj; 146 } 147 } 148 } 149 ierr = MatSetValuesCOO(A,(PetscScalar*)s,INSERT_VALUES);CHKERRQ(ierr); 150 ierr = PetscFree(s);CHKERRQ(ierr); 151 PetscFunctionReturn(0); 152 } 153 154 int main(int argc, char **args) 155 { 156 Mat A; 157 PetscErrorCode ierr; 158 FEStruct fe; 159 PetscMPIInt size; 160 PetscBool is_kokkos,is_cuda; 161 162 ierr = PetscInitialize(&argc,&args,(char*)0,help);if (ierr) return ierr; 163 ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRMPI(ierr); 164 PetscCheck(size <= 1,PETSC_COMM_WORLD,PETSC_ERR_WRONG_MPI_SIZE,"Demonstration is only for sequential runs"); 165 166 ierr = CreateFEStruct(&fe);CHKERRQ(ierr); 167 ierr = CreateMatrix(&fe,&A);CHKERRQ(ierr); 168 169 ierr = FillMatrixCPU(&fe,A);CHKERRQ(ierr); 170 ierr = MatView(A,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); 171 172 ierr = MatZeroEntries(A);CHKERRQ(ierr); 173 ierr = FillMatrixCPUCOO(&fe,A);CHKERRQ(ierr); 174 ierr = MatView(A,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); 175 176 ierr = MatZeroEntries(A);CHKERRQ(ierr); 177 ierr = FillMatrixCPUCOO3d(&fe,A);CHKERRQ(ierr); 178 ierr = MatView(A,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); 179 180 ierr = MatZeroEntries(A);CHKERRQ(ierr); 181 ierr = PetscObjectBaseTypeCompare((PetscObject)A,MATSEQAIJKOKKOS,&is_kokkos);CHKERRQ(ierr); 182 ierr = PetscObjectBaseTypeCompare((PetscObject)A,MATSEQAIJCUSPARSE,&is_cuda);CHKERRQ(ierr); 183 #if defined(PETSC_HAVE_KOKKOS) 184 if (is_kokkos) {ierr = FillMatrixKokkosCOO(&fe,A);CHKERRQ(ierr);} 185 #endif 186 #if defined(PETSC_HAVE_CUDA) 187 if (is_cuda) {ierr = FillMatrixCUDACOO(&fe,A);CHKERRQ(ierr);} 188 #endif 189 ierr = MatView(A,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); 190 191 ierr = MatDestroy(&A);CHKERRQ(ierr); 192 ierr = DestroyFEStruct(&fe);CHKERRQ(ierr); 193 ierr = PetscFinalize(); 194 return ierr; 195 } 196 197 /*TEST 198 build: 199 requires: cuda kokkos_kernels 200 depends: ex18cu.cu ex18kok.kokkos.cxx 201 202 testset: 203 filter: grep -v "type" 204 output_file: output/ex18_1.out 205 206 test: 207 suffix: kok 208 requires: kokkos_kernels 209 args: -mat_type aijkokkos 210 211 test: 212 suffix: cuda 213 requires: cuda 214 args: -mat_type aijcusparse 215 216 TEST*/ 217