1 static char help[] = "Benchmark for MatMatMult() of AIJ matrices using different 2d finite-difference stencils.\n\n"; 2 3 #include <petscmat.h> 4 5 /* Converts 3d grid coordinates (i,j,k) for a grid of size m \times n to global indexing. Pass k = 0 for a 2d grid. */ 6 int global_index(PetscInt i,PetscInt j,PetscInt k, PetscInt m, PetscInt n) { return i + j * m + k * m * n; } 7 8 int main(int argc,char **argv) 9 { 10 Mat A,B,C,PtAP,PtAP_copy,PtAP_squared; 11 PetscInt i,M,N,Istart,Iend,n=7,j,J,Ii,m=8,k,o=1; 12 PetscScalar v; 13 PetscErrorCode ierr; 14 PetscBool equal=PETSC_FALSE,mat_view=PETSC_FALSE; 15 char stencil[PETSC_MAX_PATH_LEN]; 16 #if defined(PETSC_USE_LOG) 17 PetscLogStage fullMatMatMultStage; 18 #endif 19 20 ierr = PetscInitialize(&argc,&argv,(char*)0,help);if (ierr) return ierr; 21 ierr = PetscOptionsGetInt(NULL,NULL,"-m",&m,NULL);CHKERRQ(ierr); 22 ierr = PetscOptionsGetInt(NULL,NULL,"-n",&n,NULL);CHKERRQ(ierr); 23 ierr = PetscOptionsGetInt(NULL,NULL,"-o",&o,NULL);CHKERRQ(ierr); 24 ierr = PetscOptionsHasName(NULL,NULL,"-result_view",&mat_view);CHKERRQ(ierr); 25 ierr = PetscOptionsGetString(NULL,NULL,"-stencil",stencil,sizeof(stencil),NULL);CHKERRQ(ierr); 26 27 /* Create a aij matrix A */ 28 M = N = m*n*o; 29 ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr); 30 ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 31 ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr); 32 ierr = MatSetFromOptions(A);CHKERRQ(ierr); 33 34 /* Consistency checks */ 35 if (o < 1 || m < 1 || n < 1) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_USER,"Dimensions need to be larger than zero!"); 36 37 /************ 2D stencils ***************/ 38 ierr = PetscStrcmp(stencil, "2d5point", &equal);CHKERRQ(ierr); 39 if (equal) { /* 5-point stencil, 2D */ 40 ierr = MatMPIAIJSetPreallocation(A,5,NULL,5,NULL);CHKERRQ(ierr); 41 ierr = MatSeqAIJSetPreallocation(A,5,NULL);CHKERRQ(ierr); 42 ierr = MatGetOwnershipRange(A,&Istart,&Iend);CHKERRQ(ierr); 43 for (Ii=Istart; Ii<Iend; Ii++) { 44 v = -1.0; k = Ii / (m*n); j = (Ii - k * m * n) / m; i = (Ii - k * m * n - j * m); 45 if (i>0) {J = global_index(i-1,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 46 if (i<m-1) {J = global_index(i+1,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 47 if (j>0) {J = global_index(i,j-1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 48 if (j<n-1) {J = global_index(i,j+1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 49 v = 4.0; ierr = MatSetValues(A,1,&Ii,1,&Ii,&v,INSERT_VALUES);CHKERRQ(ierr); 50 } 51 } 52 ierr = PetscStrcmp(stencil, "2d9point", &equal);CHKERRQ(ierr); 53 if (equal) { /* 9-point stencil, 2D */ 54 ierr = MatMPIAIJSetPreallocation(A,9,NULL,9,NULL);CHKERRQ(ierr); 55 ierr = MatSeqAIJSetPreallocation(A,9,NULL);CHKERRQ(ierr); 56 ierr = MatGetOwnershipRange(A,&Istart,&Iend);CHKERRQ(ierr); 57 for (Ii=Istart; Ii<Iend; Ii++) { 58 v = -1.0; k = Ii / (m*n); j = (Ii - k * m * n) / m; i = (Ii - k * m * n - j * m); 59 if (i>0) {J = global_index(i-1,j, k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 60 if (i>0 && j>0) {J = global_index(i-1,j-1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 61 if (j>0) {J = global_index(i, j-1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 62 if (i<m-1 && j>0) {J = global_index(i+1,j-1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 63 if (i<m-1) {J = global_index(i+1,j, k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 64 if (i<m-1 && j<n-1) {J = global_index(i+1,j+1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 65 if (j<n-1) {J = global_index(i, j+1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 66 if (i>0 && j<n-1) {J = global_index(i-1,j+1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 67 v = 8.0; ierr = MatSetValues(A,1,&Ii,1,&Ii,&v,INSERT_VALUES);CHKERRQ(ierr); 68 } 69 } 70 ierr = PetscStrcmp(stencil, "2d9point2", &equal);CHKERRQ(ierr); 71 if (equal) { /* 9-point Cartesian stencil (width 2 per coordinate), 2D */ 72 ierr = MatMPIAIJSetPreallocation(A,9,NULL,9,NULL);CHKERRQ(ierr); 73 ierr = MatSeqAIJSetPreallocation(A,9,NULL);CHKERRQ(ierr); 74 ierr = MatGetOwnershipRange(A,&Istart,&Iend);CHKERRQ(ierr); 75 for (Ii=Istart; Ii<Iend; Ii++) { 76 v = -1.0; k = Ii / (m*n); j = (Ii - k * m * n) / m; i = (Ii - k * m * n - j * m); 77 if (i>0) {J = global_index(i-1,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 78 if (i>1) {J = global_index(i-2,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 79 if (i<m-1) {J = global_index(i+1,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 80 if (i<m-2) {J = global_index(i+2,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 81 if (j>0) {J = global_index(i,j-1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 82 if (j>1) {J = global_index(i,j-2,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 83 if (j<n-1) {J = global_index(i,j+1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 84 if (j<n-2) {J = global_index(i,j+2,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 85 v = 8.0; ierr = MatSetValues(A,1,&Ii,1,&Ii,&v,INSERT_VALUES);CHKERRQ(ierr); 86 } 87 } 88 ierr = PetscStrcmp(stencil, "2d13point", &equal);CHKERRQ(ierr); 89 if (equal) { /* 13-point Cartesian stencil (width 3 per coordinate), 2D */ 90 ierr = MatMPIAIJSetPreallocation(A,13,NULL,13,NULL);CHKERRQ(ierr); 91 ierr = MatSeqAIJSetPreallocation(A,13,NULL);CHKERRQ(ierr); 92 ierr = MatGetOwnershipRange(A,&Istart,&Iend);CHKERRQ(ierr); 93 for (Ii=Istart; Ii<Iend; Ii++) { 94 v = -1.0; k = Ii / (m*n); j = (Ii - k * m * n) / m; i = (Ii - k * m * n - j * m); 95 if (i>0) {J = global_index(i-1,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 96 if (i>1) {J = global_index(i-2,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 97 if (i>2) {J = global_index(i-3,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 98 if (i<m-1) {J = global_index(i+1,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 99 if (i<m-2) {J = global_index(i+2,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 100 if (i<m-3) {J = global_index(i+3,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 101 if (j>0) {J = global_index(i,j-1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 102 if (j>1) {J = global_index(i,j-2,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 103 if (j>2) {J = global_index(i,j-3,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 104 if (j<n-1) {J = global_index(i,j+1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 105 if (j<n-2) {J = global_index(i,j+2,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 106 if (j<n-3) {J = global_index(i,j+3,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 107 v = 12.0; ierr = MatSetValues(A,1,&Ii,1,&Ii,&v,INSERT_VALUES);CHKERRQ(ierr); 108 } 109 } 110 /************ 3D stencils ***************/ 111 ierr = PetscStrcmp(stencil, "3d7point", &equal);CHKERRQ(ierr); 112 if (equal) { /* 7-point stencil, 3D */ 113 ierr = MatMPIAIJSetPreallocation(A,7,NULL,7,NULL);CHKERRQ(ierr); 114 ierr = MatSeqAIJSetPreallocation(A,7,NULL);CHKERRQ(ierr); 115 ierr = MatGetOwnershipRange(A,&Istart,&Iend);CHKERRQ(ierr); 116 for (Ii=Istart; Ii<Iend; Ii++) { 117 v = -1.0; k = Ii / (m*n); j = (Ii - k * m * n) / m; i = (Ii - k * m * n - j * m); 118 if (i>0) {J = global_index(i-1,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 119 if (i<m-1) {J = global_index(i+1,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 120 if (j>0) {J = global_index(i,j-1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 121 if (j<n-1) {J = global_index(i,j+1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 122 if (k>0) {J = global_index(i,j,k-1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 123 if (k<o-1) {J = global_index(i,j,k+1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 124 v = 6.0; ierr = MatSetValues(A,1,&Ii,1,&Ii,&v,INSERT_VALUES);CHKERRQ(ierr); 125 } 126 } 127 ierr = PetscStrcmp(stencil, "3d13point", &equal);CHKERRQ(ierr); 128 if (equal) { /* 13-point stencil, 3D */ 129 ierr = MatMPIAIJSetPreallocation(A,13,NULL,13,NULL);CHKERRQ(ierr); 130 ierr = MatSeqAIJSetPreallocation(A,13,NULL);CHKERRQ(ierr); 131 ierr = MatGetOwnershipRange(A,&Istart,&Iend);CHKERRQ(ierr); 132 for (Ii=Istart; Ii<Iend; Ii++) { 133 v = -1.0; k = Ii / (m*n); j = (Ii - k * m * n) / m; i = (Ii - k * m * n - j * m); 134 if (i>0) {J = global_index(i-1,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 135 if (i>1) {J = global_index(i-2,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 136 if (i<m-1) {J = global_index(i+1,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 137 if (i<m-2) {J = global_index(i+2,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 138 if (j>0) {J = global_index(i,j-1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 139 if (j>1) {J = global_index(i,j-2,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 140 if (j<n-1) {J = global_index(i,j+1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 141 if (j<n-2) {J = global_index(i,j+2,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 142 if (k>0) {J = global_index(i,j,k-1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 143 if (k>1) {J = global_index(i,j,k-2,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 144 if (k<o-1) {J = global_index(i,j,k+1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 145 if (k<o-2) {J = global_index(i,j,k+2,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 146 v = 12.0; ierr = MatSetValues(A,1,&Ii,1,&Ii,&v,INSERT_VALUES);CHKERRQ(ierr); 147 } 148 } 149 ierr = PetscStrcmp(stencil, "3d19point", &equal);CHKERRQ(ierr); 150 if (equal) { /* 19-point stencil, 3D */ 151 ierr = MatMPIAIJSetPreallocation(A,19,NULL,19,NULL);CHKERRQ(ierr); 152 ierr = MatSeqAIJSetPreallocation(A,19,NULL);CHKERRQ(ierr); 153 ierr = MatGetOwnershipRange(A,&Istart,&Iend);CHKERRQ(ierr); 154 for (Ii=Istart; Ii<Iend; Ii++) { 155 v = -1.0; k = Ii / (m*n); j = (Ii - k * m * n) / m; i = (Ii - k * m * n - j * m); 156 /* one hop */ 157 if (i>0) {J = global_index(i-1,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 158 if (i<m-1) {J = global_index(i+1,j,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 159 if (j>0) {J = global_index(i,j-1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 160 if (j<n-1) {J = global_index(i,j+1,k,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 161 if (k>0) {J = global_index(i,j,k-1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 162 if (k<o-1) {J = global_index(i,j,k+1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 163 /* two hops */ 164 if (i>0 && j>0) {J = global_index(i-1,j-1,k ,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 165 if (i>0 && k>0) {J = global_index(i-1,j, k-1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 166 if (i>0 && j<n-1) {J = global_index(i-1,j+1,k ,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 167 if (i>0 && k<o-1) {J = global_index(i-1,j, k+1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 168 if (i<m-1 && j>0) {J = global_index(i+1,j-1,k ,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 169 if (i<m-1 && k>0) {J = global_index(i+1,j, k-1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 170 if (i<m-1 && j<n-1) {J = global_index(i+1,j+1,k ,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 171 if (i<m-1 && k<o-1) {J = global_index(i+1,j, k+1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 172 if (j>0 && k>0) {J = global_index(i, j-1,k-1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 173 if (j>0 && k<o-1) {J = global_index(i, j-1,k+1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 174 if (j<n-1 && k>0) {J = global_index(i, j+1,k-1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 175 if (j<n-1 && k<o-1) {J = global_index(i, j+1,k+1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 176 v = 18.0; ierr = MatSetValues(A,1,&Ii,1,&Ii,&v,INSERT_VALUES);CHKERRQ(ierr); 177 } 178 } 179 ierr = PetscStrcmp(stencil, "3d27point", &equal);CHKERRQ(ierr); 180 if (equal) { /* 27-point stencil, 3D */ 181 ierr = MatMPIAIJSetPreallocation(A,27,NULL,27,NULL);CHKERRQ(ierr); 182 ierr = MatSeqAIJSetPreallocation(A,27,NULL);CHKERRQ(ierr); 183 ierr = MatGetOwnershipRange(A,&Istart,&Iend);CHKERRQ(ierr); 184 for (Ii=Istart; Ii<Iend; Ii++) { 185 v = -1.0; k = Ii / (m*n); j = (Ii - k * m * n) / m; i = (Ii - k * m * n - j * m); 186 if (k>0) { 187 if (j>0) { 188 if (i>0) {J = global_index(i-1,j-1,k-1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 189 J = global_index(i, j-1,k-1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr); 190 if (i<m-1) {J = global_index(i+1,j-1,k-1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 191 } 192 { 193 if (i>0) {J = global_index(i-1,j, k-1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 194 J = global_index(i, j, k-1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr); 195 if (i<m-1) {J = global_index(i+1,j, k-1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 196 } 197 if (j<n-1) { 198 if (i>0) {J = global_index(i-1,j+1,k-1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 199 J = global_index(i, j+1,k-1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr); 200 if (i<m-1) {J = global_index(i+1,j+1,k-1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 201 } 202 } 203 { 204 if (j>0) { 205 if (i>0) {J = global_index(i-1,j-1,k ,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 206 J = global_index(i, j-1,k ,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr); 207 if (i<m-1) {J = global_index(i+1,j-1,k ,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 208 } 209 { 210 if (i>0) {J = global_index(i-1,j, k ,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 211 J = global_index(i, j, k ,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr); 212 if (i<m-1) {J = global_index(i+1,j, k ,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 213 } 214 if (j<n-1) { 215 if (i>0) {J = global_index(i-1,j+1,k ,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 216 J = global_index(i, j+1,k ,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr); 217 if (i<m-1) {J = global_index(i+1,j+1,k ,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 218 } 219 } 220 if (k<o-1) { 221 if (j>0) { 222 if (i>0) {J = global_index(i-1,j-1,k+1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 223 J = global_index(i, j-1,k+1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr); 224 if (i<m-1) {J = global_index(i+1,j-1,k+1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 225 } 226 { 227 if (i>0) {J = global_index(i-1,j, k+1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 228 J = global_index(i, j, k+1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr); 229 if (i<m-1) {J = global_index(i+1,j, k+1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 230 } 231 if (j<n-1) { 232 if (i>0) {J = global_index(i-1,j+1,k+1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 233 J = global_index(i, j+1,k+1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr); 234 if (i<m-1) {J = global_index(i+1,j+1,k+1,m,n); ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} 235 } 236 } 237 v = 26.0; ierr = MatSetValues(A,1,&Ii,1,&Ii,&v,INSERT_VALUES);CHKERRQ(ierr); 238 } 239 } 240 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 241 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 242 243 /* Copy A into B in order to have a more representative benchmark (A*A has more cache hits than A*B) */ 244 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 245 246 ierr = PetscLogStageRegister("Full MatMatMult",&fullMatMatMultStage);CHKERRQ(ierr); 247 248 /* Test C = A*B */ 249 ierr = PetscLogStagePush(fullMatMatMultStage);CHKERRQ(ierr); 250 ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr); 251 252 /* Test PtAP_squared = PtAP(C,C)*PtAP(C,C) */ 253 ierr = MatPtAP(C,C,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&PtAP);CHKERRQ(ierr); 254 ierr = MatDuplicate(PtAP,MAT_COPY_VALUES,&PtAP_copy);CHKERRQ(ierr); 255 ierr = MatMatMult(PtAP,PtAP_copy,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&PtAP_squared);CHKERRQ(ierr); 256 257 ierr = MatView(C,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); 258 ierr = MatView(PtAP_squared,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); 259 260 ierr = MatDestroy(&PtAP_squared);CHKERRQ(ierr); 261 ierr = MatDestroy(&PtAP_copy);CHKERRQ(ierr); 262 ierr = MatDestroy(&PtAP);CHKERRQ(ierr); 263 ierr = MatDestroy(&C);CHKERRQ(ierr); 264 ierr = MatDestroy(&B);CHKERRQ(ierr); 265 ierr = MatDestroy(&A);CHKERRQ(ierr); 266 ierr = PetscFinalize(); 267 return ierr; 268 } 269 270 /*TEST 271 272 test: 273 suffix: 1 274 nsize: 1 275 args: -m 8 -n 8 -stencil 2d5point -matmatmult_via sorted 276 277 test: 278 suffix: 2 279 nsize: 1 280 args: -m 5 -n 5 -o 5 -stencil 3d27point -matmatmult_via rowmerge 281 282 test: 283 suffix: 3 284 nsize: 4 285 args: -m 6 -n 6 -stencil 2d5point -matmatmult_via seqmpi 286 287 TEST*/ 288