1 2 #include "petsc.h" 3 #include "petscsys.h" 4 5 extern int BlastCache(void); 6 extern int test1(void); 7 extern int test2(void); 8 9 #undef __FUNCT__ 10 #define __FUNCT__ "main" 11 int main(int argc,char **argv) 12 { 13 PetscErrorCode ierr; 14 15 PetscInitialize(&argc,&argv,0,0); 16 17 ierr = test1();CHKERRQ(ierr); 18 ierr = test2();CHKERRQ(ierr); 19 20 ierr = PetscFinalize();CHKERRQ(ierr); 21 PetscFunctionReturn(0); 22 } 23 24 #undef __FUNCT__ 25 #define __FUNCT__ "test1" 26 int test1(void) 27 { 28 PetscLogDouble t1,t2; 29 double value; 30 int i,ierr,*z,*zi,intval; 31 PetscScalar *x,*y; 32 PetscRandom r; 33 34 ierr = PetscRandomCreate(PETSC_COMM_SELF,&r);CHKERRQ(ierr); 35 ierr = PetscRandomSetFromOptions(r);CHKERRQ(ierr); 36 ierr = PetscMalloc(20000*sizeof(PetscScalar),&x);CHKERRQ(ierr); 37 ierr = PetscMalloc(20000*sizeof(PetscScalar),&y);CHKERRQ(ierr); 38 39 ierr = PetscMalloc(2000*sizeof(int),&z);CHKERRQ(ierr); 40 ierr = PetscMalloc(2000*sizeof(int),&zi);CHKERRQ(ierr); 41 42 43 44 /* Take care of paging effects */ 45 ierr = PetscGetTime(&t1);CHKERRQ(ierr); 46 47 /* Form the random set of integers */ 48 for (i=0; i<2000; i++) { 49 ierr = PetscRandomGetValue(r,&value);CHKERRQ(ierr); 50 intval = (int)(value*20000.0); 51 z[i] = intval; 52 } 53 54 for (i=0; i<2000; i++) { 55 ierr = PetscRandomGetValue(r,&value);CHKERRQ(ierr); 56 intval = (int)(value*20000.0); 57 zi[i] = intval; 58 } 59 /* fprintf(stdout,"Done setup\n"); */ 60 61 ierr = BlastCache();CHKERRQ(ierr); 62 63 ierr = PetscGetTime(&t1);CHKERRQ(ierr); 64 for (i=0; i<2000; i++) { x[i] = y[i]; } 65 ierr = PetscGetTime(&t2);CHKERRQ(ierr); 66 fprintf(stdout,"%-27s : %e sec\n","x[i] = y[i]",(t2-t1)/2000.0); 67 68 ierr = BlastCache();CHKERRQ(ierr); 69 70 ierr = PetscGetTime(&t1);CHKERRQ(ierr); 71 for (i=0; i<500; i+=4) { 72 x[i] = y[z[i]]; 73 x[1+i] = y[z[1+i]]; 74 x[2+i] = y[z[2+i]]; 75 x[3+i] = y[z[3+i]]; 76 } 77 ierr = PetscGetTime(&t2);CHKERRQ(ierr); 78 fprintf(stdout,"%-27s : %e sec\n","x[i] = y[idx[i]] - unroll 4",(t2-t1)/2000.0); 79 80 ierr = BlastCache();CHKERRQ(ierr); 81 82 ierr = PetscGetTime(&t1);CHKERRQ(ierr) 83 for (i=0; i<2000; i++) { x[i] = y[z[i]]; } 84 ierr = PetscGetTime(&t2);CHKERRQ(ierr); 85 fprintf(stdout,"%-27s : %e sec\n","x[i] = y[idx[i]]",(t2-t1)/2000.0); 86 87 ierr = BlastCache();CHKERRQ(ierr); 88 89 ierr = PetscGetTime(&t1);CHKERRQ(ierr); 90 for (i=0; i<1000; i+=2) { x[i] = y[z[i]]; x[1+i] = y[z[1+i]]; } 91 ierr = PetscGetTime(&t2);CHKERRQ(ierr); 92 fprintf(stdout,"%-27s : %e sec\n","x[i] = y[idx[i]] - unroll 2",(t2-t1)/2000.0); 93 94 ierr = BlastCache();CHKERRQ(ierr); 95 96 ierr = PetscGetTime(&t1);CHKERRQ(ierr); 97 for (i=0; i<2000; i++) { x[z[i]] = y[i]; } 98 ierr = PetscGetTime(&t2);CHKERRQ(ierr); 99 fprintf(stdout,"%-27s : %e sec\n","x[z[i]] = y[i]",(t2-t1)/2000.0); 100 101 ierr = BlastCache();CHKERRQ(ierr); 102 103 ierr = PetscGetTime(&t1);CHKERRQ(ierr); 104 for (i=0; i<2000; i++) { x[z[i]] = y[zi[i]]; } 105 ierr = PetscGetTime(&t2);CHKERRQ(ierr); 106 fprintf(stdout,"%-27s : %e sec\n","x[z[i]] = y[zi[i]]",(t2-t1)/2000.0); 107 108 ierr = PetscMemcpy(x,y,10);CHKERRQ(ierr); 109 ierr = PetscMemcpy(z,zi,10);CHKERRQ(ierr); 110 ierr = PetscFree(z);CHKERRQ(ierr); 111 ierr = PetscFree(zi);CHKERRQ(ierr); 112 ierr = PetscFree(x);CHKERRQ(ierr); 113 ierr = PetscFree(y);CHKERRQ(ierr); 114 PetscRandomDestroy(r); 115 PetscFunctionReturn(0); 116 } 117 118 #undef __FUNCT__ 119 #define __FUNCT__ "test2" 120 int test2(void) 121 { 122 PetscLogDouble t1,t2; 123 double value; 124 int i,ierr,z[20000],zi[20000],intval,tmp; 125 PetscScalar x[20000],y[20000]; 126 PetscRandom r; 127 128 ierr = PetscRandomCreate(PETSC_COMM_SELF,&r);CHKERRQ(ierr); 129 ierr = PetscRandomSetFromOptions(r);CHKERRQ(ierr); 130 131 /* Take care of paging effects */ 132 ierr = PetscGetTime(&t1);CHKERRQ(ierr); 133 134 for (i=0; i<20000; i++) { 135 x[i] = i; 136 y[i] = i; 137 z[i] = i; 138 zi[i] = i; 139 } 140 141 /* Form the random set of integers */ 142 for (i=0; i<20000; i++) { 143 ierr = PetscRandomGetValue(r,&value);CHKERRQ(ierr); 144 intval = (int)(value*20000.0); 145 tmp = z[i]; 146 z[i] = z[intval]; 147 z[intval] = tmp; 148 } 149 150 for (i=0; i<20000; i++) { 151 ierr = PetscRandomGetValue(r,&value);CHKERRQ(ierr); 152 intval = (int)(value*20000.0); 153 tmp = zi[i]; 154 zi[i] = zi[intval]; 155 zi[intval] = tmp; 156 } 157 /* fprintf(stdout,"Done setup\n"); */ 158 159 /* ierr = BlastCache();CHKERRQ(ierr); */ 160 161 ierr = PetscGetTime(&t1);CHKERRQ(ierr); 162 for (i=0; i<2000; i++) { x[i] = y[i]; } 163 ierr = PetscGetTime(&t2);CHKERRQ(ierr); 164 fprintf(stdout,"%-27s : %e sec\n","x[i] = y[i]",(t2-t1)/2000.0); 165 166 /* ierr = BlastCache();CHKERRQ(ierr); */ 167 168 ierr = PetscGetTime(&t1);CHKERRQ(ierr); 169 for (i=0; i<2000; i++) { y[i] = x[z[i]]; } 170 ierr = PetscGetTime(&t2);CHKERRQ(ierr); 171 fprintf(stdout,"%-27s : %e sec\n","x[i] = y[idx[i]]",(t2-t1)/2000.0); 172 173 /* ierr = BlastCache();CHKERRQ(ierr); */ 174 175 ierr = PetscGetTime(&t1);CHKERRQ(ierr); 176 for (i=0; i<2000; i++) { x[z[i]] = y[i]; } 177 ierr = PetscGetTime(&t2);CHKERRQ(ierr); 178 fprintf(stdout,"%-27s : %e sec\n","x[z[i]] = y[i]",(t2-t1)/2000.0); 179 180 /* ierr = BlastCache();CHKERRQ(ierr); */ 181 182 ierr = PetscGetTime(&t1);CHKERRQ(ierr); 183 for (i=0; i<2000; i++) { y[z[i]] = x[zi[i]]; } 184 ierr = PetscGetTime(&t2);CHKERRQ(ierr); 185 fprintf(stdout,"%-27s : %e sec\n","x[z[i]] = y[zi[i]]",(t2-t1)/2000.0); 186 187 188 PetscRandomDestroy(r); 189 PetscFunctionReturn(0); 190 } 191 192 #undef __FUNCT__ 193 #define __FUNCT__ "BlastCache" 194 int BlastCache(void) 195 { 196 int i,ierr,n = 1000000; 197 PetscScalar *x,*y,*z,*a,*b; 198 199 ierr = PetscMalloc(5*n*sizeof(PetscScalar),&x);CHKERRQ(ierr); 200 y = x + n; 201 z = y + n; 202 a = z + n; 203 b = a + n; 204 205 for (i=0; i<n; i++) { 206 a[i] = (PetscScalar) i; 207 y[i] = (PetscScalar) i; 208 z[i] = (PetscScalar) i; 209 b[i] = (PetscScalar) i; 210 x[i] = (PetscScalar) i; 211 } 212 213 for (i=0; i<n; i++) { 214 a[i] = 3.0*x[i] + 2.0*y[i] + 3.3*z[i] - 25.*b[i]; 215 } 216 for (i=0; i<n; i++) { 217 b[i] = 3.0*x[i] + 2.0*y[i] + 3.3*a[i] - 25.*b[i]; 218 } 219 for (i=0; i<n; i++) { 220 z[i] = 3.0*x[i] + 2.0*y[i] + 3.3*a[i] - 25.*b[i]; 221 } 222 ierr = PetscFree(x);CHKERRQ(ierr); 223 PetscFunctionReturn(0); 224 } 225