1 2 #include <petscsys.h> 3 4 extern int BlastCache(void); 5 extern int test1(void); 6 extern int test2(void); 7 8 #undef __FUNCT__ 9 #define __FUNCT__ "main" 10 int main(int argc,char **argv) 11 { 12 PetscErrorCode ierr; 13 14 PetscInitialize(&argc,&argv,0,0); 15 16 ierr = test1();CHKERRQ(ierr); 17 ierr = test2();CHKERRQ(ierr); 18 19 ierr = PetscFinalize(); 20 PetscFunctionReturn(0); 21 } 22 23 #undef __FUNCT__ 24 #define __FUNCT__ "test1" 25 int test1(void) 26 { 27 PetscLogDouble t1,t2; 28 double value; 29 int i,ierr,*z,*zi,intval; 30 PetscScalar *x,*y; 31 PetscRandom r; 32 33 ierr = PetscRandomCreate(PETSC_COMM_SELF,&r);CHKERRQ(ierr); 34 ierr = PetscRandomSetFromOptions(r);CHKERRQ(ierr); 35 ierr = PetscMalloc(20000*sizeof(PetscScalar),&x);CHKERRQ(ierr); 36 ierr = PetscMalloc(20000*sizeof(PetscScalar),&y);CHKERRQ(ierr); 37 38 ierr = PetscMalloc(2000*sizeof(int),&z);CHKERRQ(ierr); 39 ierr = PetscMalloc(2000*sizeof(int),&zi);CHKERRQ(ierr); 40 41 42 43 /* Take care of paging effects */ 44 ierr = PetscGetTime(&t1);CHKERRQ(ierr); 45 46 /* Form the random set of integers */ 47 for (i=0; i<2000; i++) { 48 ierr = PetscRandomGetValue(r,&value);CHKERRQ(ierr); 49 intval = (int)(value*20000.0); 50 z[i] = intval; 51 } 52 53 for (i=0; i<2000; i++) { 54 ierr = PetscRandomGetValue(r,&value);CHKERRQ(ierr); 55 intval = (int)(value*20000.0); 56 zi[i] = intval; 57 } 58 /* fprintf(stdout,"Done setup\n"); */ 59 60 ierr = BlastCache();CHKERRQ(ierr); 61 62 ierr = PetscGetTime(&t1);CHKERRQ(ierr); 63 for (i=0; i<2000; i++) { x[i] = y[i]; } 64 ierr = PetscGetTime(&t2);CHKERRQ(ierr); 65 fprintf(stdout,"%-27s : %e sec\n","x[i] = y[i]",(t2-t1)/2000.0); 66 67 ierr = BlastCache();CHKERRQ(ierr); 68 69 ierr = PetscGetTime(&t1);CHKERRQ(ierr); 70 for (i=0; i<500; i+=4) { 71 x[i] = y[z[i]]; 72 x[1+i] = y[z[1+i]]; 73 x[2+i] = y[z[2+i]]; 74 x[3+i] = y[z[3+i]]; 75 } 76 ierr = PetscGetTime(&t2);CHKERRQ(ierr); 77 fprintf(stdout,"%-27s : %e sec\n","x[i] = y[idx[i]] - unroll 4",(t2-t1)/2000.0); 78 79 ierr = BlastCache();CHKERRQ(ierr); 80 81 ierr = PetscGetTime(&t1);CHKERRQ(ierr) 82 for (i=0; i<2000; i++) { x[i] = y[z[i]]; } 83 ierr = PetscGetTime(&t2);CHKERRQ(ierr); 84 fprintf(stdout,"%-27s : %e sec\n","x[i] = y[idx[i]]",(t2-t1)/2000.0); 85 86 ierr = BlastCache();CHKERRQ(ierr); 87 88 ierr = PetscGetTime(&t1);CHKERRQ(ierr); 89 for (i=0; i<1000; i+=2) { x[i] = y[z[i]]; x[1+i] = y[z[1+i]]; } 90 ierr = PetscGetTime(&t2);CHKERRQ(ierr); 91 fprintf(stdout,"%-27s : %e sec\n","x[i] = y[idx[i]] - unroll 2",(t2-t1)/2000.0); 92 93 ierr = BlastCache();CHKERRQ(ierr); 94 95 ierr = PetscGetTime(&t1);CHKERRQ(ierr); 96 for (i=0; i<2000; i++) { x[z[i]] = y[i]; } 97 ierr = PetscGetTime(&t2);CHKERRQ(ierr); 98 fprintf(stdout,"%-27s : %e sec\n","x[z[i]] = y[i]",(t2-t1)/2000.0); 99 100 ierr = BlastCache();CHKERRQ(ierr); 101 102 ierr = PetscGetTime(&t1);CHKERRQ(ierr); 103 for (i=0; i<2000; i++) { x[z[i]] = y[zi[i]]; } 104 ierr = PetscGetTime(&t2);CHKERRQ(ierr); 105 fprintf(stdout,"%-27s : %e sec\n","x[z[i]] = y[zi[i]]",(t2-t1)/2000.0); 106 107 ierr = PetscMemcpy(x,y,10);CHKERRQ(ierr); 108 ierr = PetscMemcpy(z,zi,10);CHKERRQ(ierr); 109 ierr = PetscFree(z);CHKERRQ(ierr); 110 ierr = PetscFree(zi);CHKERRQ(ierr); 111 ierr = PetscFree(x);CHKERRQ(ierr); 112 ierr = PetscFree(y);CHKERRQ(ierr); 113 ierr = PetscRandomDestroy(&r);CHKERRQ(ierr); 114 PetscFunctionReturn(0); 115 } 116 117 #undef __FUNCT__ 118 #define __FUNCT__ "test2" 119 int test2(void) 120 { 121 PetscLogDouble t1,t2; 122 double value; 123 int i,ierr,z[20000],zi[20000],intval,tmp; 124 PetscScalar x[20000],y[20000]; 125 PetscRandom r; 126 127 ierr = PetscRandomCreate(PETSC_COMM_SELF,&r);CHKERRQ(ierr); 128 ierr = PetscRandomSetFromOptions(r);CHKERRQ(ierr); 129 130 /* Take care of paging effects */ 131 ierr = PetscGetTime(&t1);CHKERRQ(ierr); 132 133 for (i=0; i<20000; i++) { 134 x[i] = i; 135 y[i] = i; 136 z[i] = i; 137 zi[i] = i; 138 } 139 140 /* Form the random set of integers */ 141 for (i=0; i<20000; i++) { 142 ierr = PetscRandomGetValue(r,&value);CHKERRQ(ierr); 143 intval = (int)(value*20000.0); 144 tmp = z[i]; 145 z[i] = z[intval]; 146 z[intval] = tmp; 147 } 148 149 for (i=0; i<20000; i++) { 150 ierr = PetscRandomGetValue(r,&value);CHKERRQ(ierr); 151 intval = (int)(value*20000.0); 152 tmp = zi[i]; 153 zi[i] = zi[intval]; 154 zi[intval] = tmp; 155 } 156 /* fprintf(stdout,"Done setup\n"); */ 157 158 /* ierr = BlastCache();CHKERRQ(ierr); */ 159 160 ierr = PetscGetTime(&t1);CHKERRQ(ierr); 161 for (i=0; i<2000; i++) { x[i] = y[i]; } 162 ierr = PetscGetTime(&t2);CHKERRQ(ierr); 163 fprintf(stdout,"%-27s : %e sec\n","x[i] = y[i]",(t2-t1)/2000.0); 164 165 /* ierr = BlastCache();CHKERRQ(ierr); */ 166 167 ierr = PetscGetTime(&t1);CHKERRQ(ierr); 168 for (i=0; i<2000; i++) { y[i] = x[z[i]]; } 169 ierr = PetscGetTime(&t2);CHKERRQ(ierr); 170 fprintf(stdout,"%-27s : %e sec\n","x[i] = y[idx[i]]",(t2-t1)/2000.0); 171 172 /* ierr = BlastCache();CHKERRQ(ierr); */ 173 174 ierr = PetscGetTime(&t1);CHKERRQ(ierr); 175 for (i=0; i<2000; i++) { x[z[i]] = y[i]; } 176 ierr = PetscGetTime(&t2);CHKERRQ(ierr); 177 fprintf(stdout,"%-27s : %e sec\n","x[z[i]] = y[i]",(t2-t1)/2000.0); 178 179 /* ierr = BlastCache();CHKERRQ(ierr); */ 180 181 ierr = PetscGetTime(&t1);CHKERRQ(ierr); 182 for (i=0; i<2000; i++) { y[z[i]] = x[zi[i]]; } 183 ierr = PetscGetTime(&t2);CHKERRQ(ierr); 184 fprintf(stdout,"%-27s : %e sec\n","x[z[i]] = y[zi[i]]",(t2-t1)/2000.0); 185 186 187 ierr = PetscRandomDestroy(&r);CHKERRQ(ierr); 188 PetscFunctionReturn(0); 189 } 190 191 #undef __FUNCT__ 192 #define __FUNCT__ "BlastCache" 193 int BlastCache(void) 194 { 195 int i,ierr,n = 1000000; 196 PetscScalar *x,*y,*z,*a,*b; 197 198 ierr = PetscMalloc(5*n*sizeof(PetscScalar),&x);CHKERRQ(ierr); 199 y = x + n; 200 z = y + n; 201 a = z + n; 202 b = a + n; 203 204 for (i=0; i<n; i++) { 205 a[i] = (PetscScalar) i; 206 y[i] = (PetscScalar) i; 207 z[i] = (PetscScalar) i; 208 b[i] = (PetscScalar) i; 209 x[i] = (PetscScalar) i; 210 } 211 212 for (i=0; i<n; i++) { 213 a[i] = 3.0*x[i] + 2.0*y[i] + 3.3*z[i] - 25.*b[i]; 214 } 215 for (i=0; i<n; i++) { 216 b[i] = 3.0*x[i] + 2.0*y[i] + 3.3*a[i] - 25.*b[i]; 217 } 218 for (i=0; i<n; i++) { 219 z[i] = 3.0*x[i] + 2.0*y[i] + 3.3*a[i] - 25.*b[i]; 220 } 221 ierr = PetscFree(x);CHKERRQ(ierr); 222 PetscFunctionReturn(0); 223 } 224