1 #include <petscsys.h>
2 #include <petsctime.h>
3
/* Forward declarations of the routines defined later in this file. */
int BlastCache(void); /* touches a large scratch buffer between timed loops */
int test1(void);      /* indexing timings on heap-allocated arrays */
int test2(void);      /* indexing timings on stack-allocated arrays */
7
main(int argc,char ** argv)8 int main(int argc, char **argv)
9 {
10 PetscCall(PetscInitialize(&argc, &argv, 0, 0));
11 PetscCall(test1());
12 PetscCall(test2());
13 PetscCall(PetscFinalize());
14 return 0;
15 }
16
test1(void)17 int test1(void)
18 {
19 PetscLogDouble t1, t2;
20 double value;
21 int i, ierr, *z, *zi, intval;
22 PetscScalar *x, *y;
23 PetscRandom r;
24
25 PetscCall(PetscRandomCreate(PETSC_COMM_SELF, &r));
26 PetscCall(PetscRandomSetFromOptions(r));
27 PetscCall(PetscMalloc1(20000, &x));
28 PetscCall(PetscMalloc1(20000, &y));
29
30 PetscCall(PetscMalloc1(2000, &z));
31 PetscCall(PetscMalloc1(2000, &zi));
32
33 /* Take care of paging effects */
34 PetscCall(PetscTime(&t1));
35
36 /* Form the random set of integers */
37 for (i = 0; i < 2000; i++) {
38 PetscCall(PetscRandomGetValue(r, &value));
39 intval = (int)(value * 20000.0);
40 z[i] = intval;
41 }
42
43 for (i = 0; i < 2000; i++) {
44 PetscCall(PetscRandomGetValue(r, &value));
45 intval = (int)(value * 20000.0);
46 zi[i] = intval;
47 }
48 /* fprintf(stdout,"Done setup\n"); */
49
50 PetscCall(BlastCache());
51
52 PetscCall(PetscTime(&t1));
53 for (i = 0; i < 2000; i++) x[i] = y[i];
54 PetscCall(PetscTime(&t2));
55 fprintf(stdout, "%-27s : %e sec\n", "x[i] = y[i]", (t2 - t1) / 2000.0);
56
57 PetscCall(BlastCache());
58
59 PetscCall(PetscTime(&t1));
60 for (i = 0; i < 500; i += 4) {
61 x[i] = y[z[i]];
62 x[1 + i] = y[z[1 + i]];
63 x[2 + i] = y[z[2 + i]];
64 x[3 + i] = y[z[3 + i]];
65 }
66 PetscCall(PetscTime(&t2));
67 fprintf(stdout, "%-27s : %e sec\n", "x[i] = y[idx[i]] - unroll 4", (t2 - t1) / 2000.0);
68
69 PetscCall(BlastCache());
70
71 PetscCall(PetscTime(&t1));
72 for (i = 0; i < 2000; i++) x[i] = y[z[i]];
73 PetscCall(PetscTime(&t2));
74 fprintf(stdout, "%-27s : %e sec\n", "x[i] = y[idx[i]]", (t2 - t1) / 2000.0);
75
76 PetscCall(BlastCache());
77
78 PetscCall(PetscTime(&t1));
79 for (i = 0; i < 1000; i += 2) {
80 x[i] = y[z[i]];
81 x[1 + i] = y[z[1 + i]];
82 }
83 PetscCall(PetscTime(&t2));
84 fprintf(stdout, "%-27s : %e sec\n", "x[i] = y[idx[i]] - unroll 2", (t2 - t1) / 2000.0);
85
86 PetscCall(BlastCache());
87
88 PetscCall(PetscTime(&t1));
89 for (i = 0; i < 2000; i++) x[z[i]] = y[i];
90 PetscCall(PetscTime(&t2));
91 fprintf(stdout, "%-27s : %e sec\n", "x[z[i]] = y[i]", (t2 - t1) / 2000.0);
92
93 PetscCall(BlastCache());
94
95 PetscCall(PetscTime(&t1));
96 for (i = 0; i < 2000; i++) x[z[i]] = y[zi[i]];
97 PetscCall(PetscTime(&t2));
98 fprintf(stdout, "%-27s : %e sec\n", "x[z[i]] = y[zi[i]]", (t2 - t1) / 2000.0);
99
100 PetscCall(PetscArraycpy(x, y, 10));
101 PetscCall(PetscArraycpy(z, zi, 10));
102 PetscCall(PetscFree(z));
103 PetscCall(PetscFree(zi));
104 PetscCall(PetscFree(x));
105 PetscCall(PetscFree(y));
106 PetscCall(PetscRandomDestroy(&r));
107 PetscFunctionReturn(PETSC_SUCCESS);
108 }
109
test2(void)110 int test2(void)
111 {
112 PetscLogDouble t1, t2;
113 double value;
114 int i, ierr, z[20000], zi[20000], intval, tmp;
115 PetscScalar x[20000], y[20000];
116 PetscRandom r;
117
118 PetscCall(PetscRandomCreate(PETSC_COMM_SELF, &r));
119 PetscCall(PetscRandomSetFromOptions(r));
120
121 /* Take care of paging effects */
122 PetscCall(PetscTime(&t1));
123
124 for (i = 0; i < 20000; i++) {
125 x[i] = i;
126 y[i] = i;
127 z[i] = i;
128 zi[i] = i;
129 }
130
131 /* Form the random set of integers */
132 for (i = 0; i < 20000; i++) {
133 PetscCall(PetscRandomGetValue(r, &value));
134 intval = (int)(value * 20000.0);
135 tmp = z[i];
136 z[i] = z[intval];
137 z[intval] = tmp;
138 }
139
140 for (i = 0; i < 20000; i++) {
141 PetscCall(PetscRandomGetValue(r, &value));
142 intval = (int)(value * 20000.0);
143 tmp = zi[i];
144 zi[i] = zi[intval];
145 zi[intval] = tmp;
146 }
147 /* fprintf(stdout,"Done setup\n"); */
148
149 /* PetscCall(BlastCache()); */
150
151 PetscCall(PetscTime(&t1));
152 for (i = 0; i < 2000; i++) x[i] = y[i];
153 PetscCall(PetscTime(&t2));
154 fprintf(stdout, "%-27s : %e sec\n", "x[i] = y[i]", (t2 - t1) / 2000.0);
155
156 /* PetscCall(BlastCache()); */
157
158 PetscCall(PetscTime(&t1));
159 for (i = 0; i < 2000; i++) y[i] = x[z[i]];
160 PetscCall(PetscTime(&t2));
161 fprintf(stdout, "%-27s : %e sec\n", "x[i] = y[idx[i]]", (t2 - t1) / 2000.0);
162
163 /* PetscCall(BlastCache()); */
164
165 PetscCall(PetscTime(&t1));
166 for (i = 0; i < 2000; i++) x[z[i]] = y[i];
167 PetscCall(PetscTime(&t2));
168 fprintf(stdout, "%-27s : %e sec\n", "x[z[i]] = y[i]", (t2 - t1) / 2000.0);
169
170 /* PetscCall(BlastCache()); */
171
172 PetscCall(PetscTime(&t1));
173 for (i = 0; i < 2000; i++) y[z[i]] = x[zi[i]];
174 PetscCall(PetscTime(&t2));
175 fprintf(stdout, "%-27s : %e sec\n", "x[z[i]] = y[zi[i]]", (t2 - t1) / 2000.0);
176
177 PetscCall(PetscRandomDestroy(&r));
178 PetscFunctionReturn(PETSC_SUCCESS);
179 }
180
BlastCache(void)181 int BlastCache(void)
182 {
183 int i, ierr, n = 1000000;
184 PetscScalar *x, *y, *z, *a, *b;
185
186 PetscCall(PetscMalloc1(5 * n, &x));
187 y = x + n;
188 z = y + n;
189 a = z + n;
190 b = a + n;
191
192 for (i = 0; i < n; i++) {
193 a[i] = (PetscScalar)i;
194 y[i] = (PetscScalar)i;
195 z[i] = (PetscScalar)i;
196 b[i] = (PetscScalar)i;
197 x[i] = (PetscScalar)i;
198 }
199
200 for (i = 0; i < n; i++) a[i] = 3.0 * x[i] + 2.0 * y[i] + 3.3 * z[i] - 25. * b[i];
201 for (i = 0; i < n; i++) b[i] = 3.0 * x[i] + 2.0 * y[i] + 3.3 * a[i] - 25. * b[i];
202 for (i = 0; i < n; i++) z[i] = 3.0 * x[i] + 2.0 * y[i] + 3.3 * a[i] - 25. * b[i];
203 PetscCall(PetscFree(x));
204 PetscFunctionReturn(PETSC_SUCCESS);
205 }
206