1c6db04a5SJed Brown #include <petscsys.h> 28563dfccSBarry Smith #include <petsctime.h> 38563dfccSBarry Smith 45a655dc6SBarry Smith extern int BlastCache(void); 55a655dc6SBarry Smith extern int test1(void); 65a655dc6SBarry Smith extern int test2(void); 777c4ece6SBarry Smith 81f480b34SSatish Balay int main(int argc, char **argv) 91f480b34SSatish Balay { 109566063dSJacob Faibussowitsch PetscCall(PetscInitialize(&argc, &argv, 0, 0)); 119566063dSJacob Faibussowitsch PetscCall(test1()); 129566063dSJacob Faibussowitsch PetscCall(test2()); 139566063dSJacob Faibussowitsch PetscCall(PetscFinalize()); 14b122ec5aSJacob Faibussowitsch return 0; 1577c4ece6SBarry Smith } 1677c4ece6SBarry Smith 17cf256101SBarry Smith int test1(void) 1877c4ece6SBarry Smith { 19b0a32e0cSBarry Smith PetscLogDouble t1, t2; 2047794344SBarry Smith double value; 212758efb8SSatish Balay int i, ierr, *z, *zi, intval; 22ea709b57SSatish Balay PetscScalar *x, *y; 2377c4ece6SBarry Smith PetscRandom r; 2477c4ece6SBarry Smith 259566063dSJacob Faibussowitsch PetscCall(PetscRandomCreate(PETSC_COMM_SELF, &r)); 269566063dSJacob Faibussowitsch PetscCall(PetscRandomSetFromOptions(r)); 279566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(20000, &x)); 289566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(20000, &y)); 2977c4ece6SBarry Smith 309566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(2000, &z)); 319566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(2000, &zi)); 3277c4ece6SBarry Smith 331f480b34SSatish Balay /* Take care of paging effects */ 349566063dSJacob Faibussowitsch PetscCall(PetscTime(&t1)); 351f480b34SSatish Balay 361f480b34SSatish Balay /* Form the random set of integers */ 3777c4ece6SBarry Smith for (i = 0; i < 2000; i++) { 389566063dSJacob Faibussowitsch PetscCall(PetscRandomGetValue(r, &value)); 3977c4ece6SBarry Smith intval = (int)(value * 20000.0); 40c9a02da4SSatish Balay z[i] = intval; 411f480b34SSatish Balay } 421f480b34SSatish Balay 4377c4ece6SBarry Smith for (i = 0; i < 2000; i++) { 449566063dSJacob Faibussowitsch PetscCall(PetscRandomGetValue(r, &value)); 4577c4ece6SBarry Smith intval = (int)(value * 20000.0); 46ba8edd79SBarry Smith zi[i] = intval; 4777c4ece6SBarry Smith } 48b4d8b9abSSatish Balay /* fprintf(stdout,"Done setup\n"); */ 4977c4ece6SBarry Smith 509566063dSJacob Faibussowitsch PetscCall(BlastCache()); 511f480b34SSatish Balay 529566063dSJacob Faibussowitsch PetscCall(PetscTime(&t1)); 536f2b61bcSKarl Rupp for (i = 0; i < 2000; i++) x[i] = y[i]; 549566063dSJacob Faibussowitsch PetscCall(PetscTime(&t2)); 55b4d8b9abSSatish Balay fprintf(stdout, "%-27s : %e sec\n", "x[i] = y[i]", (t2 - t1) / 2000.0); 561f480b34SSatish Balay 579566063dSJacob Faibussowitsch PetscCall(BlastCache()); 581f480b34SSatish Balay 599566063dSJacob Faibussowitsch PetscCall(PetscTime(&t1)); 60608f96ebSSatish Balay for (i = 0; i < 500; i += 4) { 61608f96ebSSatish Balay x[i] = y[z[i]]; 62608f96ebSSatish Balay x[1 + i] = y[z[1 + i]]; 63608f96ebSSatish Balay x[2 + i] = y[z[2 + i]]; 64608f96ebSSatish Balay x[3 + i] = y[z[3 + i]]; 65608f96ebSSatish Balay } 669566063dSJacob Faibussowitsch PetscCall(PetscTime(&t2)); 67b4d8b9abSSatish Balay fprintf(stdout, "%-27s : %e sec\n", "x[i] = y[idx[i]] - unroll 4", (t2 - t1) / 2000.0); 68608f96ebSSatish Balay 699566063dSJacob Faibussowitsch PetscCall(BlastCache()); 70608f96ebSSatish Balay 719566063dSJacob Faibussowitsch PetscCall(PetscTime(&t1)); 726f2b61bcSKarl Rupp for (i = 0; i < 2000; i++) x[i] = y[z[i]]; 739566063dSJacob Faibussowitsch PetscCall(PetscTime(&t2)); 74b4d8b9abSSatish Balay fprintf(stdout, "%-27s : %e sec\n", "x[i] = y[idx[i]]", (t2 - t1) / 2000.0); 7577c4ece6SBarry Smith 769566063dSJacob Faibussowitsch PetscCall(BlastCache()); 771f480b34SSatish Balay 789566063dSJacob Faibussowitsch PetscCall(PetscTime(&t1)); 79*67595998SJunchao Zhang for (i = 0; i < 1000; i += 2) { 80*67595998SJunchao Zhang x[i] = y[z[i]]; 81*67595998SJunchao Zhang x[1 + i] = y[z[1 + i]]; 82*67595998SJunchao Zhang } 839566063dSJacob Faibussowitsch PetscCall(PetscTime(&t2)); 84b4d8b9abSSatish Balay fprintf(stdout, "%-27s : %e sec\n", "x[i] = y[idx[i]] - unroll 2", (t2 - t1) / 2000.0); 85608f96ebSSatish Balay 869566063dSJacob Faibussowitsch PetscCall(BlastCache()); 87608f96ebSSatish Balay 889566063dSJacob Faibussowitsch PetscCall(PetscTime(&t1)); 896f2b61bcSKarl Rupp for (i = 0; i < 2000; i++) x[z[i]] = y[i]; 909566063dSJacob Faibussowitsch PetscCall(PetscTime(&t2)); 91b4d8b9abSSatish Balay fprintf(stdout, "%-27s : %e sec\n", "x[z[i]] = y[i]", (t2 - t1) / 2000.0); 921f480b34SSatish Balay 939566063dSJacob Faibussowitsch PetscCall(BlastCache()); 9477c4ece6SBarry Smith 959566063dSJacob Faibussowitsch PetscCall(PetscTime(&t1)); 966f2b61bcSKarl Rupp for (i = 0; i < 2000; i++) x[z[i]] = y[zi[i]]; 979566063dSJacob Faibussowitsch PetscCall(PetscTime(&t2)); 98b4d8b9abSSatish Balay fprintf(stdout, "%-27s : %e sec\n", "x[z[i]] = y[zi[i]]", (t2 - t1) / 2000.0); 9977c4ece6SBarry Smith 1009566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(x, y, 10)); 1019566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(z, zi, 10)); 1029566063dSJacob Faibussowitsch PetscCall(PetscFree(z)); 1039566063dSJacob Faibussowitsch PetscCall(PetscFree(zi)); 1049566063dSJacob Faibussowitsch PetscCall(PetscFree(x)); 1059566063dSJacob Faibussowitsch PetscCall(PetscFree(y)); 1069566063dSJacob Faibussowitsch PetscCall(PetscRandomDestroy(&r)); 1073ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 10877c4ece6SBarry Smith } 10977c4ece6SBarry Smith 110cf256101SBarry Smith int test2(void) 11177c4ece6SBarry Smith { 112b0a32e0cSBarry Smith PetscLogDouble t1, t2; 11347794344SBarry Smith double value; 114d3093643SSatish Balay int i, ierr, z[20000], zi[20000], intval, tmp; 115ea709b57SSatish Balay PetscScalar x[20000], y[20000]; 11677c4ece6SBarry Smith PetscRandom r; 11777c4ece6SBarry Smith 1189566063dSJacob Faibussowitsch PetscCall(PetscRandomCreate(PETSC_COMM_SELF, &r)); 1199566063dSJacob Faibussowitsch PetscCall(PetscRandomSetFromOptions(r)); 12077c4ece6SBarry Smith 12177c4ece6SBarry Smith /* Take care of paging effects */ 1229566063dSJacob Faibussowitsch PetscCall(PetscTime(&t1)); 12377c4ece6SBarry Smith 12477c4ece6SBarry Smith for (i = 0; i < 20000; i++) { 12577c4ece6SBarry Smith x[i] = i; 12677c4ece6SBarry Smith y[i] = i; 127d3093643SSatish Balay z[i] = i; 128d3093643SSatish Balay zi[i] = i; 12977c4ece6SBarry Smith } 13077c4ece6SBarry Smith 13177c4ece6SBarry Smith /* Form the random set of integers */ 132d3093643SSatish Balay for (i = 0; i < 20000; i++) { 1339566063dSJacob Faibussowitsch PetscCall(PetscRandomGetValue(r, &value)); 13477c4ece6SBarry Smith intval = (int)(value * 20000.0); 13577c4ece6SBarry Smith tmp = z[i]; 13677c4ece6SBarry Smith z[i] = z[intval]; 13777c4ece6SBarry Smith z[intval] = tmp; 13877c4ece6SBarry Smith } 13977c4ece6SBarry Smith 140d3093643SSatish Balay for (i = 0; i < 20000; i++) { 1419566063dSJacob Faibussowitsch PetscCall(PetscRandomGetValue(r, &value)); 14277c4ece6SBarry Smith intval = (int)(value * 20000.0); 14377c4ece6SBarry Smith tmp = zi[i]; 14477c4ece6SBarry Smith zi[i] = zi[intval]; 14577c4ece6SBarry Smith zi[intval] = tmp; 14677c4ece6SBarry Smith } 147b4d8b9abSSatish Balay /* fprintf(stdout,"Done setup\n"); */ 14877c4ece6SBarry Smith 1499566063dSJacob Faibussowitsch /* PetscCall(BlastCache()); */ 15077c4ece6SBarry Smith 1519566063dSJacob Faibussowitsch PetscCall(PetscTime(&t1)); 1526f2b61bcSKarl Rupp for (i = 0; i < 2000; i++) x[i] = y[i]; 1539566063dSJacob Faibussowitsch PetscCall(PetscTime(&t2)); 154b4d8b9abSSatish Balay fprintf(stdout, "%-27s : %e sec\n", "x[i] = y[i]", (t2 - t1) / 2000.0); 15577c4ece6SBarry Smith 1569566063dSJacob Faibussowitsch /* PetscCall(BlastCache()); */ 15777c4ece6SBarry Smith 1589566063dSJacob Faibussowitsch PetscCall(PetscTime(&t1)); 1596f2b61bcSKarl Rupp for (i = 0; i < 2000; i++) y[i] = x[z[i]]; 1609566063dSJacob Faibussowitsch PetscCall(PetscTime(&t2)); 161b4d8b9abSSatish Balay fprintf(stdout, "%-27s : %e sec\n", "x[i] = y[idx[i]]", (t2 - t1) / 2000.0); 16277c4ece6SBarry Smith 1639566063dSJacob Faibussowitsch /* PetscCall(BlastCache()); */ 16477c4ece6SBarry Smith 1659566063dSJacob Faibussowitsch PetscCall(PetscTime(&t1)); 1666f2b61bcSKarl Rupp for (i = 0; i < 2000; i++) x[z[i]] = y[i]; 1679566063dSJacob Faibussowitsch PetscCall(PetscTime(&t2)); 168b4d8b9abSSatish Balay fprintf(stdout, "%-27s : %e sec\n", "x[z[i]] = y[i]", (t2 - t1) / 2000.0); 16977c4ece6SBarry Smith 1709566063dSJacob Faibussowitsch /* PetscCall(BlastCache()); */ 17177c4ece6SBarry Smith 1729566063dSJacob Faibussowitsch PetscCall(PetscTime(&t1)); 1736f2b61bcSKarl Rupp for (i = 0; i < 2000; i++) y[z[i]] = x[zi[i]]; 1749566063dSJacob Faibussowitsch PetscCall(PetscTime(&t2)); 175b4d8b9abSSatish Balay fprintf(stdout, "%-27s : %e sec\n", "x[z[i]] = y[zi[i]]", (t2 - t1) / 2000.0); 17677c4ece6SBarry Smith 1779566063dSJacob Faibussowitsch PetscCall(PetscRandomDestroy(&r)); 1783ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 17977c4ece6SBarry Smith } 18077c4ece6SBarry Smith 181465d0859SSatish Balay int BlastCache(void) 18277c4ece6SBarry Smith { 1839ae0b57aSSatish Balay int i, ierr, n = 1000000; 184ea709b57SSatish Balay PetscScalar *x, *y, *z, *a, *b; 18577c4ece6SBarry Smith 1869566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(5 * n, &x)); 18777c4ece6SBarry Smith y = x + n; 18877c4ece6SBarry Smith z = y + n; 18977c4ece6SBarry Smith a = z + n; 19077c4ece6SBarry Smith b = a + n; 19177c4ece6SBarry Smith 19277c4ece6SBarry Smith for (i = 0; i < n; i++) { 19387828ca2SBarry Smith a[i] = (PetscScalar)i; 19487828ca2SBarry Smith y[i] = (PetscScalar)i; 19587828ca2SBarry Smith z[i] = (PetscScalar)i; 19687828ca2SBarry Smith b[i] = (PetscScalar)i; 19787828ca2SBarry Smith x[i] = (PetscScalar)i; 198ba8edd79SBarry Smith } 199ba8edd79SBarry Smith 2006f2b61bcSKarl Rupp for (i = 0; i < n; i++) a[i] = 3.0 * x[i] + 2.0 * y[i] + 3.3 * z[i] - 25. * b[i]; 2016f2b61bcSKarl Rupp for (i = 0; i < n; i++) b[i] = 3.0 * x[i] + 2.0 * y[i] + 3.3 * a[i] - 25. * b[i]; 2026f2b61bcSKarl Rupp for (i = 0; i < n; i++) z[i] = 3.0 * x[i] + 2.0 * y[i] + 3.3 * a[i] - 25. * b[i]; 2039566063dSJacob Faibussowitsch PetscCall(PetscFree(x)); 2043ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2051f480b34SSatish Balay } 206