CPU Cache探秘

CPU Cache探秘

分析如下代码

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
#include <iomanip>
#include <iostream>
#include <chrono>
#include <memory>
#include <random>
#include <algorithm>

int64_t SimpleSum(const std::unique_ptr<int32_t[]>& data, const int32_t& size, const int32_t& flag)
{
int64_t sum = 0;

for (int32_t idx = 0; idx < size; ++idx) {
int32_t num = data[idx];
if (num < flag) {
sum += num;
}
}

return sum;
}

std::unique_ptr<int32_t[]> MakeData(const int32_t& size)
{
std::unique_ptr<int32_t[]> data(new int32_t[size]);

std::random_device rd;
std::mt19937 mt(rd());
std::uniform_real_distribution<double> dist(INT32_MIN, INT32_MAX);

for (int32_t idx = 0; idx < size; ++idx) {
data[idx] = dist(mt);
}

return data;
}

int main()
{
int32_t size = 128 * 1024 * 1024;
int32_t loop = 1;
std::unique_ptr<int32_t[]> data = MakeData(size);

int64_t result;
{
auto t1 = std::chrono::high_resolution_clock::now();
for (auto i = 0; i < loop; ++i) {
result = SimpleSum(data, size, 123456789);
}
auto t2 = std::chrono::high_resolution_clock::now();

std::cout << "time1:" << std::chrono::duration_cast<std::chrono::milliseconds>(t2 - t1).count()
<< std::endl
<< " result:" << result << std::endl;
}

std::sort(data.get(), data.get() + size);

{
auto t1 = std::chrono::high_resolution_clock::now();
for (auto i = 0; i < loop; ++i) {
result = SimpleSum(data, size, 123456789);
}
auto t2 = std::chrono::high_resolution_clock::now();

std::cout << "time1:" << std::chrono::duration_cast<std::chrono::milliseconds>(t2 - t1).count()
<< std::endl
<< " result:" << result << std::endl;
}

return 0;
}

CPU: i7-10700
OS: windows10 x64
Toolset: MSVC 14.2
Config: Release X86
Compile Flags: /permissive- /ifcOutput "Release\" /GS /GL /analyze- /W3 /Gy /Zc:wchar_t /Zi /Gm- /O2 /sdl /Fd"Release\vc142.pdb" /Zc:inline /fp:precise /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /D "_UNICODE" /D "UNICODE" /errorReport:prompt /WX- /Zc:forScope /Gd /Oy- /Oi /MD /FC /Fa"Release\" /EHsc /nologo /Fo"Release\" /Fp"Release\CPU Cache探秘.pch" /diagnostics:column

分析time1和time2会有怎样的大小关系?

运行

time2显著比time1小,节省了84%的时间。它们的算法一致,参数相同,仅仅是计算用的数据进行了排序,为什么相差这么多时间呢?

分析


本博客所有文章除特别声明外,均采用 CC BY-SA 4.0 协议 ,转载请注明出处!