teak-llvm/libcxx/benchmarks/ContainerBenchmarks.hpp
Eric Fiselier d4ace50ed0 Fix PR35637: suboptimal codegen for vector<unsigned char>.
The optimizer is petulant and temperamental. In this case LLVM failed to lower
the the "insert at end" loop used by`vector<unsigned char>` to a `memset` despite
`memset` being substantially faster over a range of bytes.

LLVM has the ability to lower loops to `memset` whet appropriate, but the
odd nature of libc++'s loops prevented the optimization from taking places.

This patch addresses the issue by rewriting the loops from the form
`do [ ... --__n; } while (__n > 0);` to instead use a for loop over a pointer
range (For example: `for (auto *__i = ...; __i < __e; ++__i)`).

This patch also rewrites the asan annotations to unposion all additional memory
at the start of the loop instead of once per iterations. This could potentially
permit false negatives where the constructor of element N attempts to access
element N + 1 during its construction.

The before and after results for the `BM_ConstructSize/vector_byte/5140480_mean`
benchmark (run 5 times) are:

--------------------------------------------------------------------------------------------
Benchmark                                                 Time             CPU   Iterations
--------------------------------------------------------------------------------------------
Before
------
BM_ConstructSize/vector_byte/5140480_mean          12530140 ns     12469693 ns            N/A
BM_ConstructSize/vector_byte/5140480_median        12512818 ns     12445571 ns            N/A
BM_ConstructSize/vector_byte/5140480_stddev          106224 ns       107907 ns            5
-----
After
-----
BM_ConstructSize/vector_byte/5140480_mean            167285 ns       166500 ns            N/A
BM_ConstructSize/vector_byte/5140480_median          166749 ns       166069 ns            N/A
BM_ConstructSize/vector_byte/5140480_stddev            3242 ns         3184 ns            5

llvm-svn: 367183
2019-07-28 04:37:02 +00:00

131 lines
3.8 KiB
C++

#ifndef BENCHMARK_CONTAINER_BENCHMARKS_HPP
#define BENCHMARK_CONTAINER_BENCHMARKS_HPP
#include <cassert>
#include "benchmark/benchmark.h"
namespace ContainerBenchmarks {
template <class Container>
void BM_ConstructSize(benchmark::State& st, Container) {
auto size = st.range(0);
for (auto _ : st) {
Container c(size);
benchmark::DoNotOptimize(c.data());
}
}
template <class Container>
void BM_ConstructSizeValue(benchmark::State& st, Container, typename Container::value_type const& val) {
const auto size = st.range(0);
for (auto _ : st) {
Container c(size, val);
benchmark::DoNotOptimize(c.data());
}
}
template <class Container, class GenInputs>
void BM_ConstructIterIter(benchmark::State& st, Container, GenInputs gen) {
auto in = gen(st.range(0));
const auto begin = in.begin();
const auto end = in.end();
benchmark::DoNotOptimize(&in);
while (st.KeepRunning()) {
Container c(begin, end);
benchmark::DoNotOptimize(c.data());
}
}
template <class Container, class GenInputs>
void BM_InsertValue(benchmark::State& st, Container c, GenInputs gen) {
auto in = gen(st.range(0));
const auto end = in.end();
while (st.KeepRunning()) {
c.clear();
for (auto it = in.begin(); it != end; ++it) {
benchmark::DoNotOptimize(&(*c.insert(*it).first));
}
benchmark::ClobberMemory();
}
}
template <class Container, class GenInputs>
void BM_InsertValueRehash(benchmark::State& st, Container c, GenInputs gen) {
auto in = gen(st.range(0));
const auto end = in.end();
while (st.KeepRunning()) {
c.clear();
c.rehash(16);
for (auto it = in.begin(); it != end; ++it) {
benchmark::DoNotOptimize(&(*c.insert(*it).first));
}
benchmark::ClobberMemory();
}
}
template <class Container, class GenInputs>
void BM_InsertDuplicate(benchmark::State& st, Container c, GenInputs gen) {
auto in = gen(st.range(0));
const auto end = in.end();
c.insert(in.begin(), in.end());
benchmark::DoNotOptimize(&c);
benchmark::DoNotOptimize(&in);
while (st.KeepRunning()) {
for (auto it = in.begin(); it != end; ++it) {
benchmark::DoNotOptimize(&(*c.insert(*it).first));
}
benchmark::ClobberMemory();
}
}
template <class Container, class GenInputs>
void BM_EmplaceDuplicate(benchmark::State& st, Container c, GenInputs gen) {
auto in = gen(st.range(0));
const auto end = in.end();
c.insert(in.begin(), in.end());
benchmark::DoNotOptimize(&c);
benchmark::DoNotOptimize(&in);
while (st.KeepRunning()) {
for (auto it = in.begin(); it != end; ++it) {
benchmark::DoNotOptimize(&(*c.emplace(*it).first));
}
benchmark::ClobberMemory();
}
}
template <class Container, class GenInputs>
static void BM_Find(benchmark::State& st, Container c, GenInputs gen) {
auto in = gen(st.range(0));
c.insert(in.begin(), in.end());
benchmark::DoNotOptimize(&(*c.begin()));
const auto end = in.data() + in.size();
while (st.KeepRunning()) {
for (auto it = in.data(); it != end; ++it) {
benchmark::DoNotOptimize(&(*c.find(*it)));
}
benchmark::ClobberMemory();
}
}
template <class Container, class GenInputs>
static void BM_FindRehash(benchmark::State& st, Container c, GenInputs gen) {
c.rehash(8);
auto in = gen(st.range(0));
c.insert(in.begin(), in.end());
benchmark::DoNotOptimize(&(*c.begin()));
const auto end = in.data() + in.size();
while (st.KeepRunning()) {
for (auto it = in.data(); it != end; ++it) {
benchmark::DoNotOptimize(&(*c.find(*it)));
}
benchmark::ClobberMemory();
}
}
} // end namespace ContainerBenchmarks
#endif // BENCHMARK_CONTAINER_BENCHMARKS_HPP