
This was originally reported as a bug with the symptom being "cvdump crashes when printing an LLD-linked PDB that has an S_FILESTATIC record in it". After some additional investigation, I determined that this was a symptom of a larger problem: the real problem was in the way we emitted the global PDB string table.

As evidence of this, you could take any lld-generated PDB, run cvdump -stringtable on it, and it would return no results. My hypothesis was that cvdump could not *find* the string table to begin with. Normally it does this by looking in the "named stream map", finding the string /names, and using its value as the stream index. If this lookup fails, cvdump fails to load the string table.

To test this hypothesis, I looked at the named stream map generated by a link.exe PDB and emitted exactly those bytes into an LLD-generated PDB. Suddenly, cvdump could read our string table!

This code has always been hacky, and we knew there was something we didn't understand. After all, there were comments to the effect of "we have to emit strings in a specific order, otherwise things don't work". The key to fixing this was finally understanding it.

The way it works is that it makes use of a generic serializable hash map that maps integers to other integers. In this case, the "key" is the offset into a buffer, and the value is the stream number. If you index into the buffer at the offset specified by a given key, you find the name. The underlying cause of all these problems is that we were using the identity function for the hash: if a string's offset in the buffer was 12, the hash value was 12. Instead, we need to hash the string *at that offset*. There is an additional catch: we have to compute the hash as a uint32 and then truncate it to uint16.

Making this work is a little bit annoying, because we use the same hash table in other places as well, and in those places the identity hash function is actually what's desired. I'm not totally happy with the template goo I came up with, but it works in any case.

The reason we never found this bug through our own testing is that we were building a *parallel* hash table (in the form of an llvm::StringMap<>) and doing all of our lookups and "real" hash table work against that. I deleted all of that code, and now everything goes through the real hash table. Then, to test it, I added a unit test which adds 7 strings and queries the associated values, exercising every possible insertion-order permutation of these 7 strings to verify that it really does work as expected.

Differential Revision: https://reviews.llvm.org/D43326

llvm-svn: 325386
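In outline, the fix swaps the identity hash for one that dereferences the key first. The following is a minimal sketch of the idea only, not the actual D43326 interface: the traits struct and the NamesBuffer parameter are hypothetical illustrations, while hashStringV1 is LLVM's PDB string hash from llvm/DebugInfo/PDB/Native/Hash.h.

#include <cstdint>
#include "llvm/ADT/StringRef.h"
#include "llvm/DebugInfo/PDB/Native/Hash.h"

// Hypothetical sketch: keys in the named stream map are offsets into the
// /names buffer, so the hash must be computed from the string stored at that
// offset, not from the offset itself.
struct StringTableHashTraitsSketch {
  // Before the fix: identity hash. An offset of 12 hashed to 12, so cvdump,
  // probing with the real string hash, never found the /names entry.
  static uint32_t buggyHash(uint32_t Offset) { return Offset; }

  // After the fix: find the NUL-terminated string at Offset, hash it as a
  // uint32 with the PDB string hash, then truncate to uint16 as the on-disk
  // format requires.
  static uint16_t fixedHash(uint32_t Offset, llvm::StringRef NamesBuffer) {
    llvm::StringRef S = NamesBuffer.drop_front(Offset).split('\0').first;
    uint32_t H32 = llvm::pdb::hashStringV1(S);
    return static_cast<uint16_t>(H32); // uint32 -> uint16 truncation
  }
};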
239 lines
7.0 KiB
C++
//===- HashTable.cpp - PDB Hash Table -------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#include "llvm/DebugInfo/PDB/Native/HashTable.h"
|
|
#include "llvm/ADT/Optional.h"
|
|
#include "llvm/DebugInfo/PDB/Native/RawError.h"
|
|
#include "llvm/Support/BinaryStreamReader.h"
|
|
#include "llvm/Support/BinaryStreamWriter.h"
|
|
#include "llvm/Support/Error.h"
|
|
#include "llvm/Support/MathExtras.h"
|
|
#include <algorithm>
|
|
#include <cassert>
|
|
#include <cstdint>
|
|
#include <utility>
|
|
|
|
using namespace llvm;
|
|
using namespace llvm::pdb;
|
|
|
|
namespace {
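// Default traits for tables whose keys are already meaningful integers (or
// precomputed hashes): hashing, key recovery, and key lowering are all the
// identity function. As the commit message explains, this is deliberately
// *not* suitable for the /names string table, whose keys are buffer offsets
// that must be hashed by the string stored at that offset (see D43326).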
struct IdentityTraits {
  static uint32_t hash(uint32_t K, const HashTable &Ctx) { return K; }
  static uint32_t realKey(uint32_t K, const HashTable &Ctx) { return K; }
  static uint32_t lowerKey(uint32_t K, const HashTable &Ctx) { return K; }
};

} // namespace

HashTable::HashTable() : HashTable(8) {}

HashTable::HashTable(uint32_t Capacity) { Buckets.resize(Capacity); }

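// Deserialize the table: a fixed Header, the Present and Deleted sparse bit
// vectors, then one (key, value) pair of uint32s per present bucket, with
// consistency checks against the header's Size and Capacity fields.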
Error HashTable::load(BinaryStreamReader &Stream) {
  const Header *H;
  if (auto EC = Stream.readObject(H))
    return EC;
  if (H->Capacity == 0)
    return make_error<RawError>(raw_error_code::corrupt_file,
                                "Invalid Hash Table Capacity");
  if (H->Size > maxLoad(H->Capacity))
    return make_error<RawError>(raw_error_code::corrupt_file,
                                "Invalid Hash Table Size");

  Buckets.resize(H->Capacity);

  if (auto EC = readSparseBitVector(Stream, Present))
    return EC;
  if (Present.count() != H->Size)
    return make_error<RawError>(raw_error_code::corrupt_file,
                                "Present bit vector does not match size!");

  if (auto EC = readSparseBitVector(Stream, Deleted))
    return EC;
  if (Present.intersects(Deleted))
    return make_error<RawError>(raw_error_code::corrupt_file,
                                "Present bit vector intersects deleted!");

  for (uint32_t P : Present) {
    if (auto EC = Stream.readInteger(Buckets[P].first))
      return EC;
    if (auto EC = Stream.readInteger(Buckets[P].second))
      return EC;
  }

  return Error::success();
}

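// Mirror of the layout written by commit(): the Header, the serialized
// Present and Deleted bit vectors (each a word count followed by that many
// words), and one (key, value) pair per present entry.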
uint32_t HashTable::calculateSerializedLength() const {
  uint32_t Size = sizeof(Header);

  int NumBitsP = Present.find_last() + 1;
  int NumBitsD = Deleted.find_last() + 1;

  // Present bit set number of words, followed by that many actual words.
  Size += sizeof(uint32_t);
  Size += alignTo(NumBitsP, sizeof(uint32_t));

  // Deleted bit set number of words, followed by that many actual words.
  Size += sizeof(uint32_t);
  Size += alignTo(NumBitsD, sizeof(uint32_t));

  // One (Key, Value) pair for each entry Present.
  Size += 2 * sizeof(uint32_t) * size();

  return Size;
}

Error HashTable::commit(BinaryStreamWriter &Writer) const {
  Header H;
  H.Size = size();
  H.Capacity = capacity();
  if (auto EC = Writer.writeObject(H))
    return EC;

  if (auto EC = writeSparseBitVector(Writer, Present))
    return EC;

  if (auto EC = writeSparseBitVector(Writer, Deleted))
    return EC;

  for (const auto &Entry : *this) {
    if (auto EC = Writer.writeInteger(Entry.first))
      return EC;
    if (auto EC = Writer.writeInteger(Entry.second))
      return EC;
  }
  return Error::success();
}

void HashTable::clear() {
  Buckets.resize(8);
  Present.clear();
  Deleted.clear();
}

uint32_t HashTable::capacity() const { return Buckets.size(); }

uint32_t HashTable::size() const { return Present.count(); }

HashTableIterator HashTable::begin() const { return HashTableIterator(*this); }

HashTableIterator HashTable::end() const {
  return HashTableIterator(*this, 0, true);
}

HashTableIterator HashTable::find(uint32_t K) const {
  return find_as<IdentityTraits>(K, *this);
}

void HashTable::set(uint32_t K, uint32_t V) {
  set_as<IdentityTraits, uint32_t>(K, V, *this);
}

void HashTable::remove(uint32_t K) { remove_as<IdentityTraits>(K, *this); }

uint32_t HashTable::get(uint32_t K) {
  auto I = find(K);
  assert(I != end());
  return (*I).second;
}

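// Maximum number of entries a table of the given capacity may hold; load()
// rejects tables whose Size exceeds this bound, keeping the table at most
// roughly two-thirds full.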
uint32_t HashTable::maxLoad(uint32_t capacity) { return capacity * 2 / 3 + 1; }

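// Sparse bit vectors are serialized as a uint32 word count followed by that
// many uint32 words, with bit (I * 32) + Idx of the vector stored as bit Idx
// of word I.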
Error HashTable::readSparseBitVector(BinaryStreamReader &Stream,
                                     SparseBitVector<> &V) {
  uint32_t NumWords;
  if (auto EC = Stream.readInteger(NumWords))
    return joinErrors(
        std::move(EC),
        make_error<RawError>(raw_error_code::corrupt_file,
                             "Expected hash table number of words"));

  for (uint32_t I = 0; I != NumWords; ++I) {
    uint32_t Word;
    if (auto EC = Stream.readInteger(Word))
      return joinErrors(std::move(EC),
                        make_error<RawError>(raw_error_code::corrupt_file,
                                             "Expected hash table word"));
    for (unsigned Idx = 0; Idx < 32; ++Idx)
      if (Word & (1U << Idx))
        V.set((I * 32) + Idx);
  }
  return Error::success();
}

Error HashTable::writeSparseBitVector(BinaryStreamWriter &Writer,
                                      SparseBitVector<> &Vec) {
  int ReqBits = Vec.find_last() + 1;
  uint32_t NumWords = alignTo(ReqBits, sizeof(uint32_t)) / sizeof(uint32_t);
  if (auto EC = Writer.writeInteger(NumWords))
    return joinErrors(
        std::move(EC),
        make_error<RawError>(raw_error_code::corrupt_file,
                             "Could not write linear map number of words"));

  uint32_t Idx = 0;
  for (uint32_t I = 0; I != NumWords; ++I) {
    uint32_t Word = 0;
    for (uint32_t WordIdx = 0; WordIdx < 32; ++WordIdx, ++Idx) {
      // Use an unsigned literal so shifting into bit 31 is well-defined,
      // matching the read side above.
      if (Vec.test(Idx))
        Word |= (1U << WordIdx);
    }
    if (auto EC = Writer.writeInteger(Word))
      return joinErrors(std::move(EC), make_error<RawError>(
                                           raw_error_code::corrupt_file,
                                           "Could not write linear map word"));
  }
  return Error::success();
}

HashTableIterator::HashTableIterator(const HashTable &Map, uint32_t Index,
                                     bool IsEnd)
    : Map(&Map), Index(Index), IsEnd(IsEnd) {}

HashTableIterator::HashTableIterator(const HashTable &Map) : Map(&Map) {
  int I = Map.Present.find_first();
  if (I == -1) {
    Index = 0;
    IsEnd = true;
  } else {
    Index = static_cast<uint32_t>(I);
    IsEnd = false;
  }
}

HashTableIterator &HashTableIterator::operator=(const HashTableIterator &R) {
  // Copy the full iterator state; copying only the map pointer would leave
  // Index and IsEnd stale.
  Map = R.Map;
  Index = R.Index;
  IsEnd = R.IsEnd;
  return *this;
}

bool HashTableIterator::operator==(const HashTableIterator &R) const {
  if (IsEnd && R.IsEnd)
    return true;
  if (IsEnd != R.IsEnd)
    return false;

  return (Map == R.Map) && (Index == R.Index);
}

const std::pair<uint32_t, uint32_t> &HashTableIterator::operator*() const {
  assert(Map->Present.test(Index));
  return Map->Buckets[Index];
}

HashTableIterator &HashTableIterator::operator++() {
  while (Index < Map->Buckets.size()) {
    ++Index;
    if (Map->Present.test(Index))
      return *this;
  }

  IsEnd = true;
  return *this;
}