mirror of
https://github.com/Gericom/teak-llvm.git
synced 2025-06-19 19:45:40 -04:00

The construction was explained in https://reviews.llvm.org/D44810?id=139526#inline-391999 but reading the code shouldn't require hunting down old reviews to understand it. The precomputed list was missing an entry for the empty list case, and one entry at the very end. (The current last entry is the last one where 3 * BucketCount fits in a signed int, but the reference implementation uses unsigneds as far as I can tell, so there's room for one more entry.) No behavior change for inputs seen in practice. Differential Revision: https://reviews.llvm.org/D64738 llvm-svn: 366107
230 lines
6.9 KiB
C++
230 lines
6.9 KiB
C++
//===- PDBStringTableBuilder.cpp - PDB String Table -------------*- C++ -*-===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "llvm/DebugInfo/PDB/Native/PDBStringTableBuilder.h"
|
|
|
|
#include "llvm/ADT/ArrayRef.h"
|
|
#include "llvm/DebugInfo/PDB/Native/Hash.h"
|
|
#include "llvm/DebugInfo/PDB/Native/RawTypes.h"
|
|
#include "llvm/Support/BinaryStreamWriter.h"
|
|
#include "llvm/Support/Endian.h"
|
|
|
|
#include <map>
|
|
|
|
using namespace llvm;
|
|
using namespace llvm::msf;
|
|
using namespace llvm::support;
|
|
using namespace llvm::support::endian;
|
|
using namespace llvm::pdb;
|
|
|
|
/// Binds these hash traits to \p Table. Only the address of the builder is
/// stored, so the builder must outlive this traits object.
StringTableHashTraits::StringTableHashTraits(PDBStringTableBuilder &Table)
    : Table(&Table) {}
|
|
|
|
/// Returns the hash for lookup key \p S: the id the bound string table reports
/// for \p S, truncated to its low 16 bits.
uint32_t StringTableHashTraits::hashLookupKey(StringRef S) const {
  const uint32_t StringId = Table->getIdForString(S);

  // The reference implementation doesn't include code for /src/headerblock
  // handling, but it can only read natvis entries from lld's PDB files if this
  // hash function truncates the hash to 16 bits.
  // PDB/include/misc.h in the reference implementation has a hashSz() function
  // that returns an unsigned short; that seems to be what is used for
  // /src/headerblock.
  const uint16_t Truncated = static_cast<uint16_t>(StringId);
  return Truncated;
}
|
|
|
|
/// Maps a stored key (\p Offset) back to the string it stands for by asking
/// the bound string table for the string with that id.
StringRef StringTableHashTraits::storageKeyToLookupKey(uint32_t Offset) const {
  StringRef LookupKey = Table->getStringForId(Offset);
  return LookupKey;
}
|
|
|
|
/// Maps lookup key \p S to the key that gets stored: \p S is inserted into the
/// bound string table and the value returned by that insertion is used.
uint32_t StringTableHashTraits::lookupKeyToStorageKey(StringRef S) {
  const uint32_t StorageKey = Table->insert(S);
  return StorageKey;
}
|
|
|
|
/// Inserts \p S into the underlying string table and returns the value that
/// table reports for the insertion.
uint32_t PDBStringTableBuilder::insert(StringRef S) {
  const uint32_t Id = Strings.insert(S);
  return Id;
}
|
|
|
|
/// Returns the id the underlying string table associates with \p S.
uint32_t PDBStringTableBuilder::getIdForString(StringRef S) const {
  const uint32_t Id = Strings.getIdForString(S);
  return Id;
}
|
|
|
|
/// Returns the string the underlying string table associates with \p Id.
StringRef PDBStringTableBuilder::getStringForId(uint32_t Id) const {
  StringRef Str = Strings.getStringForId(Id);
  return Str;
}
|
|
|
|
/// Returns the number of hash buckets to use for a string table holding
/// \p NumStrings strings.
///
/// The result is the BucketCount of the first table entry whose StringCount is
/// greater than or equal to \p NumStrings.
static uint32_t computeBucketCount(uint32_t NumStrings) {
  // This is a precomputed list of Buckets given the specified number of
  // strings.  Matching the reference algorithm exactly is not strictly
  // necessary for correctness, but it helps when comparing LLD's PDBs with
  // Microsoft's PDBs so as to eliminate superfluous differences.
  // The reference implementation does (in nmt.h, NMT::grow()):
  //   unsigned StringCount = 0;
  //   unsigned BucketCount = 1;
  //   fn insert() {
  //     ++StringCount;
  //     if (BucketCount * 3 / 4 < StringCount)
  //       BucketCount = BucketCount * 3 / 2 + 1;
  //   }
  // This list contains all StringCount, BucketCount pairs where BucketCount was
  // just incremented.  It ends before the first BucketCount entry where
  // BucketCount * 3 would overflow a 32-bit unsigned int.
  //
  // The table is a constant array rather than a std::map: it needs no heap
  // allocation or dynamic initialization, and it cannot be modified by
  // accident.
  struct GrowthStep {
    uint32_t StringCount;
    uint32_t BucketCount;
  };
  static const GrowthStep GrowthSteps[] = {
      {0, 1},
      {1, 2},
      {2, 4},
      {4, 7},
      {6, 11},
      {9, 17},
      {13, 26},
      {20, 40},
      {31, 61},
      {46, 92},
      {70, 139},
      {105, 209},
      {157, 314},
      {236, 472},
      {355, 709},
      {532, 1064},
      {799, 1597},
      {1198, 2396},
      {1798, 3595},
      {2697, 5393},
      {4045, 8090},
      {6068, 12136},
      {9103, 18205},
      {13654, 27308},
      {20482, 40963},
      {30723, 61445},
      {46084, 92168},
      {69127, 138253},
      {103690, 207380},
      {155536, 311071},
      {233304, 466607},
      {349956, 699911},
      {524934, 1049867},
      {787401, 1574801},
      {1181101, 2362202},
      {1771652, 3543304},
      {2657479, 5314957},
      {3986218, 7972436},
      {5979328, 11958655},
      {8968992, 17937983},
      {13453488, 26906975},
      {20180232, 40360463},
      {30270348, 60540695},
      {45405522, 90811043},
      {68108283, 136216565},
      {102162424, 204324848},
      {153243637, 306487273},
      {229865455, 459730910},
      {344798183, 689596366},
      {517197275, 1034394550},
      {775795913, 1551591826},
      {1163693870, 2327387740}};

  // The entries are sorted by StringCount, so the first entry that is not
  // smaller than NumStrings is the one a lower_bound search would find. The
  // table is small enough that a linear scan is fine.
  // NOTE(review): for a count strictly between two listed StringCounts this
  // yields the *next* step's BucketCount (e.g. 3 -> 7), while the pseudo-code
  // above would still be at the previous step's value. This long-standing
  // behavior is deliberately kept; confirm against the reference
  // implementation before changing it.
  for (const GrowthStep &Step : GrowthSteps)
    if (Step.StringCount >= NumStrings)
      return Step.BucketCount;

  // More strings than the table covers. Assert in debug builds; in release
  // builds return the largest known bucket count instead of reading past the
  // end of the table.
  assert(false && "NumStrings exceeds the precomputed bucket table");
  return GrowthSteps[sizeof(GrowthSteps) / sizeof(GrowthSteps[0]) - 1]
      .BucketCount;
}
|
|
|
|
/// Returns the serialized size in bytes of the hash table: one 32-bit size
/// field followed by one 32-bit slot per bucket.
uint32_t PDBStringTableBuilder::calculateHashTableSize() const {
  // One 4-byte slot for every bucket...
  uint32_t Bytes = sizeof(uint32_t) * computeBucketCount(Strings.size());
  // ...plus the 4-byte size field the hash table begins with.
  Bytes += sizeof(uint32_t);
  return Bytes;
}
|
|
|
|
uint32_t PDBStringTableBuilder::calculateSerializedSize() const {
|
|
uint32_t Size = 0;
|
|
Size += sizeof(PDBStringTableHeader);
|
|
Size += Strings.calculateSerializedSize();
|
|
Size += calculateHashTableSize();
|
|
Size += sizeof(uint32_t); // The /names stream ends with the string count.
|
|
return Size;
|
|
}
|
|
|
|
void PDBStringTableBuilder::setStrings(
|
|
const codeview::DebugStringTableSubsection &Strings) {
|
|
this->Strings = Strings;
|
|
}
|
|
|
|
/// Writes the string table header to \p Writer.
///
/// \p Writer is expected to be sized for exactly the header; this is asserted
/// after the write. Returns the writer's error if the write fails.
Error PDBStringTableBuilder::writeHeader(BinaryStreamWriter &Writer) const {
  // Fill in the header fields.
  PDBStringTableHeader Header;
  Header.Signature = PDBStringTableSignature;
  Header.HashVersion = 1;
  Header.ByteSize = Strings.calculateSerializedSize();

  if (Error Err = Writer.writeObject(Header))
    return Err;

  assert(Writer.bytesRemaining() == 0);
  return Error::success();
}
|
|
|
|
/// Commits the underlying string table to \p Writer.
///
/// \p Writer is expected to be sized for exactly the string data; this is
/// asserted after the write. Returns the error from the commit if it fails.
Error PDBStringTableBuilder::writeStrings(BinaryStreamWriter &Writer) const {
  if (Error Err = Strings.commit(Writer))
    return Err;

  assert(Writer.bytesRemaining() == 0);
  return Error::success();
}
|
|
|
|
/// Writes the hash table to \p Writer: the bucket count followed by the
/// bucket array.
///
/// Each string's value is placed by open addressing with linear probing,
/// starting at hashStringV1(string) modulo the bucket count. A bucket holding
/// zero is treated as free. \p Writer is expected to be sized for exactly the
/// hash table; this is asserted after the write.
Error PDBStringTableBuilder::writeHashTable(BinaryStreamWriter &Writer) const {
  // The table starts with its bucket count.
  const uint32_t NumBuckets = computeBucketCount(Strings.size());
  if (Error Err = Writer.writeInteger(NumBuckets))
    return Err;

  std::vector<ulittle32_t> Table(NumBuckets);

  for (auto &Entry : Strings) {
    StringRef Str = Entry.getKey();
    uint32_t Value = Entry.getValue();
    uint32_t Start = hashStringV1(Str);

    // Probe successive buckets (wrapping around) until a free one is found.
    // If every bucket is occupied, the string is left out.
    for (uint32_t Probe = 0; Probe != NumBuckets; ++Probe) {
      ulittle32_t &Bucket = Table[(Start + Probe) % NumBuckets];
      if (Bucket == 0) {
        Bucket = Value;
        break;
      }
    }
  }

  if (Error Err = Writer.writeArray(ArrayRef<ulittle32_t>(Table)))
    return Err;

  assert(Writer.bytesRemaining() == 0);
  return Error::success();
}
|
|
|
|
/// Writes the trailing 32-bit string count to \p Writer.
///
/// \p Writer is expected to be sized for exactly that integer; this is
/// asserted after the write. Returns the writer's error if the write fails.
Error PDBStringTableBuilder::writeEpilogue(BinaryStreamWriter &Writer) const {
  Error Err = Writer.writeInteger<uint32_t>(Strings.size());
  if (Err)
    return Err;

  assert(Writer.bytesRemaining() == 0);
  return Error::success();
}
|
|
|
|
/// Serializes the whole string table to \p Writer in four consecutive
/// sections: header, string data, hash table, and epilogue.
///
/// For each section, \p Writer is split into a writer covering exactly that
/// section and a writer for the rest, which replaces \p Writer. The first
/// error from a section writer is returned as-is.
Error PDBStringTableBuilder::commit(BinaryStreamWriter &Writer) const {
  BinaryStreamWriter Section;

  // Header.
  std::tie(Section, Writer) = Writer.split(sizeof(PDBStringTableHeader));
  if (Error Err = writeHeader(Section))
    return Err;

  // String data.
  std::tie(Section, Writer) = Writer.split(Strings.calculateSerializedSize());
  if (Error Err = writeStrings(Section))
    return Err;

  // Hash table.
  std::tie(Section, Writer) = Writer.split(calculateHashTableSize());
  if (Error Err = writeHashTable(Section))
    return Err;

  // Epilogue (the 32-bit string count).
  std::tie(Section, Writer) = Writer.split(sizeof(uint32_t));
  if (Error Err = writeEpilogue(Section))
    return Err;

  return Error::success();
}
|