teak-llvm/lldb/source/DataFormatters/StringPrinter.cpp
Raphael Isemann 808142876c [lldb][NFC] Fix all formatting errors in .cpp file headers
Summary:
A *.cpp file header in LLDB (and in LLDB) should like this:
```
//===-- TestUtilities.cpp -------------------------------------------------===//
```
However in LLDB most of our source files have arbitrary changes to this format and
these changes are spreading through LLDB as folks usually just use the existing
source files as templates for their new files (most notably the unnecessary
editor language indicator `-*- C++ -*-` is spreading and in every review
someone is pointing out that this is wrong, resulting in people pointing out that this
is done in the same way in other files).

This patch removes most of these inconsistencies including the editor language indicators,
all the different missing/additional '-' characters, files that center the file name, missing
trailing `===//` (mostly caused by clang-format breaking the line).

Reviewers: aprantl, espindola, jfb, shafik, JDevlieghere

Reviewed By: JDevlieghere

Subscribers: dexonsmith, wuzish, emaste, sdardis, nemanjai, kbarton, MaskRay, atanasyan, arphaman, jfb, abidh, jsji, JDevlieghere, usaxena95, lldb-commits

Tags: #lldb

Differential Revision: https://reviews.llvm.org/D73258
2020-01-24 08:52:55 +01:00

651 lines
21 KiB
C++

//===-- StringPrinter.cpp -------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "lldb/DataFormatters/StringPrinter.h"
#include "lldb/Core/Debugger.h"
#include "lldb/Core/ValueObject.h"
#include "lldb/Target/Language.h"
#include "lldb/Target/Process.h"
#include "lldb/Target/Target.h"
#include "lldb/Utility/Status.h"
#include "llvm/Support/ConvertUTF.h"
#include <ctype.h>
#include <locale>
#include <memory>
using namespace lldb;
using namespace lldb_private;
using namespace lldb_private::formatters;
// we define this for all values of type but only implement it for those we
// care about that's good because we get linker errors for any unsupported type
template <lldb_private::formatters::StringPrinter::StringElementType type>
static StringPrinter::StringPrinterBufferPointer<>
GetPrintableImpl(uint8_t *buffer, uint8_t *buffer_end, uint8_t *&next);
// mimic isprint() for Unicode codepoints
static bool isprint(char32_t codepoint) {
if (codepoint <= 0x1F || codepoint == 0x7F) // C0
{
return false;
}
if (codepoint >= 0x80 && codepoint <= 0x9F) // C1
{
return false;
}
if (codepoint == 0x2028 || codepoint == 0x2029) // line/paragraph separators
{
return false;
}
if (codepoint == 0x200E || codepoint == 0x200F ||
(codepoint >= 0x202A &&
codepoint <= 0x202E)) // bidirectional text control
{
return false;
}
if (codepoint >= 0xFFF9 &&
codepoint <= 0xFFFF) // interlinears and generally specials
{
return false;
}
return true;
}
template <>
StringPrinter::StringPrinterBufferPointer<>
GetPrintableImpl<StringPrinter::StringElementType::ASCII>(uint8_t *buffer,
uint8_t *buffer_end,
uint8_t *&next) {
StringPrinter::StringPrinterBufferPointer<> retval = {nullptr};
switch (*buffer) {
case 0:
retval = {"\\0", 2};
break;
case '\a':
retval = {"\\a", 2};
break;
case '\b':
retval = {"\\b", 2};
break;
case '\f':
retval = {"\\f", 2};
break;
case '\n':
retval = {"\\n", 2};
break;
case '\r':
retval = {"\\r", 2};
break;
case '\t':
retval = {"\\t", 2};
break;
case '\v':
retval = {"\\v", 2};
break;
case '\"':
retval = {"\\\"", 2};
break;
case '\\':
retval = {"\\\\", 2};
break;
default:
if (isprint(*buffer))
retval = {buffer, 1};
else {
uint8_t *data = new uint8_t[5];
sprintf((char *)data, "\\x%02x", *buffer);
retval = {data, 4, [](const uint8_t *c) { delete[] c; }};
break;
}
}
next = buffer + 1;
return retval;
}
static char32_t ConvertUTF8ToCodePoint(unsigned char c0, unsigned char c1) {
return (c0 - 192) * 64 + (c1 - 128);
}
static char32_t ConvertUTF8ToCodePoint(unsigned char c0, unsigned char c1,
unsigned char c2) {
return (c0 - 224) * 4096 + (c1 - 128) * 64 + (c2 - 128);
}
static char32_t ConvertUTF8ToCodePoint(unsigned char c0, unsigned char c1,
unsigned char c2, unsigned char c3) {
return (c0 - 240) * 262144 + (c2 - 128) * 4096 + (c2 - 128) * 64 + (c3 - 128);
}
template <>
StringPrinter::StringPrinterBufferPointer<>
GetPrintableImpl<StringPrinter::StringElementType::UTF8>(uint8_t *buffer,
uint8_t *buffer_end,
uint8_t *&next) {
StringPrinter::StringPrinterBufferPointer<> retval{nullptr};
unsigned utf8_encoded_len = llvm::getNumBytesForUTF8(*buffer);
if (1u + std::distance(buffer, buffer_end) < utf8_encoded_len) {
// I don't have enough bytes - print whatever I have left
retval = {buffer, static_cast<size_t>(1 + buffer_end - buffer)};
next = buffer_end + 1;
return retval;
}
char32_t codepoint = 0;
switch (utf8_encoded_len) {
case 1:
// this is just an ASCII byte - ask ASCII
return GetPrintableImpl<StringPrinter::StringElementType::ASCII>(
buffer, buffer_end, next);
case 2:
codepoint = ConvertUTF8ToCodePoint((unsigned char)*buffer,
(unsigned char)*(buffer + 1));
break;
case 3:
codepoint = ConvertUTF8ToCodePoint((unsigned char)*buffer,
(unsigned char)*(buffer + 1),
(unsigned char)*(buffer + 2));
break;
case 4:
codepoint = ConvertUTF8ToCodePoint(
(unsigned char)*buffer, (unsigned char)*(buffer + 1),
(unsigned char)*(buffer + 2), (unsigned char)*(buffer + 3));
break;
default:
// this is probably some bogus non-character thing just print it as-is and
// hope to sync up again soon
retval = {buffer, 1};
next = buffer + 1;
return retval;
}
if (codepoint) {
switch (codepoint) {
case 0:
retval = {"\\0", 2};
break;
case '\a':
retval = {"\\a", 2};
break;
case '\b':
retval = {"\\b", 2};
break;
case '\f':
retval = {"\\f", 2};
break;
case '\n':
retval = {"\\n", 2};
break;
case '\r':
retval = {"\\r", 2};
break;
case '\t':
retval = {"\\t", 2};
break;
case '\v':
retval = {"\\v", 2};
break;
case '\"':
retval = {"\\\"", 2};
break;
case '\\':
retval = {"\\\\", 2};
break;
default:
if (isprint(codepoint))
retval = {buffer, utf8_encoded_len};
else {
uint8_t *data = new uint8_t[11];
sprintf((char *)data, "\\U%08x", (unsigned)codepoint);
retval = {data, 10, [](const uint8_t *c) { delete[] c; }};
break;
}
}
next = buffer + utf8_encoded_len;
return retval;
}
// this should not happen - but just in case.. try to resync at some point
retval = {buffer, 1};
next = buffer + 1;
return retval;
}
// Given a sequence of bytes, this function returns: a sequence of bytes to
// actually print out + a length the following unscanned position of the buffer
// is in next
static StringPrinter::StringPrinterBufferPointer<>
GetPrintable(StringPrinter::StringElementType type, uint8_t *buffer,
uint8_t *buffer_end, uint8_t *&next) {
if (!buffer)
return {nullptr};
switch (type) {
case StringPrinter::StringElementType::ASCII:
return GetPrintableImpl<StringPrinter::StringElementType::ASCII>(
buffer, buffer_end, next);
case StringPrinter::StringElementType::UTF8:
return GetPrintableImpl<StringPrinter::StringElementType::UTF8>(
buffer, buffer_end, next);
default:
return {nullptr};
}
}
StringPrinter::EscapingHelper
StringPrinter::GetDefaultEscapingHelper(GetPrintableElementType elem_type) {
switch (elem_type) {
case GetPrintableElementType::UTF8:
return [](uint8_t *buffer, uint8_t *buffer_end,
uint8_t *&next) -> StringPrinter::StringPrinterBufferPointer<> {
return GetPrintable(StringPrinter::StringElementType::UTF8, buffer,
buffer_end, next);
};
case GetPrintableElementType::ASCII:
return [](uint8_t *buffer, uint8_t *buffer_end,
uint8_t *&next) -> StringPrinter::StringPrinterBufferPointer<> {
return GetPrintable(StringPrinter::StringElementType::ASCII, buffer,
buffer_end, next);
};
}
llvm_unreachable("bad element type");
}
// use this call if you already have an LLDB-side buffer for the data
template <typename SourceDataType>
static bool DumpUTFBufferToStream(
llvm::ConversionResult (*ConvertFunction)(const SourceDataType **,
const SourceDataType *,
llvm::UTF8 **, llvm::UTF8 *,
llvm::ConversionFlags),
const StringPrinter::ReadBufferAndDumpToStreamOptions &dump_options) {
Stream &stream(*dump_options.GetStream());
if (dump_options.GetPrefixToken() != nullptr)
stream.Printf("%s", dump_options.GetPrefixToken());
if (dump_options.GetQuote() != 0)
stream.Printf("%c", dump_options.GetQuote());
auto data(dump_options.GetData());
auto source_size(dump_options.GetSourceSize());
if (data.GetByteSize() && data.GetDataStart() && data.GetDataEnd()) {
const int bufferSPSize = data.GetByteSize();
if (dump_options.GetSourceSize() == 0) {
const int origin_encoding = 8 * sizeof(SourceDataType);
source_size = bufferSPSize / (origin_encoding / 4);
}
const SourceDataType *data_ptr =
(const SourceDataType *)data.GetDataStart();
const SourceDataType *data_end_ptr = data_ptr + source_size;
const bool zero_is_terminator = dump_options.GetBinaryZeroIsTerminator();
if (zero_is_terminator) {
while (data_ptr < data_end_ptr) {
if (!*data_ptr) {
data_end_ptr = data_ptr;
break;
}
data_ptr++;
}
data_ptr = (const SourceDataType *)data.GetDataStart();
}
lldb::DataBufferSP utf8_data_buffer_sp;
llvm::UTF8 *utf8_data_ptr = nullptr;
llvm::UTF8 *utf8_data_end_ptr = nullptr;
if (ConvertFunction) {
utf8_data_buffer_sp =
std::make_shared<DataBufferHeap>(4 * bufferSPSize, 0);
utf8_data_ptr = (llvm::UTF8 *)utf8_data_buffer_sp->GetBytes();
utf8_data_end_ptr = utf8_data_ptr + utf8_data_buffer_sp->GetByteSize();
ConvertFunction(&data_ptr, data_end_ptr, &utf8_data_ptr,
utf8_data_end_ptr, llvm::lenientConversion);
if (!zero_is_terminator)
utf8_data_end_ptr = utf8_data_ptr;
// needed because the ConvertFunction will change the value of the
// data_ptr.
utf8_data_ptr =
(llvm::UTF8 *)utf8_data_buffer_sp->GetBytes();
} else {
// just copy the pointers - the cast is necessary to make the compiler
// happy but this should only happen if we are reading UTF8 data
utf8_data_ptr = const_cast<llvm::UTF8 *>(
reinterpret_cast<const llvm::UTF8 *>(data_ptr));
utf8_data_end_ptr = const_cast<llvm::UTF8 *>(
reinterpret_cast<const llvm::UTF8 *>(data_end_ptr));
}
const bool escape_non_printables = dump_options.GetEscapeNonPrintables();
lldb_private::formatters::StringPrinter::EscapingHelper escaping_callback;
if (escape_non_printables) {
if (Language *language = Language::FindPlugin(dump_options.GetLanguage()))
escaping_callback = language->GetStringPrinterEscapingHelper(
lldb_private::formatters::StringPrinter::GetPrintableElementType::
UTF8);
else
escaping_callback =
lldb_private::formatters::StringPrinter::GetDefaultEscapingHelper(
lldb_private::formatters::StringPrinter::
GetPrintableElementType::UTF8);
}
// since we tend to accept partial data (and even partially malformed data)
// we might end up with no NULL terminator before the end_ptr hence we need
// to take a slower route and ensure we stay within boundaries
for (; utf8_data_ptr < utf8_data_end_ptr;) {
if (zero_is_terminator && !*utf8_data_ptr)
break;
if (escape_non_printables) {
uint8_t *next_data = nullptr;
auto printable =
escaping_callback(utf8_data_ptr, utf8_data_end_ptr, next_data);
auto printable_bytes = printable.GetBytes();
auto printable_size = printable.GetSize();
if (!printable_bytes || !next_data) {
// GetPrintable() failed on us - print one byte in a desperate resync
// attempt
printable_bytes = utf8_data_ptr;
printable_size = 1;
next_data = utf8_data_ptr + 1;
}
for (unsigned c = 0; c < printable_size; c++)
stream.Printf("%c", *(printable_bytes + c));
utf8_data_ptr = (uint8_t *)next_data;
} else {
stream.Printf("%c", *utf8_data_ptr);
utf8_data_ptr++;
}
}
}
if (dump_options.GetQuote() != 0)
stream.Printf("%c", dump_options.GetQuote());
if (dump_options.GetSuffixToken() != nullptr)
stream.Printf("%s", dump_options.GetSuffixToken());
if (dump_options.GetIsTruncated())
stream.Printf("...");
return true;
}
lldb_private::formatters::StringPrinter::ReadStringAndDumpToStreamOptions::
ReadStringAndDumpToStreamOptions(ValueObject &valobj)
: ReadStringAndDumpToStreamOptions() {
SetEscapeNonPrintables(
valobj.GetTargetSP()->GetDebugger().GetEscapeNonPrintables());
}
lldb_private::formatters::StringPrinter::ReadBufferAndDumpToStreamOptions::
ReadBufferAndDumpToStreamOptions(ValueObject &valobj)
: ReadBufferAndDumpToStreamOptions() {
SetEscapeNonPrintables(
valobj.GetTargetSP()->GetDebugger().GetEscapeNonPrintables());
}
lldb_private::formatters::StringPrinter::ReadBufferAndDumpToStreamOptions::
ReadBufferAndDumpToStreamOptions(
const ReadStringAndDumpToStreamOptions &options)
: ReadBufferAndDumpToStreamOptions() {
SetStream(options.GetStream());
SetPrefixToken(options.GetPrefixToken());
SetSuffixToken(options.GetSuffixToken());
SetQuote(options.GetQuote());
SetEscapeNonPrintables(options.GetEscapeNonPrintables());
SetBinaryZeroIsTerminator(options.GetBinaryZeroIsTerminator());
SetLanguage(options.GetLanguage());
}
namespace lldb_private {
namespace formatters {
template <>
bool StringPrinter::ReadStringAndDumpToStream<
StringPrinter::StringElementType::ASCII>(
const ReadStringAndDumpToStreamOptions &options) {
assert(options.GetStream() && "need a Stream to print the string to");
Status my_error;
ProcessSP process_sp(options.GetProcessSP());
if (process_sp.get() == nullptr || options.GetLocation() == 0)
return false;
size_t size;
const auto max_size = process_sp->GetTarget().GetMaximumSizeOfStringSummary();
bool is_truncated = false;
if (options.GetSourceSize() == 0)
size = max_size;
else if (!options.GetIgnoreMaxLength()) {
size = options.GetSourceSize();
if (size > max_size) {
size = max_size;
is_truncated = true;
}
} else
size = options.GetSourceSize();
lldb::DataBufferSP buffer_sp(new DataBufferHeap(size, 0));
process_sp->ReadCStringFromMemory(
options.GetLocation(), (char *)buffer_sp->GetBytes(), size, my_error);
if (my_error.Fail())
return false;
const char *prefix_token = options.GetPrefixToken();
char quote = options.GetQuote();
if (prefix_token != nullptr)
options.GetStream()->Printf("%s%c", prefix_token, quote);
else if (quote != 0)
options.GetStream()->Printf("%c", quote);
uint8_t *data_end = buffer_sp->GetBytes() + buffer_sp->GetByteSize();
const bool escape_non_printables = options.GetEscapeNonPrintables();
lldb_private::formatters::StringPrinter::EscapingHelper escaping_callback;
if (escape_non_printables) {
if (Language *language = Language::FindPlugin(options.GetLanguage()))
escaping_callback = language->GetStringPrinterEscapingHelper(
lldb_private::formatters::StringPrinter::GetPrintableElementType::
ASCII);
else
escaping_callback =
lldb_private::formatters::StringPrinter::GetDefaultEscapingHelper(
lldb_private::formatters::StringPrinter::GetPrintableElementType::
ASCII);
}
// since we tend to accept partial data (and even partially malformed data)
// we might end up with no NULL terminator before the end_ptr hence we need
// to take a slower route and ensure we stay within boundaries
for (uint8_t *data = buffer_sp->GetBytes(); *data && (data < data_end);) {
if (escape_non_printables) {
uint8_t *next_data = nullptr;
auto printable = escaping_callback(data, data_end, next_data);
auto printable_bytes = printable.GetBytes();
auto printable_size = printable.GetSize();
if (!printable_bytes || !next_data) {
// GetPrintable() failed on us - print one byte in a desperate resync
// attempt
printable_bytes = data;
printable_size = 1;
next_data = data + 1;
}
for (unsigned c = 0; c < printable_size; c++)
options.GetStream()->Printf("%c", *(printable_bytes + c));
data = (uint8_t *)next_data;
} else {
options.GetStream()->Printf("%c", *data);
data++;
}
}
const char *suffix_token = options.GetSuffixToken();
if (suffix_token != nullptr)
options.GetStream()->Printf("%c%s", quote, suffix_token);
else if (quote != 0)
options.GetStream()->Printf("%c", quote);
if (is_truncated)
options.GetStream()->Printf("...");
return true;
}
template <typename SourceDataType>
static bool ReadUTFBufferAndDumpToStream(
const StringPrinter::ReadStringAndDumpToStreamOptions &options,
llvm::ConversionResult (*ConvertFunction)(const SourceDataType **,
const SourceDataType *,
llvm::UTF8 **, llvm::UTF8 *,
llvm::ConversionFlags)) {
assert(options.GetStream() && "need a Stream to print the string to");
if (options.GetLocation() == 0 ||
options.GetLocation() == LLDB_INVALID_ADDRESS)
return false;
lldb::ProcessSP process_sp(options.GetProcessSP());
if (!process_sp)
return false;
const int type_width = sizeof(SourceDataType);
const int origin_encoding = 8 * type_width;
if (origin_encoding != 8 && origin_encoding != 16 && origin_encoding != 32)
return false;
// if not UTF8, I need a conversion function to return proper UTF8
if (origin_encoding != 8 && !ConvertFunction)
return false;
if (!options.GetStream())
return false;
uint32_t sourceSize = options.GetSourceSize();
bool needs_zero_terminator = options.GetNeedsZeroTermination();
bool is_truncated = false;
const auto max_size = process_sp->GetTarget().GetMaximumSizeOfStringSummary();
if (!sourceSize) {
sourceSize = max_size;
needs_zero_terminator = true;
} else if (!options.GetIgnoreMaxLength()) {
if (sourceSize > max_size) {
sourceSize = max_size;
is_truncated = true;
}
}
const int bufferSPSize = sourceSize * type_width;
lldb::DataBufferSP buffer_sp(new DataBufferHeap(bufferSPSize, 0));
if (!buffer_sp->GetBytes())
return false;
Status error;
char *buffer = reinterpret_cast<char *>(buffer_sp->GetBytes());
if (needs_zero_terminator)
process_sp->ReadStringFromMemory(options.GetLocation(), buffer,
bufferSPSize, error, type_width);
else
process_sp->ReadMemoryFromInferior(options.GetLocation(),
(char *)buffer_sp->GetBytes(),
bufferSPSize, error);
if (error.Fail()) {
options.GetStream()->Printf("unable to read data");
return true;
}
DataExtractor data(buffer_sp, process_sp->GetByteOrder(),
process_sp->GetAddressByteSize());
StringPrinter::ReadBufferAndDumpToStreamOptions dump_options(options);
dump_options.SetData(data);
dump_options.SetSourceSize(sourceSize);
dump_options.SetIsTruncated(is_truncated);
return DumpUTFBufferToStream(ConvertFunction, dump_options);
}
template <>
bool StringPrinter::ReadStringAndDumpToStream<
StringPrinter::StringElementType::UTF8>(
const ReadStringAndDumpToStreamOptions &options) {
return ReadUTFBufferAndDumpToStream<llvm::UTF8>(options, nullptr);
}
template <>
bool StringPrinter::ReadStringAndDumpToStream<
StringPrinter::StringElementType::UTF16>(
const ReadStringAndDumpToStreamOptions &options) {
return ReadUTFBufferAndDumpToStream<llvm::UTF16>(options,
llvm::ConvertUTF16toUTF8);
}
template <>
bool StringPrinter::ReadStringAndDumpToStream<
StringPrinter::StringElementType::UTF32>(
const ReadStringAndDumpToStreamOptions &options) {
return ReadUTFBufferAndDumpToStream<llvm::UTF32>(options,
llvm::ConvertUTF32toUTF8);
}
template <>
bool StringPrinter::ReadBufferAndDumpToStream<
StringPrinter::StringElementType::UTF8>(
const ReadBufferAndDumpToStreamOptions &options) {
assert(options.GetStream() && "need a Stream to print the string to");
return DumpUTFBufferToStream<llvm::UTF8>(nullptr, options);
}
template <>
bool StringPrinter::ReadBufferAndDumpToStream<
StringPrinter::StringElementType::ASCII>(
const ReadBufferAndDumpToStreamOptions &options) {
// treat ASCII the same as UTF8
// FIXME: can we optimize ASCII some more?
return ReadBufferAndDumpToStream<StringElementType::UTF8>(options);
}
template <>
bool StringPrinter::ReadBufferAndDumpToStream<
StringPrinter::StringElementType::UTF16>(
const ReadBufferAndDumpToStreamOptions &options) {
assert(options.GetStream() && "need a Stream to print the string to");
return DumpUTFBufferToStream(llvm::ConvertUTF16toUTF8, options);
}
template <>
bool StringPrinter::ReadBufferAndDumpToStream<
StringPrinter::StringElementType::UTF32>(
const ReadBufferAndDumpToStreamOptions &options) {
assert(options.GetStream() && "need a Stream to print the string to");
return DumpUTFBufferToStream(llvm::ConvertUTF32toUTF8, options);
}
} // namespace formatters
} // namespace lldb_private