rom-properties/extlib/libfmt/support/printable.py
David Korth fa932de93f [libfmt] Add an internal copy of libfmt-11.1.2.
Currently used as a header-only build. Will eventually be changed to
build a DLL for Windows and Mac OS X.

Other changes needed to get the Windows version to build with libfmt:

Add #include <fmt/xchar.h> to some files for full wchar_t support
in libfmt. Otherwise, errors like this appear:

src\amiibo-data\amiiboc.cpp(175,8): error C2665: 'fmt::v11::print': no overloaded function could convert all the argument types
extlib\libfmt\include\fmt\base.h(2925,17): message : could be 'void fmt::v11::print<TCHAR*&>(FILE *,fmt::v11::fstring<TCHAR *&>,TCHAR *&)'
src\amiibo-data\amiiboc.cpp(175,8): message : 'void fmt::v11::print<TCHAR*&>(FILE *,fmt::v11::fstring<TCHAR *&>,TCHAR *&)': cannot convert argument 2 from 'wmain::<lambda_1>::()::FMT_COMPILE_STRING' to 'fmt::v11::fstring<TCHAR *&>'
src\amiibo-data\amiiboc.cpp(175,22): message : No user-defined-conversion operator available that can perform this conversion, or the operator cannot be called
extlib\libfmt\include\fmt\base.h(2908,17): message : or       'void fmt::v11::print<wmain::<lambda_1>::()::FMT_COMPILE_STRING,TCHAR*&>(fmt::v11::fstring<wmain::<lambda_1>::()::FMT_COMPILE_STRING,TCHAR *&>,wmain::<lambda_1>::()::FMT_COMPILE_STRING &&,TCHAR *&)'
src\amiibo-data\amiiboc.cpp(175,8): message : 'void fmt::v11::print<wmain::<lambda_1>::()::FMT_COMPILE_STRING,TCHAR*&>(fmt::v11::fstring<wmain::<lambda_1>::()::FMT_COMPILE_STRING,TCHAR *&>,wmain::<lambda_1>::()::FMT_COMPILE_STRING &&,TCHAR *&)': cannot convert argument 1 from 'FILE *' to 'fmt::v11::fstring<wmain::<lambda_1>::()::FMT_COMPILE_STRING,TCHAR *&>'
src\amiibo-data\amiiboc.cpp(175,14): message : No constructor could take the source type, or constructor overload resolution was ambiguous
src\amiibo-data\amiiboc.cpp(175,8): message : while trying to match the argument list '(FILE *, wmain::<lambda_1>::()::FMT_COMPILE_STRING, TCHAR *)' [build.vc17_64\src\amiibo-data\amiiboc.vcxproj]

Files modified for xchar.h:
- amiibo-data/amiiboc.cpp
- libromdata/stdafx.h (needed by WiiUPackage)
- libromdata/tests/RomHeaderTest.cpp

NOTE: Only included on Windows. xchar.h was added in libfmt-8.0.0,
which was first added (in Ubuntu LTS releases) in Ubuntu 20.04.
It's not needed on Linux, anyway.

New option USE_INTERNAL_FMT to force the use of the internal copy of
libfmt on Linux, for testing purposes. (...and also for Ubuntu 16.04)
- TODO: Maybe use it on 18.04, etc. for improved performance?
2025-01-21 23:59:20 -05:00

202 lines
6.0 KiB
Python
Executable File
Vendored

#!/usr/bin/env python3
# This script is based on
# https://github.com/rust-lang/rust/blob/master/library/core/src/unicode/printable.py
# distributed under https://github.com/rust-lang/rust/blob/master/LICENSE-MIT.
# This script uses the following Unicode tables:
# - UnicodeData.txt
from collections import namedtuple
import csv
import os
import subprocess
NUM_CODEPOINTS=0x110000
def to_ranges(iter):
current = None
for i in iter:
if current is None or i != current[1] or i in (0x10000, 0x20000):
if current is not None:
yield tuple(current)
current = [i, i + 1]
else:
current[1] += 1
if current is not None:
yield tuple(current)
def get_escaped(codepoints):
for c in codepoints:
if (c.class_ or "Cn") in "Cc Cf Cs Co Cn Zl Zp Zs".split() and c.value != ord(' '):
yield c.value
def get_file(f):
try:
return open(os.path.basename(f))
except FileNotFoundError:
subprocess.run(["curl", "-O", f], check=True)
return open(os.path.basename(f))
Codepoint = namedtuple('Codepoint', 'value class_')
def get_codepoints(f):
r = csv.reader(f, delimiter=";")
prev_codepoint = 0
class_first = None
for row in r:
codepoint = int(row[0], 16)
name = row[1]
class_ = row[2]
if class_first is not None:
if not name.endswith("Last>"):
raise ValueError("Missing Last after First")
for c in range(prev_codepoint + 1, codepoint):
yield Codepoint(c, class_first)
class_first = None
if name.endswith("First>"):
class_first = class_
yield Codepoint(codepoint, class_)
prev_codepoint = codepoint
if class_first is not None:
raise ValueError("Missing Last after First")
for c in range(prev_codepoint + 1, NUM_CODEPOINTS):
yield Codepoint(c, None)
def compress_singletons(singletons):
uppers = [] # (upper, # items in lowers)
lowers = []
for i in singletons:
upper = i >> 8
lower = i & 0xff
if len(uppers) == 0 or uppers[-1][0] != upper:
uppers.append((upper, 1))
else:
upper, count = uppers[-1]
uppers[-1] = upper, count + 1
lowers.append(lower)
return uppers, lowers
def compress_normal(normal):
# lengths 0x00..0x7f are encoded as 00, 01, ..., 7e, 7f
# lengths 0x80..0x7fff are encoded as 80 80, 80 81, ..., ff fe, ff ff
compressed = [] # [truelen, (truelenaux), falselen, (falselenaux)]
prev_start = 0
for start, count in normal:
truelen = start - prev_start
falselen = count
prev_start = start + count
assert truelen < 0x8000 and falselen < 0x8000
entry = []
if truelen > 0x7f:
entry.append(0x80 | (truelen >> 8))
entry.append(truelen & 0xff)
else:
entry.append(truelen & 0x7f)
if falselen > 0x7f:
entry.append(0x80 | (falselen >> 8))
entry.append(falselen & 0xff)
else:
entry.append(falselen & 0x7f)
compressed.append(entry)
return compressed
def print_singletons(uppers, lowers, uppersname, lowersname):
print(" static constexpr singleton {}[] = {{".format(uppersname))
for u, c in uppers:
print(" {{{:#04x}, {}}},".format(u, c))
print(" };")
print(" static constexpr unsigned char {}[] = {{".format(lowersname))
for i in range(0, len(lowers), 8):
print(" {}".format(" ".join("{:#04x},".format(l) for l in lowers[i:i+8])))
print(" };")
def print_normal(normal, normalname):
print(" static constexpr unsigned char {}[] = {{".format(normalname))
for v in normal:
print(" {}".format(" ".join("{:#04x},".format(i) for i in v)))
print(" };")
def main():
file = get_file("https://www.unicode.org/Public/UNIDATA/UnicodeData.txt")
codepoints = get_codepoints(file)
CUTOFF=0x10000
singletons0 = []
singletons1 = []
normal0 = []
normal1 = []
extra = []
for a, b in to_ranges(get_escaped(codepoints)):
if a > 2 * CUTOFF:
extra.append((a, b - a))
elif a == b - 1:
if a & CUTOFF:
singletons1.append(a & ~CUTOFF)
else:
singletons0.append(a)
elif a == b - 2:
if a & CUTOFF:
singletons1.append(a & ~CUTOFF)
singletons1.append((a + 1) & ~CUTOFF)
else:
singletons0.append(a)
singletons0.append(a + 1)
else:
if a >= 2 * CUTOFF:
extra.append((a, b - a))
elif a & CUTOFF:
normal1.append((a & ~CUTOFF, b - a))
else:
normal0.append((a, b - a))
singletons0u, singletons0l = compress_singletons(singletons0)
singletons1u, singletons1l = compress_singletons(singletons1)
normal0 = compress_normal(normal0)
normal1 = compress_normal(normal1)
print("""\
FMT_FUNC auto is_printable(uint32_t cp) -> bool {\
""")
print_singletons(singletons0u, singletons0l, 'singletons0', 'singletons0_lower')
print_singletons(singletons1u, singletons1l, 'singletons1', 'singletons1_lower')
print_normal(normal0, 'normal0')
print_normal(normal1, 'normal1')
print("""\
auto lower = static_cast<uint16_t>(cp);
if (cp < 0x10000) {
return is_printable(lower, singletons0,
sizeof(singletons0) / sizeof(*singletons0),
singletons0_lower, normal0, sizeof(normal0));
}
if (cp < 0x20000) {
return is_printable(lower, singletons1,
sizeof(singletons1) / sizeof(*singletons1),
singletons1_lower, normal1, sizeof(normal1));
}\
""")
for a, b in extra:
print(" if (0x{:x} <= cp && cp < 0x{:x}) return false;".format(a, a + b))
print("""\
return cp < 0x{:x};
}}\
""".format(NUM_CODEPOINTS))
if __name__ == '__main__':
main()