[libromdata] IsoPartition: Add basic support for Joliet directories.

If a Supplementary Volume Descriptor is present, check for the Joliet
UCS-2 escape sequences. If found, use the SVD root directory.

Filenames in the SVD root directory are encoded in UCS-2 (big-endian).
The filename length is in bytes, not code points.

NOTE: Currently using a very cheap hack to convert UCS-2 to cp1252
(or, more accurately, ISO-8859-1): only the low byte of each UCS-2
code unit is kept. It's good enough for our purposes for now,
specifically, getting the icon from AUTORUN.INF.

TODO: Better character set conversion.
This commit is contained in:
David Korth 2025-06-08 14:46:56 -04:00
parent a5d22511e0
commit 3f1f92b18a
2 changed files with 123 additions and 14 deletions

View File

@ -39,8 +39,15 @@ public:
off64_t partition_offset;
off64_t partition_size; // Calculated partition size
// ISO primary volume descriptor
ISO_Primary_Volume_Descriptor pvd;
// ISO volume descriptors
ISO_Primary_Volume_Descriptor pvd, svd;
// Joliet UCS-2 level detected from the SVD escape sequences.
// Values are ordered so that a higher level compares greater;
// None means no Joliet escape sequence was found.
enum class JolietSVDType : uint8_t {
None = 0,
UCS2_Level1 = 1, // NOTE: UCS-2 BE; escape sequence "%/@"
UCS2_Level2 = 2, // NOTE: UCS-2 BE; escape sequence "%/C"
UCS2_Level3 = 3, // NOTE: UCS-2 BE; escape sequence "%/E"
};
JolietSVDType jolietSVDType;
// Directories
// - Key: Directory name, WITHOUT leading slash. (Root == empty string) [cp1252]
@ -157,11 +164,13 @@ IsoPartitionPrivate::IsoPartitionPrivate(IsoPartition *q,
: q_ptr(q)
, partition_offset(partition_offset)
, partition_size(0)
, jolietSVDType(JolietSVDType::None)
, iso_start_offset(iso_start_offset)
, fstDirCount(0)
{
// Clear the PVD struct.
// Clear the Volume Descriptor structs.
memset(&pvd, 0, sizeof(pvd));
memset(&svd, 0, sizeof(svd));
if (!q->m_file) {
q->m_lastError = EIO;
@ -197,6 +206,47 @@ IsoPartitionPrivate::IsoPartitionPrivate(IsoPartition *q,
return;
}
// Attempt to load the Supplementary Volume Descriptor.
// TODO: Keep loading VDs until we reach 0xFF?
size = q->m_file->seekAndRead(partition_offset + ISO_SVD_ADDRESS_2048, &svd, sizeof(svd));
// Verify the signature and volume descriptor type.
if (size == sizeof(svd) &&
svd.header.type == ISO_VDT_SUPPLEMENTARY && svd.header.version == ISO_VD_VERSION &&
!memcmp(svd.header.identifier, ISO_VD_MAGIC, sizeof(svd.header.identifier)))
{
// This is a supplementary volume descriptor.
// Check the escape sequences.
// Escape sequence format: '%', '/', x
const char *const p_end = &svd.svd_escape_sequences[sizeof(svd.svd_escape_sequences)-3];
for (const char *p = svd.svd_escape_sequences; p < p_end && *p != '\0'; p++) {
if (p[0] != '%' || p[1] != '/') {
continue;
}
// Check if this is a valid UCS-2 level sequence.
// NOTE: Using the highest level specified.
switch (p[2]) {
case '@':
if (jolietSVDType < JolietSVDType::UCS2_Level1) {
jolietSVDType = JolietSVDType::UCS2_Level1;
}
break;
case 'C':
if (jolietSVDType < JolietSVDType::UCS2_Level2) {
jolietSVDType = JolietSVDType::UCS2_Level2;
}
break;
case 'E':
if (jolietSVDType < JolietSVDType::UCS2_Level3) {
jolietSVDType = JolietSVDType::UCS2_Level3;
}
break;
default:
break;
}
}
}
// Load the root directory.
getDirectory("/");
}
@ -255,6 +305,10 @@ const ISO_DirEntry *IsoPartitionPrivate::lookup_int(const DirData_t *pDir, const
const ISO_DirEntry *dirEntry_found = nullptr;
const uint8_t *p = pDir->data();
const uint8_t *const p_end = p + pDir->size();
// Temporary buffer for converting Joliet UCS-2 filenames to cp1252.
char joliet_cp1252_buf[128];
while ((p + sizeof(ISO_DirEntry)) < p_end) {
const ISO_DirEntry *dirEntry = reinterpret_cast<const ISO_DirEntry*>(p);
if (dirEntry->entry_length == 0) {
@ -278,7 +332,7 @@ const ISO_DirEntry *IsoPartitionPrivate::lookup_int(const DirData_t *pDir, const
break;
}
const char *const entry_filename = reinterpret_cast<const char*>(p) + sizeof(*dirEntry);
const char *entry_filename = reinterpret_cast<const char*>(p) + sizeof(*dirEntry);
if (entry_filename + dirEntry->filename_length > reinterpret_cast<const char*>(p_end)) {
// Filename is out of bounds.
break;
@ -295,10 +349,27 @@ const ISO_DirEntry *IsoPartitionPrivate::lookup_int(const DirData_t *pDir, const
}
}
// If using Joliet, the filename is encoded as UCS-2 (UTF-16).
// Use a quick-and-dirty (and not necessarily accurate) conversion to cp1252.
// FIXME: Proper conversion?
uint8_t dirEntry_filename_len = dirEntry->filename_length;
if (jolietSVDType > JolietSVDType::None) {
// dirEntry_filename_len is in bytes, which means it's double
// the number of UCS-2 code points.
// NOTE: UCS-2 *Big-Endian*.
dirEntry_filename_len /= 2;
unsigned int i = 0;
for (; i < dirEntry_filename_len; i++) {
joliet_cp1252_buf[i] = entry_filename[(i * 2) + 1];
}
joliet_cp1252_buf[i] = '\0';
entry_filename = joliet_cp1252_buf;
}
// Check the filename.
// 1990s and early 2000s CD-ROM games usually have
// ";1" filenames, so check for that first.
if (dirEntry->filename_length == filename_len + 2) {
if (dirEntry_filename_len == filename_len + 2) {
// +2 length match.
// This might have ";1".
if (!strncasecmp(entry_filename, filename, filename_len)) {
@ -320,7 +391,7 @@ const ISO_DirEntry *IsoPartitionPrivate::lookup_int(const DirData_t *pDir, const
break;
}
}
} else if (dirEntry->filename_length == filename_len) {
} else if (dirEntry_filename_len == filename_len) {
// Exact length match.
if (!strncasecmp(entry_filename, filename, filename_len)) {
// Found it!
@ -396,7 +467,10 @@ const IsoPartitionPrivate::DirData_t *IsoPartitionPrivate::getDirectory(const ch
// Loading the root directory.
// Check the root directory entry.
const ISO_DirEntry *const rootdir = &pvd.dir_entry_root;
const ISO_DirEntry *const rootdir = (jolietSVDType > JolietSVDType::None)
? &svd.dir_entry_root
: &pvd.dir_entry_root;
if (rootdir->size.he > 16*1024*1024) {
// Root directory is too big.
q->m_lastError = EIO;
@ -842,9 +916,30 @@ const IFst::DirEnt *IsoPartition::readdir(IFst::Dir *dirp)
// TODO: Remove ";1" from the filename, if present?
char *extra = static_cast<char*>(dirp->entry.extra);
delete[] extra;
extra = new char[dirEntry->filename_length + 1];
memcpy(extra, entry_filename, dirEntry->filename_length);
extra[dirEntry->filename_length] = '\0';
// If using Joliet, the filename is encoded as UCS-2 (UTF-16).
// Use a quick-and-dirty (and not necessarily accurate) conversion to cp1252.
// FIXME: Proper conversion?
// TODO: Convert to UTF-8 for readdir()?
RP_D(IsoPartition);
uint8_t dirEntry_filename_len = dirEntry->filename_length;
if (d->jolietSVDType > IsoPartitionPrivate::JolietSVDType::None) {
// dirEntry_filename_len is in bytes, which means it's double
// the number of UCS-2 code points.
// NOTE: UCS-2 *Big-Endian*.
dirEntry_filename_len /= 2;
extra = new char[dirEntry_filename_len + 1];
unsigned int i = 0;
for (; i < dirEntry_filename_len; i++) {
extra[i] = entry_filename[(i * 2) + 1];
}
extra[i] = '\0';
} else {
// TODO: Convert from cp1252 to UTF-8 for readdir()?
extra = new char[dirEntry_filename_len + 1];
memcpy(extra, entry_filename, dirEntry_filename_len);
extra[dirEntry_filename_len] = '\0';
}
dirp->entry.name = extra;
dirp->entry.extra = extra;

View File

@ -192,16 +192,22 @@ ASSERT_STRUCT(ISO_Boot_Volume_Descriptor, ISO_SECTOR_SIZE_MODE1_COOKED);
* Primary volume descriptor.
*
* NOTE: All fields are space-padded. (0x20, ' ')
*
* NOTE 2: SVD fields are only valid in Supplementary Volume Descriptors.
* In PVDs, these fields should be all zero.
*/
typedef struct _ISO_Primary_Volume_Descriptor {
ISO_Volume_Descriptor_Header header;
uint8_t reserved1; // [0x007] 0x00
uint8_t svd_volume_flags; // [0x007] Bit 0, if clear, indicates escape sequences *only* has
// valid sequences from ISO 2375. If set, it has sequences
// that aren't in ISO 2375.
char sysID[32]; // [0x008] (strA) System identifier.
char volID[32]; // [0x028] (strD) Volume identifier.
uint8_t reserved2[8]; // [0x048] All zeroes.
uint32_lsb_msb_t volume_space_size; // [0x050] Size of volume, in blocks.
uint8_t reserved3[32]; // [0x058] All zeroes.
char svd_escape_sequences[32]; // [0x058] SVD: Escape sequences (indicates character sets)
uint16_lsb_msb_t volume_set_size; // [0x078] Size of the logical volume. (number of discs)
uint16_lsb_msb_t volume_seq_number; // [0x07C] Disc number in the volume set.
uint16_lsb_msb_t logical_block_size; // [0x080] Logical block size. (usually 2048)
@ -245,14 +251,22 @@ ASSERT_STRUCT(ISO_Primary_Volume_Descriptor, ISO_SECTOR_SIZE_MODE1_COOKED);
/**
* Volume descriptor.
*
* Primary volume descriptor is located at sector 0x10. (0x8000)
* Primary Volume Descriptor is located at sector 0x10. (0x8000)
* Supplementary Volume Descriptor is usually located at sector 0x11, if present. (0x8800)
*/
// Volume descriptor identifier and version, as checked in the descriptor header.
#define ISO_VD_MAGIC "CD001"
#define ISO_VD_VERSION 0x01
// Primary Volume Descriptor: LBA, plus its byte address for each
// sector-size constant (2048/2352/2448, going by the macro names).
#define ISO_PVD_LBA 0x10
#define ISO_PVD_ADDRESS_2048 (ISO_PVD_LBA * ISO_SECTOR_SIZE_MODE1_COOKED)
#define ISO_PVD_ADDRESS_2352 (ISO_PVD_LBA * ISO_SECTOR_SIZE_MODE1_RAW)
#define ISO_PVD_ADDRESS_2448 (ISO_PVD_LBA * ISO_SECTOR_SIZE_MODE1_RAW_SUBCHAN)
// Supplementary Volume Descriptor: *usual* LBA (an SVD may be absent),
// plus its byte address for each sector-size constant.
#define ISO_SVD_LBA 0x11
#define ISO_SVD_ADDRESS_2048 (ISO_SVD_LBA * ISO_SECTOR_SIZE_MODE1_COOKED)
#define ISO_SVD_ADDRESS_2352 (ISO_SVD_LBA * ISO_SECTOR_SIZE_MODE1_RAW)
#define ISO_SVD_ADDRESS_2448 (ISO_SVD_LBA * ISO_SECTOR_SIZE_MODE1_RAW_SUBCHAN)
typedef union _ISO_Volume_Descriptor {
ISO_Volume_Descriptor_Header header;