mirror of
https://github.com/GerbilSoft/rom-properties.git
synced 2025-06-18 11:35:38 -04:00

librpbyteswap is still embedded into libromdata and has exported functions. librpcpuid is a standalone static library and will be linked into anything that needs CPU flags, instead of using __builtin_cpu_supports(). librpcpuid's version is more efficient because it has fewer tests and it doesn't do string comparisons. Remove #include "byteorder.h" from a few files, since it isn't actually used: - librpbase/img/RpPng.cpp - librpbase/img/RpPngWriter.cpp - libromdata/disc/xdvdfs_structs.h - libromdata/Media/hsfs_structs.h [gtk,librpbase] Remove #include "librpcpu/cpu_dispatch.h" from stdafx.h, since it's only used by a few files. [gtk3] CMakeLists.txt: Removed SSSE3 checks. SSSE3 is only used by GdkImageConv, which is only used by the XFCE (GTK2) UI frontend. [gtk3] CairoImageConv.hpp: Remove #include "librpcpu/cpu_dispatch.h", since it isn't actually used here. [xfce] GdkImageConv_ifunc.cpp, [librpbyteswap] byteswap_ifunc.c: - #include "config.librpcpuid.h" before checking for HAVE_IFUNC. This was indirectly included before, but explicitly including it allows us to skip the other inclusion if IFUNC is not available.
654 lines
23 KiB
C++
Vendored
654 lines
23 KiB
C++
Vendored
/*!
|
|
\brief Implementation of the Texture Decompression functions.
|
|
\file PVRCore/texture/PVRTDecompress.cpp
|
|
\author PowerVR by Imagination, Developer Technology Team
|
|
\copyright Copyright (c) Imagination Technologies Limited.
|
|
*/
|
|
//!\cond NO_DOXYGEN
|
|
|
|
#include <cstdlib>
|
|
#include <cstdio>
|
|
#include <climits>
|
|
#include <cmath>
|
|
#include <algorithm>
|
|
#include <cstring>
|
|
#include "PVRTDecompress.h"
|
|
#include <cassert>
|
|
#include <vector>
|
|
|
|
// rom-properties: Use librpbyteswap's byteorder macros.
|
|
// NOTE: Not able to detect built-in byteswapping intrinsics here.
|
|
#include "../../src/librpbyteswap/byteorder.h"
|
|
#define __swab32(x) \
|
|
((uint32_t)((((uint32_t)x) << 24) | (((uint32_t)x) >> 24) | \
|
|
((((uint32_t)x) & 0x0000FF00UL) << 8) | \
|
|
((((uint32_t)x) & 0x00FF0000UL) >> 8)))
|
|
#if SYS_BYTEORDER == SYS_LIL_ENDIAN
|
|
# define le32_to_cpu(x) (x)
|
|
# define cpu_to_le32(x) (x)
|
|
#else /* SYS_BYTEORDER == SYS_BIG_ENDIAN */
|
|
# define le32_to_cpu(x) __swab32(x)
|
|
# define cpu_to_le32(x) __swab32(x)
|
|
#endif
|
|
|
|
namespace pvr {
|
|
struct Pixel32
|
|
{
|
|
#if SYS_BYTEORDER == SYS_LIL_ENDIAN
|
|
# ifdef PVRTC_SWAP_R_B_CHANNELS
|
|
uint8_t blue, green, red, alpha;
|
|
# else /* !PVRTC_SWAP_R_B_CHANNELS */
|
|
uint8_t red, green, blue, alpha;
|
|
# endif /* PVRTC_SWAP_R_B_CHANNELS */
|
|
#else /* SYS_BYTEORDER == SYS_BIG_ENDIAN */
|
|
# ifdef PVRTC_SWAP_R_B_CHANNELS
|
|
uint8_t alpha, red, green, blue;
|
|
# else /* !PVRTC_SWAP_R_B_CHANNELS */
|
|
uint8_t alpha, blue, green, red;
|
|
# endif /* PVRTC_SWAP_R_B_CHANNELS */
|
|
#endif
|
|
};
|
|
|
|
struct Pixel128S
|
|
{
|
|
int32_t red, green, blue, alpha;
|
|
};
|
|
|
|
struct PVRTCWord
|
|
{
|
|
uint32_t modulationData;
|
|
uint32_t colorData;
|
|
};
|
|
|
|
struct PVRTCWordIndices
|
|
{
|
|
int P[2], Q[2], R[2], S[2];
|
|
};
|
|
|
|
template<bool PVRTCII>
|
|
static Pixel32 getColorA(uint32_t colorData)
|
|
{
|
|
Pixel32 color;
|
|
|
|
// Opaque Color Mode - RGB 554
|
|
const uint32_t opaque_flag = (PVRTCII ? 0x80000000 : 0x8000);
|
|
if ((colorData & opaque_flag) != 0)
|
|
{
|
|
color.red = static_cast<uint8_t>((colorData & 0x7c00) >> 10); // 5->5 bits
|
|
color.green = static_cast<uint8_t>((colorData & 0x3e0) >> 5); // 5->5 bits
|
|
color.blue = static_cast<uint8_t>(colorData & 0x1e) | ((colorData & 0x1e) >> 4); // 4->5 bits
|
|
color.alpha = static_cast<uint8_t>(0xf); // 0->4 bits
|
|
}
|
|
// Transparent Color Mode - ARGB 3443
|
|
else
|
|
{
|
|
color.red = static_cast<uint8_t>((colorData & 0xf00) >> 7) | ((colorData & 0xf00) >> 11); // 4->5 bits
|
|
color.green = static_cast<uint8_t>((colorData & 0xf0) >> 3) | ((colorData & 0xf0) >> 7); // 4->5 bits
|
|
color.blue = static_cast<uint8_t>((colorData & 0xe) << 1) | ((colorData & 0xe) >> 2); // 3->5 bits
|
|
color.alpha = static_cast<uint8_t>((colorData & 0x7000) >> 11); // 3->4 bits - note 0 at right
|
|
}
|
|
|
|
return color;
|
|
}
|
|
|
|
template<bool PVRTCII>
|
|
static Pixel32 getColorB(uint32_t colorData)
|
|
{
|
|
Pixel32 color;
|
|
|
|
// Opaque Color Mode - RGB 555
|
|
if (colorData & 0x80000000)
|
|
{
|
|
color.red = static_cast<uint8_t>((colorData & 0x7c000000) >> 26); // 5->5 bits
|
|
color.green = static_cast<uint8_t>((colorData & 0x3e00000) >> 21); // 5->5 bits
|
|
color.blue = static_cast<uint8_t>((colorData & 0x1f0000) >> 16); // 5->5 bits
|
|
color.alpha = static_cast<uint8_t>(0xf); // 0 bits
|
|
}
|
|
// Transparent Color Mode - ARGB 3444
|
|
else
|
|
{
|
|
color.red = static_cast<uint8_t>(((colorData & 0xf000000) >> 23) | ((colorData & 0xf000000) >> 27)); // 4->5 bits
|
|
color.green = static_cast<uint8_t>(((colorData & 0xf00000) >> 19) | ((colorData & 0xf00000) >> 23)); // 4->5 bits
|
|
color.blue = static_cast<uint8_t>(((colorData & 0xf0000) >> 15) | ((colorData & 0xf0000) >> 19)); // 4->5 bits
|
|
color.alpha = static_cast<uint8_t>((colorData & 0x70000000) >> 27); // 3->4 bits - note 0 at right
|
|
if (PVRTCII) {
|
|
// PVRTC-II sets the low alpha bit of Color B to 1, not 0.
|
|
color.alpha |= 1;
|
|
}
|
|
}
|
|
|
|
return color;
|
|
}
|
|
|
|
static void interpolateColors(Pixel32 P, Pixel32 Q, Pixel32 R, Pixel32 S, Pixel128S* pPixel, uint8_t bpp)
|
|
{
|
|
uint32_t wordWidth = 4;
|
|
uint32_t wordHeight = 4;
|
|
if (bpp == 2) { wordWidth = 8; }
|
|
|
|
// Convert to int 32.
|
|
Pixel128S hP = { static_cast<int32_t>(P.red), static_cast<int32_t>(P.green), static_cast<int32_t>(P.blue), static_cast<int32_t>(P.alpha) };
|
|
Pixel128S hQ = { static_cast<int32_t>(Q.red), static_cast<int32_t>(Q.green), static_cast<int32_t>(Q.blue), static_cast<int32_t>(Q.alpha) };
|
|
Pixel128S hR = { static_cast<int32_t>(R.red), static_cast<int32_t>(R.green), static_cast<int32_t>(R.blue), static_cast<int32_t>(R.alpha) };
|
|
Pixel128S hS = { static_cast<int32_t>(S.red), static_cast<int32_t>(S.green), static_cast<int32_t>(S.blue), static_cast<int32_t>(S.alpha) };
|
|
|
|
// Get vectors.
|
|
Pixel128S QminusP = { hQ.red - hP.red, hQ.green - hP.green, hQ.blue - hP.blue, hQ.alpha - hP.alpha };
|
|
Pixel128S SminusR = { hS.red - hR.red, hS.green - hR.green, hS.blue - hR.blue, hS.alpha - hR.alpha };
|
|
|
|
// Multiply colors.
|
|
hP.red *= wordWidth;
|
|
hP.green *= wordWidth;
|
|
hP.blue *= wordWidth;
|
|
hP.alpha *= wordWidth;
|
|
hR.red *= wordWidth;
|
|
hR.green *= wordWidth;
|
|
hR.blue *= wordWidth;
|
|
hR.alpha *= wordWidth;
|
|
|
|
if (bpp == 2)
|
|
{
|
|
// Loop through pixels to achieve results.
|
|
for (uint32_t x = 0; x < wordWidth; x++)
|
|
{
|
|
Pixel128S result = { 4 * hP.red, 4 * hP.green, 4 * hP.blue, 4 * hP.alpha };
|
|
Pixel128S dY = { hR.red - hP.red, hR.green - hP.green, hR.blue - hP.blue, hR.alpha - hP.alpha };
|
|
|
|
for (uint32_t y = 0; y < wordHeight; y++)
|
|
{
|
|
pPixel[y * wordWidth + x].red = static_cast<int32_t>((result.red >> 7) + (result.red >> 2));
|
|
pPixel[y * wordWidth + x].green = static_cast<int32_t>((result.green >> 7) + (result.green >> 2));
|
|
pPixel[y * wordWidth + x].blue = static_cast<int32_t>((result.blue >> 7) + (result.blue >> 2));
|
|
pPixel[y * wordWidth + x].alpha = static_cast<int32_t>((result.alpha >> 5) + (result.alpha >> 1));
|
|
|
|
result.red += dY.red;
|
|
result.green += dY.green;
|
|
result.blue += dY.blue;
|
|
result.alpha += dY.alpha;
|
|
}
|
|
|
|
hP.red += QminusP.red;
|
|
hP.green += QminusP.green;
|
|
hP.blue += QminusP.blue;
|
|
hP.alpha += QminusP.alpha;
|
|
|
|
hR.red += SminusR.red;
|
|
hR.green += SminusR.green;
|
|
hR.blue += SminusR.blue;
|
|
hR.alpha += SminusR.alpha;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
// Loop through pixels to achieve results.
|
|
for (uint32_t y = 0; y < wordHeight; y++)
|
|
{
|
|
Pixel128S result = { 4 * hP.red, 4 * hP.green, 4 * hP.blue, 4 * hP.alpha };
|
|
Pixel128S dY = { hR.red - hP.red, hR.green - hP.green, hR.blue - hP.blue, hR.alpha - hP.alpha };
|
|
|
|
for (uint32_t x = 0; x < wordWidth; x++)
|
|
{
|
|
pPixel[y * wordWidth + x].red = static_cast<int32_t>((result.red >> 6) + (result.red >> 1));
|
|
pPixel[y * wordWidth + x].green = static_cast<int32_t>((result.green >> 6) + (result.green >> 1));
|
|
pPixel[y * wordWidth + x].blue = static_cast<int32_t>((result.blue >> 6) + (result.blue >> 1));
|
|
pPixel[y * wordWidth + x].alpha = static_cast<int32_t>((result.alpha >> 4) + (result.alpha));
|
|
|
|
result.red += dY.red;
|
|
result.green += dY.green;
|
|
result.blue += dY.blue;
|
|
result.alpha += dY.alpha;
|
|
}
|
|
|
|
hP.red += QminusP.red;
|
|
hP.green += QminusP.green;
|
|
hP.blue += QminusP.blue;
|
|
hP.alpha += QminusP.alpha;
|
|
|
|
hR.red += SminusR.red;
|
|
hR.green += SminusR.green;
|
|
hR.blue += SminusR.blue;
|
|
hR.alpha += SminusR.alpha;
|
|
}
|
|
}
|
|
}
|
|
|
|
static void unpackModulations(const PVRTCWord& word, int32_t offsetX, int32_t offsetY, int32_t modulationValues[16][8], int32_t modulationModes[16][8], uint8_t bpp)
|
|
{
|
|
uint32_t WordModMode = word.colorData & 0x1;
|
|
uint32_t ModulationBits = word.modulationData;
|
|
|
|
// Unpack differently depending on 2bpp or 4bpp modes.
|
|
if (bpp == 2)
|
|
{
|
|
if (WordModMode)
|
|
{
|
|
// determine which of the three modes are in use:
|
|
|
|
// If this is the either the H-only or V-only interpolation mode...
|
|
if (ModulationBits & 0x1)
|
|
{
|
|
// look at the "LSB" for the "centre" (V=2,H=4) texel. Its LSB is now
|
|
// actually used to indicate whether it's the H-only mode or the V-only...
|
|
|
|
// The centre texel data is the at (y==2, x==4) and so its LSB is at bit 20.
|
|
if (ModulationBits & (0x1 << 20))
|
|
{
|
|
// This is the V-only mode
|
|
WordModMode = 3;
|
|
}
|
|
else
|
|
{
|
|
// This is the H-only mode
|
|
WordModMode = 2;
|
|
}
|
|
|
|
// Create an extra bit for the centre pixel so that it looks like
|
|
// we have 2 actual bits for this texel. It makes later coding much easier.
|
|
if (ModulationBits & (0x1 << 21))
|
|
{
|
|
// set it to produce code for 1.0
|
|
ModulationBits |= (0x1 << 20);
|
|
}
|
|
else
|
|
{
|
|
// clear it to produce 0.0 code
|
|
ModulationBits &= ~(0x1 << 20);
|
|
}
|
|
} // end if H-Only or V-Only interpolation mode was chosen
|
|
|
|
if (ModulationBits & 0x2) { ModulationBits |= 0x1; /*set it*/ }
|
|
else
|
|
{
|
|
ModulationBits &= ~0x1; /*clear it*/
|
|
}
|
|
|
|
// run through all the pixels in the block. Note we can now treat all the
|
|
// "stored" values as if they have 2bits (even when they didn't!)
|
|
for (uint8_t y = 0; y < 4; y++)
|
|
{
|
|
for (uint8_t x = 0; x < 8; x++)
|
|
{
|
|
modulationModes[static_cast<uint32_t>(x + offsetX)][static_cast<uint32_t>(y + offsetY)] = WordModMode;
|
|
|
|
// if this is a stored value...
|
|
if (((x ^ y) & 1) == 0) {modulationValues[static_cast<uint32_t>(x + offsetX)][static_cast<uint32_t>(y + offsetY)] = ModulationBits & 3;
|
|
ModulationBits >>= 2;
|
|
}
|
|
}
|
|
} // end for y
|
|
}
|
|
// else if direct encoded 2bit mode - i.e. 1 mode bit per pixel
|
|
else
|
|
{
|
|
for (uint8_t y = 0; y < 4; y++)
|
|
{
|
|
for (uint8_t x = 0; x < 8; x++)
|
|
{
|
|
modulationModes[static_cast<uint32_t>(x + offsetX)][static_cast<uint32_t>(y + offsetY)] = WordModMode;
|
|
|
|
/*
|
|
// double the bits so 0=> 00, and 1=>11
|
|
*/
|
|
if (ModulationBits & 1) { modulationValues[static_cast<uint32_t>(x + offsetX)][static_cast<uint32_t>(y + offsetY)] = 0x3; }
|
|
else
|
|
{
|
|
modulationValues[static_cast<uint32_t>(x + offsetX)][static_cast<uint32_t>(y + offsetY)] = 0x0;
|
|
}
|
|
ModulationBits >>= 1;
|
|
}
|
|
} // end for y
|
|
}
|
|
}
|
|
else
|
|
{
|
|
// Much simpler than the 2bpp decompression, only two modes, so the n/8 values are set directly.
|
|
// run through all the pixels in the word.
|
|
if (WordModMode)
|
|
{
|
|
for (uint8_t y = 0; y < 4; y++)
|
|
{
|
|
for (uint8_t x = 0; x < 4; x++)
|
|
{
|
|
modulationValues[static_cast<uint32_t>(y + offsetY)][static_cast<uint32_t>(x + offsetX)] = ModulationBits & 3;
|
|
// if (modulationValues==0) {}. We don't need to check 0, 0 = 0/8.
|
|
if (modulationValues[static_cast<uint32_t>(y + offsetY)][static_cast<uint32_t>(x + offsetX)] == 1)
|
|
{ modulationValues[static_cast<uint32_t>(y + offsetY)][static_cast<uint32_t>(x + offsetX)] = 4; }
|
|
else if (modulationValues[static_cast<uint32_t>(y + offsetY)][static_cast<uint32_t>(x + offsetX)] == 2)
|
|
{
|
|
modulationValues[static_cast<uint32_t>(y + offsetY)][static_cast<uint32_t>(x + offsetX)] = 14; //+10 tells the decompressor to punch through alpha.
|
|
}
|
|
else if (modulationValues[static_cast<uint32_t>(y + offsetY)][static_cast<uint32_t>(x + offsetX)] == 3)
|
|
{
|
|
modulationValues[static_cast<uint32_t>(y + offsetY)][static_cast<uint32_t>(x + offsetX)] = 8;
|
|
}
|
|
ModulationBits >>= 2;
|
|
} // end for x
|
|
} // end for y
|
|
}
|
|
else
|
|
{
|
|
for (uint8_t y = 0; y < 4; y++)
|
|
{
|
|
for (uint8_t x = 0; x < 4; x++)
|
|
{
|
|
modulationValues[static_cast<uint32_t>(y + offsetY)][static_cast<uint32_t>(x + offsetX)] = ModulationBits & 3;
|
|
modulationValues[static_cast<uint32_t>(y + offsetY)][static_cast<uint32_t>(x + offsetX)] *= 3;
|
|
if (modulationValues[static_cast<uint32_t>(y + offsetY)][static_cast<uint32_t>(x + offsetX)] > 3)
|
|
{ modulationValues[static_cast<uint32_t>(y + offsetY)][static_cast<uint32_t>(x + offsetX)] -= 1; }
|
|
ModulationBits >>= 2;
|
|
} // end for x
|
|
} // end for y
|
|
}
|
|
}
|
|
}
|
|
|
|
static int32_t getModulationValues(int32_t modulationValues[16][8], int32_t modulationModes[16][8], uint32_t xPos, uint32_t yPos, uint8_t bpp)
|
|
{
|
|
if (bpp == 2)
|
|
{
|
|
static const uint8_t RepVals0[4] = { 0, 3, 5, 8 };
|
|
|
|
// extract the modulation value. If a simple encoding
|
|
if (modulationModes[xPos][yPos] == 0) { return RepVals0[modulationValues[xPos][yPos]]; }
|
|
else
|
|
{
|
|
// if this is a stored value
|
|
if (((xPos ^ yPos) & 1) == 0) { return RepVals0[modulationValues[xPos][yPos]]; }
|
|
|
|
// else average from the neighbours
|
|
// if H&V interpolation...
|
|
else if (modulationModes[xPos][yPos] == 1)
|
|
{
|
|
return (RepVals0[modulationValues[xPos][yPos - 1]] + RepVals0[modulationValues[xPos][yPos + 1]] + RepVals0[modulationValues[xPos - 1][yPos]] +
|
|
RepVals0[modulationValues[xPos + 1][yPos]] + 2) /
|
|
4;
|
|
}
|
|
// else if H-Only
|
|
else if (modulationModes[xPos][yPos] == 2)
|
|
{
|
|
return (RepVals0[modulationValues[xPos - 1][yPos]] + RepVals0[modulationValues[xPos + 1][yPos]] + 1) / 2;
|
|
}
|
|
// else it's V-Only
|
|
else
|
|
{
|
|
return (RepVals0[modulationValues[xPos][yPos - 1]] + RepVals0[modulationValues[xPos][yPos + 1]] + 1) / 2;
|
|
}
|
|
}
|
|
}
|
|
else if (bpp == 4)
|
|
{
|
|
return modulationValues[xPos][yPos];
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
template<bool PVRTCII>
|
|
static void pvrtcGetDecompressedPixels(const PVRTCWord& P, const PVRTCWord& Q, const PVRTCWord& R, const PVRTCWord& S, Pixel32* pColorData, uint8_t bpp)
|
|
{
|
|
// 4bpp only needs 8*8 values, but 2bpp needs 16*8, so rather than wasting processor time we just statically allocate 16*8.
|
|
int32_t modulationValues[16][8];
|
|
// Only 2bpp needs this.
|
|
int32_t modulationModes[16][8];
|
|
// 4bpp only needs 16 values, but 2bpp needs 32, so rather than wasting processor time we just statically allocate 32.
|
|
Pixel128S upscaledColorA[32];
|
|
Pixel128S upscaledColorB[32];
|
|
|
|
uint32_t wordWidth = 4;
|
|
uint32_t wordHeight = 4;
|
|
if (bpp == 2) { wordWidth = 8; }
|
|
|
|
// Get the modulations from each word.
|
|
unpackModulations(P, 0, 0, modulationValues, modulationModes, bpp);
|
|
unpackModulations(Q, wordWidth, 0, modulationValues, modulationModes, bpp);
|
|
unpackModulations(R, 0, wordHeight, modulationValues, modulationModes, bpp);
|
|
unpackModulations(S, wordWidth, wordHeight, modulationValues, modulationModes, bpp);
|
|
|
|
// Bilinear upscale image data from 2x2 -> 4x4
|
|
interpolateColors(getColorA<PVRTCII>(P.colorData), getColorA<PVRTCII>(Q.colorData),
|
|
getColorA<PVRTCII>(R.colorData), getColorA<PVRTCII>(S.colorData), upscaledColorA, bpp);
|
|
interpolateColors(getColorB<PVRTCII>(P.colorData), getColorB<PVRTCII>(Q.colorData),
|
|
getColorB<PVRTCII>(R.colorData), getColorB<PVRTCII>(S.colorData), upscaledColorB, bpp);
|
|
|
|
for (uint32_t y = 0; y < wordHeight; y++)
|
|
{
|
|
for (uint32_t x = 0; x < wordWidth; x++)
|
|
{
|
|
int32_t mod = getModulationValues(modulationValues, modulationModes, x + wordWidth / 2, y + wordHeight / 2, bpp);
|
|
bool punchthroughAlpha = false;
|
|
if (mod > 10)
|
|
{
|
|
punchthroughAlpha = true;
|
|
mod -= 10;
|
|
}
|
|
|
|
Pixel128S result;
|
|
if (PVRTCII && punchthroughAlpha)
|
|
{
|
|
// PVRTC-II: Punch-through alpha sets the RGB values to 0.
|
|
result.red = 0;
|
|
result.green = 0;
|
|
result.blue = 0;
|
|
result.alpha = 0;
|
|
}
|
|
else
|
|
{
|
|
result.red = (upscaledColorA[y * wordWidth + x].red * (8 - mod) + upscaledColorB[y * wordWidth + x].red * mod) / 8;
|
|
result.green = (upscaledColorA[y * wordWidth + x].green * (8 - mod) + upscaledColorB[y * wordWidth + x].green * mod) / 8;
|
|
result.blue = (upscaledColorA[y * wordWidth + x].blue * (8 - mod) + upscaledColorB[y * wordWidth + x].blue * mod) / 8;
|
|
if (punchthroughAlpha) { result.alpha = 0; }
|
|
else
|
|
{
|
|
result.alpha = (upscaledColorA[y * wordWidth + x].alpha * (8 - mod) + upscaledColorB[y * wordWidth + x].alpha * mod) / 8;
|
|
}
|
|
}
|
|
|
|
// Convert the 32bit precision Result to 8 bit per channel color.
|
|
if (bpp == 2)
|
|
{
|
|
pColorData[y * wordWidth + x].red = static_cast<uint8_t>(result.red);
|
|
pColorData[y * wordWidth + x].green = static_cast<uint8_t>(result.green);
|
|
pColorData[y * wordWidth + x].blue = static_cast<uint8_t>(result.blue);
|
|
pColorData[y * wordWidth + x].alpha = static_cast<uint8_t>(result.alpha);
|
|
}
|
|
else if (bpp == 4)
|
|
{
|
|
pColorData[y + x * wordHeight].red = static_cast<uint8_t>(result.red);
|
|
pColorData[y + x * wordHeight].green = static_cast<uint8_t>(result.green);
|
|
pColorData[y + x * wordHeight].blue = static_cast<uint8_t>(result.blue);
|
|
pColorData[y + x * wordHeight].alpha = static_cast<uint8_t>(result.alpha);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
static uint32_t wrapWordIndex(uint32_t numWords, int word) { return ((word + numWords) % numWords); }
|
|
|
|
static bool isPowerOf2(uint32_t input)
|
|
{
|
|
uint32_t minus1;
|
|
|
|
if (!input) { return 0; }
|
|
|
|
minus1 = input - 1;
|
|
return ((input | minus1) == (input ^ minus1));
|
|
}
|
|
|
|
template<bool PVRTCII>
|
|
static uint32_t TwiddleUV(uint32_t XSize, uint32_t YSize, uint32_t XPos, uint32_t YPos)
|
|
{
|
|
// Check the sizes are valid.
|
|
assert(YPos < YSize);
|
|
assert(XPos < XSize);
|
|
assert(isPowerOf2(YSize));
|
|
assert(isPowerOf2(XSize));
|
|
|
|
if (PVRTCII) {
|
|
// PVRTC-II uses linear order, not Morton order.
|
|
return (YPos * XSize) + XPos;
|
|
} else {
|
|
// Initially assume X is the larger size.
|
|
uint32_t MinDimension = XSize;
|
|
uint32_t MaxValue = YPos;
|
|
uint32_t Twiddled = 0;
|
|
uint32_t SrcBitPos = 1;
|
|
uint32_t DstBitPos = 1;
|
|
int ShiftCount = 0;
|
|
|
|
// If Y is the larger dimension - switch the min/max values.
|
|
if (YSize < XSize)
|
|
{
|
|
MinDimension = YSize;
|
|
MaxValue = XPos;
|
|
}
|
|
|
|
// Step through all the bits in the "minimum" dimension
|
|
while (SrcBitPos < MinDimension)
|
|
{
|
|
if (YPos & SrcBitPos) { Twiddled |= DstBitPos; }
|
|
|
|
if (XPos & SrcBitPos) { Twiddled |= (DstBitPos << 1); }
|
|
|
|
SrcBitPos <<= 1;
|
|
DstBitPos <<= 2;
|
|
ShiftCount += 1;
|
|
}
|
|
|
|
// Prepend any unused bits
|
|
MaxValue >>= ShiftCount;
|
|
Twiddled |= (MaxValue << (2 * ShiftCount));
|
|
|
|
return Twiddled;
|
|
}
|
|
}
|
|
|
|
static void mapDecompressedData(Pixel32* pOutput, uint32_t width, const Pixel32* pWord, const PVRTCWordIndices& words, uint8_t bpp)
|
|
{
|
|
uint32_t wordWidth = 4;
|
|
uint32_t wordHeight = 4;
|
|
if (bpp == 2) { wordWidth = 8; }
|
|
|
|
for (uint32_t y = 0; y < wordHeight / 2; y++)
|
|
{
|
|
for (uint32_t x = 0; x < wordWidth / 2; x++)
|
|
{
|
|
pOutput[(((words.P[1] * wordHeight) + y + wordHeight / 2) * width + words.P[0] * wordWidth + x + wordWidth / 2)] = pWord[y * wordWidth + x]; // map P
|
|
|
|
pOutput[(((words.Q[1] * wordHeight) + y + wordHeight / 2) * width + words.Q[0] * wordWidth + x)] = pWord[y * wordWidth + x + wordWidth / 2]; // map Q
|
|
|
|
pOutput[(((words.R[1] * wordHeight) + y) * width + words.R[0] * wordWidth + x + wordWidth / 2)] = pWord[(y + wordHeight / 2) * wordWidth + x]; // map R
|
|
|
|
pOutput[(((words.S[1] * wordHeight) + y) * width + words.S[0] * wordWidth + x)] = pWord[(y + wordHeight / 2) * wordWidth + x + wordWidth / 2]; // map S
|
|
}
|
|
}
|
|
}
|
|
template<bool PVRTCII>
|
|
static uint32_t pvrtcDecompress(uint8_t* pCompressedData, Pixel32* pDecompressedData, uint32_t width, uint32_t height, uint8_t bpp)
|
|
{
|
|
uint32_t wordWidth = 4;
|
|
uint32_t wordHeight = 4;
|
|
if (bpp == 2) { wordWidth = 8; }
|
|
|
|
uint32_t* pWordMembers = (uint32_t*)pCompressedData;
|
|
Pixel32* pOutData = pDecompressedData;
|
|
|
|
// Calculate number of words
|
|
int i32NumXWords = static_cast<int>(width / wordWidth);
|
|
int i32NumYWords = static_cast<int>(height / wordHeight);
|
|
|
|
// Structs used for decompression
|
|
PVRTCWordIndices indices;
|
|
std::vector<Pixel32> pPixels(wordWidth * wordHeight * sizeof(Pixel32));
|
|
|
|
// For each row of words
|
|
for (int32_t wordY = -1; wordY < i32NumYWords - 1; wordY++)
|
|
{
|
|
// for each column of words
|
|
for (int32_t wordX = -1; wordX < i32NumXWords - 1; wordX++)
|
|
{
|
|
indices.P[0] = static_cast<int>(wrapWordIndex(i32NumXWords, wordX));
|
|
indices.P[1] = static_cast<int>(wrapWordIndex(i32NumYWords, wordY));
|
|
indices.Q[0] = static_cast<int>(wrapWordIndex(i32NumXWords, wordX + 1));
|
|
indices.Q[1] = static_cast<int>(wrapWordIndex(i32NumYWords, wordY));
|
|
indices.R[0] = static_cast<int>(wrapWordIndex(i32NumXWords, wordX));
|
|
indices.R[1] = static_cast<int>(wrapWordIndex(i32NumYWords, wordY + 1));
|
|
indices.S[0] = static_cast<int>(wrapWordIndex(i32NumXWords, wordX + 1));
|
|
indices.S[1] = static_cast<int>(wrapWordIndex(i32NumYWords, wordY + 1));
|
|
|
|
// Work out the offsets into the twiddle structs, multiply by two as there are two members per word.
|
|
uint32_t WordOffsets[4] = {
|
|
TwiddleUV<PVRTCII>(i32NumXWords, i32NumYWords, indices.P[0], indices.P[1]) * 2,
|
|
TwiddleUV<PVRTCII>(i32NumXWords, i32NumYWords, indices.Q[0], indices.Q[1]) * 2,
|
|
TwiddleUV<PVRTCII>(i32NumXWords, i32NumYWords, indices.R[0], indices.R[1]) * 2,
|
|
TwiddleUV<PVRTCII>(i32NumXWords, i32NumYWords, indices.S[0], indices.S[1]) * 2,
|
|
};
|
|
|
|
// Access individual elements to fill out PVRTCWord
|
|
PVRTCWord P, Q, R, S;
|
|
P.colorData = static_cast<uint32_t>(le32_to_cpu(pWordMembers[WordOffsets[0] + 1]));
|
|
P.modulationData = static_cast<uint32_t>(le32_to_cpu(pWordMembers[WordOffsets[0]]));
|
|
Q.colorData = static_cast<uint32_t>(le32_to_cpu(pWordMembers[WordOffsets[1] + 1]));
|
|
Q.modulationData = static_cast<uint32_t>(le32_to_cpu(pWordMembers[WordOffsets[1]]));
|
|
R.colorData = static_cast<uint32_t>(le32_to_cpu(pWordMembers[WordOffsets[2] + 1]));
|
|
R.modulationData = static_cast<uint32_t>(le32_to_cpu(pWordMembers[WordOffsets[2]]));
|
|
S.colorData = static_cast<uint32_t>(le32_to_cpu(pWordMembers[WordOffsets[3] + 1]));
|
|
S.modulationData = static_cast<uint32_t>(le32_to_cpu(pWordMembers[WordOffsets[3]]));
|
|
|
|
// assemble 4 words into struct to get decompressed pixels from
|
|
pvrtcGetDecompressedPixels<PVRTCII>(P, Q, R, S, pPixels.data(), bpp);
|
|
mapDecompressedData(pOutData, width, pPixels.data(), indices, bpp);
|
|
|
|
} // for each word
|
|
} // for each row of words
|
|
|
|
// Return the data size
|
|
return width * height / static_cast<uint32_t>((wordWidth / 2));
|
|
}
|
|
|
|
template<bool PVRTCII>
|
|
static uint32_t PVRTDecompressPVRTC_int(const void* pCompressedData, uint32_t Do2bitMode, uint32_t XDim, uint32_t YDim, uint8_t* pResultImage)
|
|
{
|
|
// Cast the output buffer to a Pixel32 pointer.
|
|
Pixel32* pDecompressedData = (Pixel32*)pResultImage;
|
|
|
|
// Check the X and Y values are at least the minimum size.
|
|
uint32_t XTrueDim = std::max(XDim, ((Do2bitMode == 1u) ? 16u : 8u));
|
|
uint32_t YTrueDim = std::max(YDim, 8u);
|
|
|
|
// If the dimensions aren't correct, we need to create a new buffer instead of just using the provided one, as the buffer will overrun otherwise.
|
|
// rom-properties: make sure we don't hit this case
|
|
assert(XTrueDim == XDim);
|
|
assert(YTrueDim == YDim);
|
|
if (XTrueDim != XDim || YTrueDim != YDim) { pDecompressedData = new Pixel32[XTrueDim * YTrueDim]; }
|
|
|
|
// Decompress the surface.
|
|
uint32_t retval = pvrtcDecompress<PVRTCII>((uint8_t*)pCompressedData,
|
|
pDecompressedData, XTrueDim, YTrueDim, uint8_t(Do2bitMode == 1 ? 2 : 4));
|
|
|
|
// If the dimensions were too small, then copy the new buffer back into the output buffer.
|
|
if (XTrueDim != XDim || YTrueDim != YDim)
|
|
{
|
|
// Loop through all the required pixels.
|
|
for (uint32_t x = 0; x < XDim; ++x)
|
|
{
|
|
for (uint32_t y = 0; y < YDim; ++y) { ((Pixel32*)pResultImage)[x + y * XDim] = pDecompressedData[x + y * XTrueDim]; }
|
|
}
|
|
|
|
// Free the temporary buffer.
|
|
delete[] pDecompressedData;
|
|
}
|
|
return retval;
|
|
}
|
|
|
|
uint32_t PVRTDecompressPVRTC(const void* pCompressedData, uint32_t Do2bitMode, uint32_t XDim, uint32_t YDim, uint8_t* pResultImage)
|
|
{
|
|
return PVRTDecompressPVRTC_int<false>(pCompressedData, Do2bitMode, XDim, YDim, pResultImage);
|
|
}
|
|
|
|
uint32_t PVRTDecompressPVRTCII(const void* pCompressedData, uint32_t Do2bitMode, uint32_t XDim, uint32_t YDim, uint8_t* pResultImage)
|
|
{
|
|
return PVRTDecompressPVRTC_int<true>(pCompressedData, Do2bitMode, XDim, YDim, pResultImage);
|
|
}
|
|
} // namespace pvr
|
|
//!\endcond
|