[librptexture] Use the PowerVR Native SDK (well, a subset) to decode PVRTC.

It seems that the R and B channels are backwards, so we'll need to fix that next. Other than that, both 2bpp and 4bpp decoding seem to work. (A lot better than my terrible attempt, at least.) [cmake] options.cmake: Added an option for PVRTC. The code is licensed under the MIT license, but we might as well provide an option for it because it's third-party code instead of my own code. TODO: - Fix R/B channel ordering. - PVRTC-II decoding? - Add PVRTC decoding to KTX and DDS.
2025-06-18 11:35:38 -04:00 · 2019-12-10 21:35:45 -05:00 · 2019-12-10 21:35:45 -05:00 · e51803a4fe
commit e51803a4fe
parent 451a8440a6
14 changed files with 958 additions and 402 deletions
--- a/cmake/options.cmake
+++ b/cmake/options.cmake
@ -80,6 +80,9 @@ OPTION(ENABLE_UNICE68 "Enable UnICE68 for Atari ST SNDH files. (GPLv3)" ON)
 # Enable libmspack-xenia for Xbox 360 executables.
 OPTION(ENABLE_LIBMSPACK "Enable libmspack-xenia for Xbox 360 executables." ON)

+# Enable the PowerVR Native SDK subset for PVRTC decompression.
+OPTION(ENABLE_PVRTC "Enable the PowerVR Native SDK subset for PVRTC decompression." ON)
+
 # Link-time optimization.
 # FIXME: Not working in clang builds and Ubuntu's gcc...
 IF(MSVC)
--- a/debian/copyright
+++ b/debian/copyright
@ -120,6 +120,11 @@ Files:
 Copyright: 2013 Ben Vanik. All rights reserved.
 License: BSD-3-clause

+Files:
+ extlib/PowerVR/*
+Copyright: (c) Imagination Technologies Ltd.
+License: MIT
+
 License: BSD-3-clause
 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions
@ -471,3 +476,25 @@ License: BSD-BY-LC-NE
 POSSIBILITY OF SUCH DAMAGE.
 .
 The complete text can be found in README-turbo.txt, supplied with the source.
+
+License: MIT
+ The MIT License (MIT)
+ Copyright (c) Imagination Technologies Ltd.
+ .
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+ .
+ The above copyright notice and this permission notice shall be included in
+ all copies or substantial portions of the Software.
+ .
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ THE SOFTWARE.
--- a/debian/rules
+++ b/debian/rules
@ -19,6 +19,7 @@ CMAKE_OPTIONS := \
 	-DBUILD_GNOME=ON \
 	-DBUILD_MATE=OFF \
 	-DBUILD_CLI=ON \
+	-DENABLE_PVRTC=ON \
 	-DENABLE_LTO=ON
 ifeq (,$(filter nocheck,$(DEB_BUILD_OPTIONS)))
 CMAKE_OPTIONS += -DBUILD_TESTING=ON
--- a/extlib/CMakeLists.txt
+++ b/extlib/CMakeLists.txt
@ -182,6 +182,14 @@ IF(ENABLE_LIBMSPACK)
 	SET_EXTLIB_PROPERTIES(libmspack)
 ENDIF(ENABLE_LIBMSPACK)

+# PowerVR texture decompression.
+# Adds the extlib/PowerVR/ subdirectory, which defines the "pvrtc" library
+# target, when ENABLE_PVRTC is set.
+IF(ENABLE_PVRTC)
+	# NOTE(review): the pvrtc target is declared STATIC explicitly in
+	# extlib/PowerVR/CMakeLists.txt, so these two variables presumably only
+	# follow the pattern used for the other bundled libraries - confirm.
+	SET(BUILD_STATIC_LIBS ON)
+	SET(BUILD_SHARED_LIBS OFF)
+	ADD_SUBDIRECTORY(PowerVR)
+	SET_EXTLIB_PROPERTIES(pvrtc)
+ENDIF(ENABLE_PVRTC)
+
 # Google Test
 IF(BUILD_TESTING)
 	# Reference: http://stackoverflow.com/questions/12540970/how-to-make-gtest-build-mdd-instead-of-mtd-by-default-using-cmake
--- a/extlib/PowerVR/CMakeLists.txt
+++ b/extlib/PowerVR/CMakeLists.txt
@ -0,0 +1,28 @@
+PROJECT(PowerVR CXX)
+# PowerVR Texture Compression decompressor from the PowerVR Native SDK.
+# Copyright (c) Imagination Technologies Ltd.
+# Licensed under the MIT License.
+# References:
+# - PowerVR commit: c1605c99281797e5cd4c8439e1bc679706bbb311
+# - https://github.com/powervr-graphics/Native_SDK
+
+# Sources.
+SET(libpvrtc_SRCS PVRTDecompress.cpp)
+# Headers.
+SET(libpvrtc_H PVRTDecompress.h)
+
+######################
+# Build the library. #
+######################
+
+ADD_LIBRARY(pvrtc STATIC ${libpvrtc_SRCS} ${libpvrtc_H})
+# PVRTDecompress.h lives next to the sources, so this directory is needed
+# both to build pvrtc itself and by every target that links against it.
+TARGET_INCLUDE_DIRECTORIES(pvrtc
+	PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}
+	)
+
+# Unix: Build position-independent code in order to use this static library
+# in plugins. The target property lets CMake pick the right flag for the
+# compiler in use instead of appending -fpic/-fPIC to the global flag strings
+# (and this project only enables CXX, so there are no C flags to adjust).
+IF(UNIX AND NOT APPLE)
+	SET_TARGET_PROPERTIES(pvrtc PROPERTIES POSITION_INDEPENDENT_CODE ON)
+ENDIF(UNIX AND NOT APPLE)
--- a/extlib/PowerVR/LICENSE.md
+++ b/extlib/PowerVR/LICENSE.md
@ -0,0 +1,22 @@
+The MIT License (MIT)
+Copyright (c) Imagination Technologies Ltd.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+
+
--- a/extlib/PowerVR/PVRTDecompress.cpp
+++ b/extlib/PowerVR/PVRTDecompress.cpp
@ -0,0 +1,757 @@
+/*!
+\brief Implementation of the Texture Decompression functions.
+\file PVRCore/texture/PVRTDecompress.cpp
+\author PowerVR by Imagination, Developer Technology Team
+\copyright Copyright (c) Imagination Technologies Limited.
+*/
+//!\cond NO_DOXYGEN
+
+#include <cstdlib>
+#include <cstdio>
+#include <climits>
+#include <cmath>
+#include <algorithm>
+#include <cstring>
+#include "PVRTDecompress.h"
+#include <cassert>
+#include <vector>
+
+namespace pvr {
+// Minimum texture dimensions, in pixels, for block-compressed formats.
+// PVRTDecompressETC() compares against the ETC values to decide whether it
+// has to decode into a padded temporary buffer.
+enum
+{
+	ETC_MIN_TEXWIDTH = 4,
+	ETC_MIN_TEXHEIGHT = 4,
+	DXT_MIN_TEXWIDTH = 4,
+	DXT_MIN_TEXHEIGHT = 4,
+};
+
+// A color with four unsigned 8-bit channels.
+struct Pixel32
+{
+	uint8_t red, green, blue, alpha;
+};
+
+// A color with four signed 32-bit channels; used for intermediate
+// interpolation results that do not fit in 8 bits.
+struct Pixel128S
+{
+	int32_t red, green, blue, alpha;
+};
+
+// One compressed PVRTC word: 32 bits of modulation data followed by
+// 32 bits of color data.
+struct PVRTCWord
+{
+	uint32_t modulationData;
+	uint32_t colorData;
+};
+
+// Word coordinates of the four words (P, Q, R, S) used to decode one tile.
+// Element [0] is the X word index and element [1] is the Y word index.
+struct PVRTCWordIndices
+{
+	int P[2], Q[2], R[2], S[2];
+};
+
+// Extracts "color A" from the low 16 bits of a PVRTC word's color data.
+// Bit 15 selects the encoding: set means opaque (RGB 554), clear means
+// translucent (ARGB 3443). Red, green and blue are returned widened to 5 bits
+// and alpha as a 4-bit value.
+static Pixel32 getColorA(uint32_t colorData)
+{
+	Pixel32 result;
+	const bool isOpaque = (colorData & 0x8000) != 0;
+
+	if (!isOpaque)
+	{
+		// Transparent Color Mode - ARGB 3443
+		const uint32_t red4 = colorData & 0xf00;
+		const uint32_t green4 = colorData & 0xf0;
+		const uint32_t blue3 = colorData & 0xe;
+
+		result.red = static_cast<uint8_t>((red4 >> 7) | (red4 >> 11)); // 4->5 bits
+		result.green = static_cast<uint8_t>((green4 >> 3) | (green4 >> 7)); // 4->5 bits
+		result.blue = static_cast<uint8_t>((blue3 << 1) | (blue3 >> 2)); // 3->5 bits
+		result.alpha = static_cast<uint8_t>((colorData & 0x7000) >> 11); // 3->4 bits - lowest bit stays 0
+		return result;
+	}
+
+	// Opaque Color Mode - RGB 554
+	const uint32_t blue4 = colorData & 0x1e;
+
+	result.red = static_cast<uint8_t>((colorData & 0x7c00) >> 10); // 5->5 bits
+	result.green = static_cast<uint8_t>((colorData & 0x3e0) >> 5); // 5->5 bits
+	result.blue = static_cast<uint8_t>(blue4 | (blue4 >> 4)); // 4->5 bits
+	result.alpha = static_cast<uint8_t>(0xf); // no stored alpha: fully opaque
+	return result;
+}
+
+// Extracts "color B" from the high 16 bits of a PVRTC word's color data.
+// Bit 31 selects the encoding: set means opaque (RGB 555), clear means
+// translucent (ARGB 3444). Red, green and blue are returned as 5-bit values
+// and alpha as a 4-bit value.
+static Pixel32 getColorB(uint32_t colorData)
+{
+	// Work on the upper half-word so the masks below are 16-bit sized.
+	const uint32_t upper = colorData >> 16;
+	Pixel32 result;
+
+	if ((upper & 0x8000) != 0)
+	{
+		// Opaque Color Mode - RGB 555
+		result.red = static_cast<uint8_t>((upper & 0x7c00) >> 10); // 5->5 bits
+		result.green = static_cast<uint8_t>((upper & 0x3e0) >> 5); // 5->5 bits
+		result.blue = static_cast<uint8_t>(upper & 0x1f); // 5->5 bits
+		result.alpha = static_cast<uint8_t>(0xf); // no stored alpha: fully opaque
+	}
+	else
+	{
+		// Transparent Color Mode - ARGB 3444
+		const uint32_t red4 = upper & 0xf00;
+		const uint32_t green4 = upper & 0xf0;
+		const uint32_t blue4 = upper & 0xf;
+
+		result.red = static_cast<uint8_t>((red4 >> 7) | (red4 >> 11)); // 4->5 bits
+		result.green = static_cast<uint8_t>((green4 >> 3) | (green4 >> 7)); // 4->5 bits
+		result.blue = static_cast<uint8_t>((blue4 << 1) | (blue4 >> 3)); // 4->5 bits
+		result.alpha = static_cast<uint8_t>((upper & 0x7000) >> 11); // 3->4 bits - lowest bit stays 0
+	}
+
+	return result;
+}
+
+// Bilinearly interpolates between the four corner colors P, Q, R and S and
+// writes wordWidth * 4 results to pPixel (8x4 entries for 2bpp, 4x4 otherwise).
+//
+// The inputs carry 5-bit color and 4-bit alpha values; the shift pairs at the
+// end of each inner loop divide out the interpolation weights and widen the
+// results to 8 bits per channel.
+//
+// P, Q, R, S - corner colors, as returned by getColorA()/getColorB().
+// pPixel     - output array; must have room for wordWidth * 4 entries.
+// bpp        - 2 selects the 8-wide layout; any other value uses the 4-wide one.
+static void interpolateColors(Pixel32 P, Pixel32 Q, Pixel32 R, Pixel32 S, Pixel128S* pPixel, uint8_t bpp)
+{
+	uint32_t wordWidth = 4;
+	uint32_t wordHeight = 4;
+	if (bpp == 2) { wordWidth = 8; }
+
+	// Convert to int 32.
+	Pixel128S hP = { static_cast<int32_t>(P.red), static_cast<int32_t>(P.green), static_cast<int32_t>(P.blue), static_cast<int32_t>(P.alpha) };
+	Pixel128S hQ = { static_cast<int32_t>(Q.red), static_cast<int32_t>(Q.green), static_cast<int32_t>(Q.blue), static_cast<int32_t>(Q.alpha) };
+	Pixel128S hR = { static_cast<int32_t>(R.red), static_cast<int32_t>(R.green), static_cast<int32_t>(R.blue), static_cast<int32_t>(R.alpha) };
+	Pixel128S hS = { static_cast<int32_t>(S.red), static_cast<int32_t>(S.green), static_cast<int32_t>(S.blue), static_cast<int32_t>(S.alpha) };
+
+	// Get vectors.
+	Pixel128S QminusP = { hQ.red - hP.red, hQ.green - hP.green, hQ.blue - hP.blue, hQ.alpha - hP.alpha };
+	Pixel128S SminusR = { hS.red - hR.red, hS.green - hR.green, hS.blue - hR.blue, hS.alpha - hR.alpha };
+
+	// Multiply colors.
+	// Pre-scaling by wordWidth lets the outer loops add QminusP / SminusR once
+	// per step without any division.
+	hP.red *= wordWidth;
+	hP.green *= wordWidth;
+	hP.blue *= wordWidth;
+	hP.alpha *= wordWidth;
+	hR.red *= wordWidth;
+	hR.green *= wordWidth;
+	hR.blue *= wordWidth;
+	hR.alpha *= wordWidth;
+
+	if (bpp == 2)
+	{
+		// Loop through pixels to achieve results.
+		// Outer loop: 8 steps from P towards Q (and R towards S).
+		for (uint32_t x = 0; x < wordWidth; x++)
+		{
+			Pixel128S result = { 4 * hP.red, 4 * hP.green, 4 * hP.blue, 4 * hP.alpha };
+			Pixel128S dY = { hR.red - hP.red, hR.green - hP.green, hR.blue - hP.blue, hR.alpha - hP.alpha };
+
+			// Inner loop: 4 steps from the P-Q edge towards the R-S edge.
+			for (uint32_t y = 0; y < wordHeight; y++)
+			{
+				// result is 32x the interpolated value here, so (>> 2) is value << 3
+				// and (>> 7) is value >> 2: 5 bits widened to 8 (4 bits for alpha).
+				pPixel[y * wordWidth + x].red = static_cast<int32_t>((result.red >> 7) + (result.red >> 2));
+				pPixel[y * wordWidth + x].green = static_cast<int32_t>((result.green >> 7) + (result.green >> 2));
+				pPixel[y * wordWidth + x].blue = static_cast<int32_t>((result.blue >> 7) + (result.blue >> 2));
+				pPixel[y * wordWidth + x].alpha = static_cast<int32_t>((result.alpha >> 5) + (result.alpha >> 1));
+
+				result.red += dY.red;
+				result.green += dY.green;
+				result.blue += dY.blue;
+				result.alpha += dY.alpha;
+			}
+
+			hP.red += QminusP.red;
+			hP.green += QminusP.green;
+			hP.blue += QminusP.blue;
+			hP.alpha += QminusP.alpha;
+
+			hR.red += SminusR.red;
+			hR.green += SminusR.green;
+			hR.blue += SminusR.blue;
+			hR.alpha += SminusR.alpha;
+		}
+	}
+	else
+	{
+		// Loop through pixels to achieve results.
+		// NOTE(review): here the outer loop variable is named y but still steps
+		// from P towards Q, and the inner x loop steps towards R/S, so the output
+		// is transposed relative to the 2bpp path. This appears to pair with the
+		// [y][x] indexing and transposed store the 4bpp paths use elsewhere in
+		// this file - confirm before changing.
+		for (uint32_t y = 0; y < wordHeight; y++)
+		{
+			Pixel128S result = { 4 * hP.red, 4 * hP.green, 4 * hP.blue, 4 * hP.alpha };
+			Pixel128S dY = { hR.red - hP.red, hR.green - hP.green, hR.blue - hP.blue, hR.alpha - hP.alpha };
+
+			for (uint32_t x = 0; x < wordWidth; x++)
+			{
+				// result is 16x the interpolated value here, so (>> 1) is value << 3
+				// and (>> 6) is value >> 2: 5 bits widened to 8 (4 bits for alpha).
+				pPixel[y * wordWidth + x].red = static_cast<int32_t>((result.red >> 6) + (result.red >> 1));
+				pPixel[y * wordWidth + x].green = static_cast<int32_t>((result.green >> 6) + (result.green >> 1));
+				pPixel[y * wordWidth + x].blue = static_cast<int32_t>((result.blue >> 6) + (result.blue >> 1));
+				pPixel[y * wordWidth + x].alpha = static_cast<int32_t>((result.alpha >> 4) + (result.alpha));
+
+				result.red += dY.red;
+				result.green += dY.green;
+				result.blue += dY.blue;
+				result.alpha += dY.alpha;
+			}
+
+			hP.red += QminusP.red;
+			hP.green += QminusP.green;
+			hP.blue += QminusP.blue;
+			hP.alpha += QminusP.alpha;
+
+			hR.red += SminusR.red;
+			hR.green += SminusR.green;
+			hR.blue += SminusR.blue;
+			hR.alpha += SminusR.alpha;
+		}
+	}
+}
+
+// Unpacks the modulation data of one PVRTC word into the shared modulation
+// tables, at the position given by offsetX / offsetY.
+//
+// 2bpp: a word covers 8x4 pixels and the tables are indexed [x][y].
+//   modulationModes receives 0 (direct 1 bit per pixel), 1 (H&V interpolation),
+//   2 (H-only) or 3 (V-only); modulationValues receives a 2-bit code for every
+//   stored pixel.
+// 4bpp: a word covers 4x4 pixels and modulationValues is indexed [y][x].
+//   The weights are stored directly in eighths: 0, 3, 5, 8 when the mode bit
+//   is clear and 0, 4, 14 (4 + 10 = punch-through alpha), 8 when it is set.
+//   modulationModes is not written.
+//
+// The mode bit is bit 0 of word.colorData.
+static void unpackModulations(const PVRTCWord& word, int32_t offsetX, int32_t offsetY, int32_t modulationValues[16][8], int32_t modulationModes[16][8], uint8_t bpp)
+{
+	uint32_t WordModMode = word.colorData & 0x1;
+	uint32_t ModulationBits = word.modulationData;
+
+	// Unpack differently depending on 2bpp or 4bpp modes.
+	if (bpp == 2)
+	{
+		if (WordModMode)
+		{
+			// determine which of the three modes are in use:
+
+			// If this is the either the H-only or V-only interpolation mode...
+			if (ModulationBits & 0x1)
+			{
+				// look at the "LSB" for the "centre" (V=2,H=4) texel. Its LSB is now
+				// actually used to indicate whether it's the H-only mode or the V-only...
+
+				// The centre texel data is the at (y==2, x==4) and so its LSB is at bit 20.
+				if (ModulationBits & (0x1 << 20))
+				{
+					// This is the V-only mode
+					WordModMode = 3;
+				}
+				else
+				{
+					// This is the H-only mode
+					WordModMode = 2;
+				}
+
+				// Create an extra bit for the centre pixel so that it looks like
+				// we have 2 actual bits for this texel. It makes later coding much easier.
+				if (ModulationBits & (0x1 << 21))
+				{
+					// set it to produce code for 1.0
+					ModulationBits |= (0x1 << 20);
+				}
+				else
+				{
+					// clear it to produce 0.0 code
+					ModulationBits &= ~(0x1 << 20);
+				}
+			} // end if H-Only or V-Only interpolation mode was chosen
+
+			// Bit 0 was used as a flag above; rebuild it from bit 1 so the first
+			// texel also reads as a full 2-bit code.
+			if (ModulationBits & 0x2) { ModulationBits |= 0x1; /*set it*/ }
+			else
+			{
+				ModulationBits &= ~0x1; /*clear it*/
+			}
+
+			// run through all the pixels in the block. Note we can now treat all the
+			// "stored" values as if they have 2bits (even when they didn't!)
+			for (uint8_t y = 0; y < 4; y++)
+			{
+				for (uint8_t x = 0; x < 8; x++)
+				{
+					modulationModes[static_cast<uint32_t>(x + offsetX)][static_cast<uint32_t>(y + offsetY)] = WordModMode;
+
+					// if this is a stored value...
+					// (only the checkerboard positions where x and y have the same parity)
+					if (((x ^ y) & 1) == 0) {modulationValues[static_cast<uint32_t>(x + offsetX)][static_cast<uint32_t>(y + offsetY)] = ModulationBits & 3;
+						ModulationBits >>= 2;
+					}
+				}
+			} // end for y
+		}
+		// else if direct encoded 2bit mode - i.e. 1 mode bit per pixel
+		else
+		{
+			for (uint8_t y = 0; y < 4; y++)
+			{
+				for (uint8_t x = 0; x < 8; x++)
+				{
+					modulationModes[static_cast<uint32_t>(x + offsetX)][static_cast<uint32_t>(y + offsetY)] = WordModMode;
+
+					/*
+					// double the bits so 0=> 00, and 1=>11
+					*/
+					if (ModulationBits & 1) { modulationValues[static_cast<uint32_t>(x + offsetX)][static_cast<uint32_t>(y + offsetY)] = 0x3; }
+					else
+					{
+						modulationValues[static_cast<uint32_t>(x + offsetX)][static_cast<uint32_t>(y + offsetY)] = 0x0;
+					}
+					ModulationBits >>= 1;
+				}
+			} // end for y
+		}
+	}
+	else
+	{
+		// Much simpler than the 2bpp decompression, only two modes, so the n/8 values are set directly.
+		// run through all the pixels in the word.
+		// Note that the table is indexed [y][x] here, unlike the [x][y] used for 2bpp.
+		if (WordModMode)
+		{
+			for (uint8_t y = 0; y < 4; y++)
+			{
+				for (uint8_t x = 0; x < 4; x++)
+				{
+					modulationValues[static_cast<uint32_t>(y + offsetY)][static_cast<uint32_t>(x + offsetX)] = ModulationBits & 3;
+					// if (modulationValues==0) {}. We don't need to check 0, 0 = 0/8.
+					if (modulationValues[static_cast<uint32_t>(y + offsetY)][static_cast<uint32_t>(x + offsetX)] == 1)
+					{ modulationValues[static_cast<uint32_t>(y + offsetY)][static_cast<uint32_t>(x + offsetX)] = 4; }
+					else if (modulationValues[static_cast<uint32_t>(y + offsetY)][static_cast<uint32_t>(x + offsetX)] == 2)
+					{
+						modulationValues[static_cast<uint32_t>(y + offsetY)][static_cast<uint32_t>(x + offsetX)] = 14; //+10 tells the decompressor to punch through alpha.
+					}
+					else if (modulationValues[static_cast<uint32_t>(y + offsetY)][static_cast<uint32_t>(x + offsetX)] == 3)
+					{
+						modulationValues[static_cast<uint32_t>(y + offsetY)][static_cast<uint32_t>(x + offsetX)] = 8;
+					}
+					ModulationBits >>= 2;
+				} // end for x
+			} // end for y
+		}
+		else
+		{
+			for (uint8_t y = 0; y < 4; y++)
+			{
+				for (uint8_t x = 0; x < 4; x++)
+				{
+					// Maps the 2-bit code 0,1,2,3 to 0,3,5,8 (code * 3, minus 1 above 3).
+					modulationValues[static_cast<uint32_t>(y + offsetY)][static_cast<uint32_t>(x + offsetX)] = ModulationBits & 3;
+					modulationValues[static_cast<uint32_t>(y + offsetY)][static_cast<uint32_t>(x + offsetX)] *= 3;
+					if (modulationValues[static_cast<uint32_t>(y + offsetY)][static_cast<uint32_t>(x + offsetX)] > 3)
+					{ modulationValues[static_cast<uint32_t>(y + offsetY)][static_cast<uint32_t>(x + offsetX)] -= 1; }
+					ModulationBits >>= 2;
+				} // end for x
+			} // end for y
+		}
+	}
+}
+
+// Returns the modulation weight (in eighths) for the pixel at (xPos, yPos) of
+// the tables filled by unpackModulations().
+//
+// 2bpp: stored 2-bit codes are mapped through { 0, 3, 5, 8 }; pixels that have
+//       no stored value are averaged (with rounding) from their horizontal
+//       and/or vertical neighbours, depending on the word's mode.
+// 4bpp: the stored value is returned unchanged (it may be 14, i.e. 4 + 10, to
+//       request punch-through alpha).
+// Any other bpp returns 0.
+//
+// The neighbour lookups read xPos +/- 1 and yPos +/- 1, so the position must
+// not lie on the table border.
+static int32_t getModulationValues(int32_t modulationValues[16][8], int32_t modulationModes[16][8], uint32_t xPos, uint32_t yPos, uint8_t bpp)
+{
+	if (bpp == 2)
+	{
+		const int32_t RepVals0[4] = { 0, 3, 5, 8 };
+
+		// extract the modulation value. If a simple encoding
+		if (modulationModes[xPos][yPos] == 0) { return RepVals0[modulationValues[xPos][yPos]]; }
+		else
+		{
+			// if this is a stored value
+			if (((xPos ^ yPos) & 1) == 0) { return RepVals0[modulationValues[xPos][yPos]]; }
+
+			// else average from the neighbours
+			// if H&V interpolation...
+			else if (modulationModes[xPos][yPos] == 1)
+			{
+				return (RepVals0[modulationValues[xPos][yPos - 1]] + RepVals0[modulationValues[xPos][yPos + 1]] + RepVals0[modulationValues[xPos - 1][yPos]] +
+						   RepVals0[modulationValues[xPos + 1][yPos]] + 2) /
+					4;
+			}
+			// else if H-Only
+			else if (modulationModes[xPos][yPos] == 2)
+			{
+				return (RepVals0[modulationValues[xPos - 1][yPos]] + RepVals0[modulationValues[xPos + 1][yPos]] + 1) / 2;
+			}
+			// else it's V-Only
+			else
+			{
+				return (RepVals0[modulationValues[xPos][yPos - 1]] + RepVals0[modulationValues[xPos][yPos + 1]] + 1) / 2;
+			}
+		}
+	}
+	else if (bpp == 4)
+	{
+		return modulationValues[xPos][yPos];
+	}
+
+	return 0;
+}
+
+// Decodes one word-sized tile of pixels from four neighbouring PVRTC words.
+//
+// The modulation data of P, Q, R and S is unpacked into a 2x2-word table, the
+// A and B colors of the four words are interpolated across the tile, and each
+// output pixel blends A and B using the modulation weight found half a word
+// into the table (x + wordWidth / 2, y + wordHeight / 2).
+//
+// P, Q, R, S - the four words; Q is placed one word to the right of P, R one
+//              word below P and S diagonally opposite P in the modulation table.
+// pColorData - output; receives wordWidth * wordHeight pixels (32 for 2bpp,
+//              16 for 4bpp).
+// bpp        - 2 or 4. Pixels are only stored for these two values.
+static void pvrtcGetDecompressedPixels(const PVRTCWord& P, const PVRTCWord& Q, const PVRTCWord& R, const PVRTCWord& S, Pixel32* pColorData, uint8_t bpp)
+{
+	// 4bpp only needs 8*8 values, but 2bpp needs 16*8, so rather than wasting processor time we just statically allocate 16*8.
+	int32_t modulationValues[16][8];
+	// Only 2bpp needs this.
+	int32_t modulationModes[16][8];
+	// 4bpp only needs 16 values, but 2bpp needs 32, so rather than wasting processor time we just statically allocate 32.
+	Pixel128S upscaledColorA[32];
+	Pixel128S upscaledColorB[32];
+
+	uint32_t wordWidth = 4;
+	uint32_t wordHeight = 4;
+	if (bpp == 2) { wordWidth = 8; }
+
+	// Get the modulations from each word.
+	unpackModulations(P, 0, 0, modulationValues, modulationModes, bpp);
+	unpackModulations(Q, wordWidth, 0, modulationValues, modulationModes, bpp);
+	unpackModulations(R, 0, wordHeight, modulationValues, modulationModes, bpp);
+	unpackModulations(S, wordWidth, wordHeight, modulationValues, modulationModes, bpp);
+
+	// Bilinear upscale image data from 2x2 -> 4x4
+	interpolateColors(getColorA(P.colorData), getColorA(Q.colorData), getColorA(R.colorData), getColorA(S.colorData), upscaledColorA, bpp);
+	interpolateColors(getColorB(P.colorData), getColorB(Q.colorData), getColorB(R.colorData), getColorB(S.colorData), upscaledColorB, bpp);
+
+	for (uint32_t y = 0; y < wordHeight; y++)
+	{
+		for (uint32_t x = 0; x < wordWidth; x++)
+		{
+			int32_t mod = getModulationValues(modulationValues, modulationModes, x + wordWidth / 2, y + wordHeight / 2, bpp);
+			// Weights above 10 carry the "+10" punch-through marker set by unpackModulations().
+			bool punchthroughAlpha = false;
+			if (mod > 10)
+			{
+				punchthroughAlpha = true;
+				mod -= 10;
+			}
+
+			// Blend A and B: mod is the weight of B in eighths.
+			Pixel128S result;
+			result.red = (upscaledColorA[y * wordWidth + x].red * (8 - mod) + upscaledColorB[y * wordWidth + x].red * mod) / 8;
+			result.green = (upscaledColorA[y * wordWidth + x].green * (8 - mod) + upscaledColorB[y * wordWidth + x].green * mod) / 8;
+			result.blue = (upscaledColorA[y * wordWidth + x].blue * (8 - mod) + upscaledColorB[y * wordWidth + x].blue * mod) / 8;
+			if (punchthroughAlpha) { result.alpha = 0; }
+			else
+			{
+				result.alpha = (upscaledColorA[y * wordWidth + x].alpha * (8 - mod) + upscaledColorB[y * wordWidth + x].alpha * mod) / 8;
+			}
+
+			// Convert the 32bit precision Result to 8 bit per channel color.
+			if (bpp == 2)
+			{
+				pColorData[y * wordWidth + x].red = static_cast<uint8_t>(result.red);
+				pColorData[y * wordWidth + x].green = static_cast<uint8_t>(result.green);
+				pColorData[y * wordWidth + x].blue = static_cast<uint8_t>(result.blue);
+				pColorData[y * wordWidth + x].alpha = static_cast<uint8_t>(result.alpha);
+			}
+			else if (bpp == 4)
+			{
+				// The 4bpp intermediate data is transposed, so swap x and y when storing.
+				pColorData[y + x * wordHeight].red = static_cast<uint8_t>(result.red);
+				pColorData[y + x * wordHeight].green = static_cast<uint8_t>(result.green);
+				pColorData[y + x * wordHeight].blue = static_cast<uint8_t>(result.blue);
+				pColorData[y + x * wordHeight].alpha = static_cast<uint8_t>(result.alpha);
+			}
+		}
+	}
+}
+
+// Wraps a word index into the range [0, numWords).
+// The addition is done in unsigned arithmetic, so a negative index down to
+// -numWords wraps around to the far end (e.g. -1 becomes numWords - 1).
+static uint32_t wrapWordIndex(uint32_t numWords, int word)
+{
+	const uint32_t shifted = static_cast<uint32_t>(word) + numWords;
+	return shifted % numWords;
+}
+
+// Returns true if input is a non-zero power of two.
+static bool isPowerOf2(uint32_t input)
+{
+	if (input == 0) { return false; }
+
+	// A power of two has a single bit set, so it shares no bits with input - 1.
+	const uint32_t below = input - 1;
+	return (input & below) == 0;
+}
+
+// Computes the twiddled (bit-interleaved) index of the word at (XPos, YPos)
+// in a grid of XSize x YSize words.
+//
+// For the bits covered by the smaller dimension, Y bits are placed at even
+// bit positions and X bits at odd bit positions. The remaining high bits of
+// the coordinate along the larger dimension are placed above them.
+// Both sizes must be powers of two and the position must be inside the grid
+// (checked with assert).
+static uint32_t TwiddleUV(uint32_t XSize, uint32_t YSize, uint32_t XPos, uint32_t YPos)
+{
+	// Initially assume X is the smaller dimension, so the leftover high bits come from YPos.
+	uint32_t MinDimension = XSize;
+	uint32_t MaxValue = YPos;
+	uint32_t Twiddled = 0;
+	uint32_t SrcBitPos = 1;
+	uint32_t DstBitPos = 1;
+	int ShiftCount = 0;
+
+	// Check the sizes are valid.
+	assert(YPos < YSize);
+	assert(XPos < XSize);
+	assert(isPowerOf2(YSize));
+	assert(isPowerOf2(XSize));
+
+	// If Y is the smaller dimension - switch the min/max values.
+	if (YSize < XSize)
+	{
+		MinDimension = YSize;
+		MaxValue = XPos;
+	}
+
+	// Step through all the bits in the "minimum" dimension
+	while (SrcBitPos < MinDimension)
+	{
+		if (YPos & SrcBitPos) { Twiddled |= DstBitPos; }
+
+		if (XPos & SrcBitPos) { Twiddled |= (DstBitPos << 1); }
+
+		SrcBitPos <<= 1;
+		DstBitPos <<= 2;
+		ShiftCount += 1;
+	}
+
+	// Prepend any unused bits
+	// (the high bits of the larger dimension's coordinate, above the interleaved part)
+	MaxValue >>= ShiftCount;
+	Twiddled |= (MaxValue << (2 * ShiftCount));
+
+	return Twiddled;
+}
+
+// Copies one decoded tile into the output image.
+//
+// The tile straddles four words, so each quadrant of pWord is written into
+// the facing quadrant of a different word:
+//   top-left     -> bottom-right quadrant of word P
+//   top-right    -> bottom-left quadrant of word Q
+//   bottom-left  -> top-right quadrant of word R
+//   bottom-right -> top-left quadrant of word S
+//
+// pOutput - output image, `width` pixels per row.
+// pWord   - decoded tile, wordWidth pixels per row.
+// words   - word coordinates of P, Q, R and S ([0] = X, [1] = Y).
+// bpp     - 2 selects 8x4-pixel words; any other value uses 4x4-pixel words.
+static void mapDecompressedData(Pixel32* pOutput, uint32_t width, const Pixel32* pWord, const PVRTCWordIndices& words, uint8_t bpp)
+{
+	uint32_t wordWidth = 4;
+	uint32_t wordHeight = 4;
+	if (bpp == 2) { wordWidth = 8; }
+
+	for (uint32_t y = 0; y < wordHeight / 2; y++)
+	{
+		for (uint32_t x = 0; x < wordWidth / 2; x++)
+		{
+			pOutput[(((words.P[1] * wordHeight) + y + wordHeight / 2) * width + words.P[0] * wordWidth + x + wordWidth / 2)] = pWord[y * wordWidth + x]; // map P
+
+			pOutput[(((words.Q[1] * wordHeight) + y + wordHeight / 2) * width + words.Q[0] * wordWidth + x)] = pWord[y * wordWidth + x + wordWidth / 2]; // map Q
+
+			pOutput[(((words.R[1] * wordHeight) + y) * width + words.R[0] * wordWidth + x + wordWidth / 2)] = pWord[(y + wordHeight / 2) * wordWidth + x]; // map R
+
+			pOutput[(((words.S[1] * wordHeight) + y) * width + words.S[0] * wordWidth + x)] = pWord[(y + wordHeight / 2) * wordWidth + x + wordWidth / 2]; // map S
+		}
+	}
+}
+// Decompresses a whole PVRTC surface.
+//
+// pCompressedData   - the compressed words, stored in twiddled order; each
+//                     word is two 32-bit values (modulation data, color data).
+// pDecompressedData - output image; must hold width * height pixels.
+// width, height     - surface size in pixels. Must be at least two words in
+//                     each direction; PVRTDecompressPVRTC() pads to that
+//                     minimum before calling this function.
+// bpp               - 2 (8x4-pixel words) or 4 (4x4-pixel words).
+//
+// Returns the size of the compressed data in bytes.
+static uint32_t pvrtcDecompress(uint8_t* pCompressedData, Pixel32* pDecompressedData, uint32_t width, uint32_t height, uint8_t bpp)
+{
+	uint32_t wordWidth = 4;
+	uint32_t wordHeight = 4;
+	if (bpp == 2) { wordWidth = 8; }
+
+	uint32_t* pWordMembers = (uint32_t*)pCompressedData;
+	Pixel32* pOutData = pDecompressedData;
+
+	// Calculate number of words
+	int i32NumXWords = static_cast<int>(width / wordWidth);
+	int i32NumYWords = static_cast<int>(height / wordHeight);
+
+	// Structs used for decompression
+	PVRTCWordIndices indices;
+	// Scratch space for one decoded tile: one Pixel32 per pixel of a word.
+	// (The element count must not be multiplied by sizeof(Pixel32).)
+	std::vector<Pixel32> pPixels(wordWidth * wordHeight);
+
+	// Each tile straddles four words, so start one word before the surface and
+	// wrap the indices around its edges.
+	// For each row of words
+	for (int32_t wordY = -1; wordY < i32NumYWords - 1; wordY++)
+	{
+		// for each column of words
+		for (int32_t wordX = -1; wordX < i32NumXWords - 1; wordX++)
+		{
+			indices.P[0] = static_cast<int>(wrapWordIndex(i32NumXWords, wordX));
+			indices.P[1] = static_cast<int>(wrapWordIndex(i32NumYWords, wordY));
+			indices.Q[0] = static_cast<int>(wrapWordIndex(i32NumXWords, wordX + 1));
+			indices.Q[1] = static_cast<int>(wrapWordIndex(i32NumYWords, wordY));
+			indices.R[0] = static_cast<int>(wrapWordIndex(i32NumXWords, wordX));
+			indices.R[1] = static_cast<int>(wrapWordIndex(i32NumYWords, wordY + 1));
+			indices.S[0] = static_cast<int>(wrapWordIndex(i32NumXWords, wordX + 1));
+			indices.S[1] = static_cast<int>(wrapWordIndex(i32NumYWords, wordY + 1));
+
+			// Work out the offsets into the twiddle structs, multiply by two as there are two members per word.
+			uint32_t WordOffsets[4] = {
+				TwiddleUV(i32NumXWords, i32NumYWords, indices.P[0], indices.P[1]) * 2,
+				TwiddleUV(i32NumXWords, i32NumYWords, indices.Q[0], indices.Q[1]) * 2,
+				TwiddleUV(i32NumXWords, i32NumYWords, indices.R[0], indices.R[1]) * 2,
+				TwiddleUV(i32NumXWords, i32NumYWords, indices.S[0], indices.S[1]) * 2,
+			};
+
+			// Access individual elements to fill out PVRTCWord
+			PVRTCWord P, Q, R, S;
+			P.colorData = static_cast<uint32_t>(pWordMembers[WordOffsets[0] + 1]);
+			P.modulationData = static_cast<uint32_t>(pWordMembers[WordOffsets[0]]);
+			Q.colorData = static_cast<uint32_t>(pWordMembers[WordOffsets[1] + 1]);
+			Q.modulationData = static_cast<uint32_t>(pWordMembers[WordOffsets[1]]);
+			R.colorData = static_cast<uint32_t>(pWordMembers[WordOffsets[2] + 1]);
+			R.modulationData = static_cast<uint32_t>(pWordMembers[WordOffsets[2]]);
+			S.colorData = static_cast<uint32_t>(pWordMembers[WordOffsets[3] + 1]);
+			S.modulationData = static_cast<uint32_t>(pWordMembers[WordOffsets[3]]);
+
+			// assemble 4 words into struct to get decompressed pixels from
+			pvrtcGetDecompressedPixels(P, Q, R, S, pPixels.data(), bpp);
+			mapDecompressedData(pOutData, width, pPixels.data(), indices, bpp);
+
+		} // for each word
+	} // for each row of words
+
+	// Return the data size
+	// (width * height * bpp / 8 bytes: wordWidth / 2 is 4 for 2bpp and 2 for 4bpp.)
+	return width * height / static_cast<uint32_t>((wordWidth / 2));
+}
+
+// Decompresses PVRTC data to 32-bit pixels.
+//
+// pCompressedData - the PVRTC data to decompress.
+// Do2bitMode      - 1 decodes the data as 2bpp; any other value decodes it as 4bpp.
+// XDim, YDim      - texture size in pixels.
+// pResultImage    - output buffer; receives XDim * YDim 4-byte pixels.
+//
+// Textures smaller than the minimum size (16x8 for 2bpp, 8x8 for 4bpp) are
+// decoded at the minimum size into a temporary buffer, and only the top-left
+// XDim x YDim pixels are copied to the output.
+//
+// Returns the compressed data size in bytes, computed from the (possibly
+// padded) dimensions.
+uint32_t PVRTDecompressPVRTC(const void* pCompressedData, uint32_t Do2bitMode, uint32_t XDim, uint32_t YDim, uint8_t* pResultImage)
+{
+	// Cast the output buffer to a Pixel32 pointer.
+	Pixel32* const pResult = (Pixel32*)pResultImage;
+	Pixel32* pDecompressedData = pResult;
+
+	// Check the X and Y values are at least the minimum size.
+	uint32_t XTrueDim = std::max(XDim, ((Do2bitMode == 1u) ? 16u : 8u));
+	uint32_t YTrueDim = std::max(YDim, 8u);
+	const bool needsTempBuffer = (XTrueDim != XDim || YTrueDim != YDim);
+
+	// If the dimensions aren't correct, we need to decode into a new buffer instead of the provided one, as the buffer would overrun otherwise.
+	// The vector owns the temporary buffer, so it is released on every exit path, including exceptions.
+	std::vector<Pixel32> tempBuffer;
+	if (needsTempBuffer)
+	{
+		tempBuffer.resize(static_cast<std::size_t>(XTrueDim) * YTrueDim);
+		pDecompressedData = tempBuffer.data();
+	}
+
+	// Decompress the surface.
+	uint32_t retval = pvrtcDecompress((uint8_t*)pCompressedData, pDecompressedData, XTrueDim, YTrueDim, uint8_t(Do2bitMode == 1 ? 2 : 4));
+
+	// If the dimensions were too small, then copy the new buffer back into the output buffer.
+	if (needsTempBuffer)
+	{
+		// Copy the required pixels row by row; the two buffers have different row lengths.
+		for (uint32_t y = 0; y < YDim; ++y)
+		{
+			for (uint32_t x = 0; x < XDim; ++x) { pResult[x + y * XDim] = pDecompressedData[x + y * XTrueDim]; }
+		}
+	}
+	return retval;
+}
+
+////////////////////////////////////// ETC Compression //////////////////////////////////////
+
+// Clamps X to the inclusive range [Xmin, Xmax].
+#define _CLAMP_(X, Xmin, Xmax) ((X) < (Xmax) ? ((X) < (Xmin) ? (Xmin) : (X)) : (Xmax))
+
+// Flag bits tested in the first 32-bit word of each ETC block.
+// These are constants, so they are const with internal linkage instead of
+// writable globals exported from the library.
+static const uint32_t ETC_FLIP = 0x01000000; // sub-blocks are stacked (4x2) instead of side by side (2x4)
+static const uint32_t ETC_DIFF = 0x02000000; // differential color mode instead of individual mode
+
+// Pixel modifier tables: mod[table][index] is the value added to each color
+// channel of a sub-block's base color.
+static const int mod[8][4] = {
+	{ 2, 8, -2, -8 },
+	{ 5, 17, -5, -17 },
+	{ 9, 29, -9, -29 },
+	{ 13, 42, -13, -42 },
+	{ 18, 60, -18, -60 },
+	{ 24, 80, -24, -80 },
+	{ 33, 106, -33, -106 },
+	{ 47, 183, -47, -183 },
+};
+
+// Applies the ETC per-pixel modifier to a base color and packs the result.
+//
+// red, green, blue - base color of the sub-block, 0..255 per channel.
+// x, y             - pixel position inside the 4x4 block; the pixel's bit
+//                    index is x * 4 + y.
+// modBlock         - the second 32-bit word of the block, holding the
+//                    per-pixel selector bits.
+// modTable         - which row of the mod[] table to use (0..7).
+//
+// Returns the pixel packed as 0xff000000 | (red << 16) | (green << 8) | blue,
+// with each channel clamped to 0..255.
+static uint32_t modifyPixel(int red, int green, int blue, int x, int y, uint32_t modBlock, int modTable)
+{
+	int index = x * 4 + y, pixelMod;
+	// Shifted copy, so the second selector bit lands on bit 1 (masked with 0x2 below).
+	uint32_t mostSig = modBlock << 1;
+
+	// Build the 2-bit table index from two separate bits of modBlock; where
+	// those bits sit depends on whether the pixel index is below 8.
+	if (index < 8) { pixelMod = mod[modTable][((modBlock >> (index + 24)) & 0x1) + ((mostSig >> (index + 8)) & 0x2)]; }
+	else
+	{
+		pixelMod = mod[modTable][((modBlock >> (index + 8)) & 0x1) + ((mostSig >> (index - 8)) & 0x2)];
+	}
+
+	red = _CLAMP_(red + pixelMod, 0, 255);
+	green = _CLAMP_(green + pixelMod, 0, 255);
+	blue = _CLAMP_(blue + pixelMod, 0, 255);
+
+	return ((red << 16) + (green << 8) + blue) | 0xff000000;
+}
+
+// Decompresses an ETC texture into 32-bit pixels.
+//
+// pSrcData  - compressed data: one pair of 32-bit words per 4x4 block, blocks
+//             stored row by row.
+// x, y      - texture width and height in pixels.
+// pDestData - output buffer of x * y 32-bit pixels, as packed by modifyPixel().
+// (unnamed) - the mode argument is accepted but not used.
+//
+// Returns x * y / 2, the number of bytes of compressed data consumed.
+//
+// NOTE(review): the loops step by 4 and always write a full 4x4 block, so x
+// and y presumably need to be multiples of 4 - confirm against the callers.
+static uint32_t ETCTextureDecompress(const void* pSrcData, uint32_t x, uint32_t y, void* pDestData, uint32_t /*nMode*/)
+{
+	uint32_t* output;
+	uint32_t blockTop, blockBot;
+	const uint32_t* input = static_cast<const uint32_t*>(pSrcData);
+	unsigned char red1, green1, blue1, red2, green2, blue2;
+	bool bFlip, bDiff;
+	int modtable1, modtable2;
+
+	for (uint32_t i = 0; i < y; i += 4)
+	{
+		for (uint32_t m = 0; m < x; m += 4)
+		{
+			// blockTop holds the base colors, table selectors and flags;
+			// blockBot holds the per-pixel modifier bits.
+			blockTop = *(input++);
+			blockBot = *(input++);
+
+			// Top-left pixel of this block in the output image.
+			output = (uint32_t*)pDestData + i * x + m;
+
+			// check flipbit
+			bFlip = (blockTop & ETC_FLIP) != 0;
+			bDiff = (blockTop & ETC_DIFF) != 0;
+
+			if (bDiff)
+			{
+				// differential mode 5 color bits + 3 difference bits
+				// get base color for subblock 1
+				blue1 = static_cast<unsigned char>((blockTop & 0xf80000) >> 16u);
+				green1 = static_cast<unsigned char>((blockTop & 0xf800) >> 8u);
+				red1 = static_cast<unsigned char>(blockTop & 0xf8);
+
+				// get differential color for subblock 2
+				// (the 3-bit difference is moved to the top of a signed char and
+				// shifted back down to sign-extend it)
+				signed char blues = static_cast<signed char>(blue1 >> 3) + (static_cast<signed char>((blockTop & 0x70000) >> 11) >> 5);
+				signed char greens = static_cast<signed char>(green1 >> 3) + (static_cast<signed char>((blockTop & 0x700) >> 3) >> 5);
+				signed char reds = static_cast<signed char>(red1 >> 3) + (static_cast<signed char>((blockTop & 0x7) << 5) >> 5);
+
+				blue2 = static_cast<unsigned char>(blues);
+				green2 = static_cast<unsigned char>(greens);
+				red2 = static_cast<unsigned char>(reds);
+
+				red1 = static_cast<unsigned char>(red1 + (red1 >> 5u)); // copy bits to lower sig
+				green1 = static_cast<unsigned char>(green1 + (green1 >> 5u)); // copy bits to lower sig
+				blue1 = static_cast<unsigned char>(blue1 + (blue1 >> 5u)); // copy bits to lower sig
+
+				red2 = static_cast<unsigned char>((red2 << 3u) + (red2 >> 2u)); // copy bits to lower sig
+				green2 = static_cast<unsigned char>((green2 << 3u) + (green2 >> 2u)); // copy bits to lower sig
+				blue2 = static_cast<unsigned char>((blue2 << 3u) + (blue2 >> 2u)); // copy bits to lower sig
+			}
+			else
+			{
+				// individual mode 4 + 4 color bits
+				// get base color for subblock 1
+				blue1 = static_cast<unsigned char>((blockTop & 0xf00000) >> 16);
+				blue1 = static_cast<unsigned char>(blue1 + (blue1 >> 4)); // copy bits to lower sig
+				green1 = static_cast<unsigned char>((blockTop & 0xf000) >> 8);
+				green1 = static_cast<unsigned char>(green1 + (green1 >> 4)); // copy bits to lower sig
+				red1 = static_cast<unsigned char>(blockTop & 0xf0);
+				red1 = static_cast<unsigned char>(red1 + (red1 >> 4)); // copy bits to lower sig
+
+				// get base color for subblock 2
+				blue2 = static_cast<unsigned char>((blockTop & 0xf0000) >> 12);
+				blue2 = static_cast<unsigned char>(blue2 + (blue2 >> 4)); // copy bits to lower sig
+				green2 = static_cast<unsigned char>((blockTop & 0xf00) >> 4);
+				green2 = static_cast<unsigned char>(green2 + (green2 >> 4)); // copy bits to lower sig
+				red2 = static_cast<unsigned char>((blockTop & 0xf) << 4);
+				red2 = static_cast<unsigned char>(red2 + (red2 >> 4)); // copy bits to lower sig
+			}
+			// get the modtables for each subblock
+			modtable1 = static_cast<int>((blockTop >> 29) & 0x7);
+			modtable2 = static_cast<int>((blockTop >> 26) & 0x7);
+
+			if (!bFlip)
+			{
+				// 2 2x4 blocks side by side
+
+				for (uint8_t j = 0; j < 4; j++) // vertical
+				{
+					for (uint8_t k = 0; k < 2; k++) // horizontal
+					{
+						*(output + j * x + k) = modifyPixel(red1, green1, blue1, k, j, blockBot, modtable1);
+						*(output + j * x + k + 2) = modifyPixel(red2, green2, blue2, k + 2, j, blockBot, modtable2);
+					}
+				}
+			}
+			else
+			{
+				// 2 4x2 blocks on top of each other
+				for (uint8_t j = 0; j < 2; j++)
+				{
+					for (uint8_t k = 0; k < 4; k++)
+					{
+						*(output + j * x + k) = modifyPixel(red1, green1, blue1, k, j, blockBot, modtable1);
+						*(output + (j + 2) * x + k) = modifyPixel(red2, green2, blue2, k, j + 2, blockBot, modtable2);
+					}
+				}
+			}
+		}
+	}
+
+	return x * y / 2;
+}
+
+// Decompresses ETC data to 32-bit pixels.
+//
+// pSrcData  - the ETC data to decompress.
+// x, y      - texture size in pixels.
+// pDestData - output buffer; receives x * y 4-byte pixels.
+// nMode     - passed through to the block decoder, which does not use it.
+//
+// Textures smaller than the 4x4 minimum are decoded at the minimum size into
+// a temporary buffer and the top-left x by y pixels are copied to the output.
+// Bytes 0 and 2 of every output pixel are swapped before returning.
+//
+// Returns the number of bytes of ETC data that were decompressed.
+uint32_t PVRTDecompressETC(const void* pSrcData, uint32_t x, uint32_t y, void* pDestData, uint32_t nMode)
+{
+	uint32_t i32read;
+
+	if (x < ETC_MIN_TEXWIDTH || y < ETC_MIN_TEXHEIGHT)
+	{
+		// decompress into a buffer big enough to take the minimum size
+		const uint32_t paddedWidth = std::max<uint32_t>(x, ETC_MIN_TEXWIDTH);
+		const uint32_t paddedHeight = std::max<uint32_t>(y, ETC_MIN_TEXHEIGHT);
+
+		// The vector owns the temporary buffer and releases it on every exit path.
+		std::vector<char> tempBuffer(static_cast<std::size_t>(paddedWidth) * paddedHeight * 4);
+		i32read = ETCTextureDecompress(pSrcData, paddedWidth, paddedHeight, tempBuffer.data(), nMode);
+
+		for (uint32_t i = 0; i < y; i++)
+		{
+			// copy from larger temp buffer to output data
+			memcpy(static_cast<char*>(pDestData) + i * x * 4, tempBuffer.data() + paddedWidth * 4 * i, x * 4);
+		}
+	}
+	else // decompress larger MIP levels straight into the output data
+	{
+		i32read = ETCTextureDecompress(pSrcData, x, y, pDestData, nMode);
+	}
+
+	// swap r and b channels
+	unsigned char *pSwap = static_cast<unsigned char*>(pDestData), swap;
+
+	for (uint32_t i = 0; i < y; i++)
+	{
+		for (uint32_t j = 0; j < x; j++)
+		{
+			swap = pSwap[0];
+			pSwap[0] = pSwap[2];
+			pSwap[2] = swap;
+			pSwap += 4;
+		}
+	}
+
+	return i32read;
+}
+} // namespace pvr
+//!\endcond
--- a/extlib/PowerVR/PVRTDecompress.h
+++ b/extlib/PowerVR/PVRTDecompress.h
@ -0,0 +1,28 @@
+/*!
+\brief Contains functions to decompress PVRTC or ETC formats into RGBA8888.
+\file PVRCore/texture/PVRTDecompress.h
+\author PowerVR by Imagination, Developer Technology Team
+\copyright Copyright (c) Imagination Technologies Limited.
+*/
+#pragma once
+#include <stdint.h>
+namespace pvr {
+
+/// <summary>Decompresses PVRTC to RGBA 8888.</summary>
+/// <param name="compressedData">The PVRTC texture data to decompress</param>
+/// <param name="do2bitMode">Non-zero if the data is 2bpp PVRTC; zero if it is 4bpp PVRTC</param>
+/// <param name="xDim">X dimension of the texture</param>
+/// <param name="yDim">Y dimension of the texture</param>
+/// <param name="outResultImage">Output buffer that receives the decompressed RGBA 8888 texture data</param>
+/// <returns>Returns the amount of compressed input data that was decompressed.</returns>
+uint32_t PVRTDecompressPVRTC(const void* compressedData, uint32_t do2bitMode, uint32_t xDim, uint32_t yDim, uint8_t* outResultImage);
+
+/// <summary>Decompresses ETC to RGBA 8888.</summary>
+/// <param name="srcData">The ETC texture data to decompress</param>
+/// <param name="xDim">X dimension of the texture</param>
+/// <param name="yDim">Y dimension of the texture</param>
+/// <param name="dstData">Output buffer that receives the decompressed texture data</param>
+/// <param name="mode">The format of the data</param>
+/// <returns>Returns the number of bytes of ETC data decompressed.</returns>
+uint32_t PVRTDecompressETC(const void* srcData, uint32_t xDim, uint32_t yDim, void* dstData, uint32_t mode);
+} // namespace pvr
--- a/extlib/PowerVR/_MODIFIED_POWERVR_NATIVE_SDK.txt
+++ b/extlib/PowerVR/_MODIFIED_POWERVR_NATIVE_SDK.txt
@ -0,0 +1,13 @@
+This is a modified subset of the original PowerVR Native SDK.
+
+Commit c1605c99281797e5cd4c8439e1bc679706bbb311
+Updated gradle API usage.
+
+The following changes have been made to the original:
+
+- Added CMakeLists.txt.
+
+- Only the texture decompressor (PVRTDecompress, which handles both PVRTC and ETC) is included.
+
+To obtain the original PowerVR Native SDK, see the GitHub repository:
+- https://github.com/powervr-graphics/Native_SDK
--- a/src/librptexture/CMakeLists.txt
+++ b/src/librptexture/CMakeLists.txt
@ -197,6 +197,11 @@ IF(NOT ZLIB_LIBRARY)
 ENDIF(NOT ZLIB_LIBRARY)
 TARGET_LINK_LIBRARIES(rptexture PRIVATE ${ZLIB_LIBRARY})

+# PowerVR Native SDK subset: link against pvrtc only when the ENABLE_PVRTC option is set.
+IF(ENABLE_PVRTC)
+	TARGET_LINK_LIBRARIES(rptexture PRIVATE pvrtc)
+ENDIF(ENABLE_PVRTC)
+
 # Other libraries.
 IF(WIN32)
 	# libwin32common
--- a/src/librptexture/config.librptexture.h.in
+++ b/src/librptexture/config.librptexture.h.in
@ -12,4 +12,7 @@
 /* Define to 1 if librpbase RomFields support should be enabled. */
 #define ENABLE_LIBRPBASE_ROMFIELDS 1

+/* Define to 1 if PVRTC decompression should be enabled. */
+#define ENABLE_PVRTC 1
+
 #endif /* __ROMPROPERTIES_LIBRPTEXTURE_CONFIG_H__ */
--- a/src/librptexture/decoder/ImageDecoder.hpp
+++ b/src/librptexture/decoder/ImageDecoder.hpp
@ -10,6 +10,8 @@
 #define __ROMPROPERTIES_LIBRPTEXTURE_DECODER_IMAGEDECODER_HPP__

 #include "config.librpbase.h"
+#include "config.librptexture.h"
+
 #include "common.h"
 #include "cpu_dispatch.h"

@ -667,29 +669,22 @@ rp_image *fromETC2_RGBA(int width, int height,
 rp_image *fromETC2_RGB_A1(int width, int height,
 	const uint8_t *RESTRICT img_buf, int img_siz);

+#ifdef ENABLE_PVRTC
 /* PVRTC */

 /**
- * Convert a PVRTC 2bpp image to rp_image.
+ * Convert a PVRTC 2bpp or 4bpp image to rp_image.
 * @param width Image width.
 * @param height Image height.
 * @param img_buf PVRTC image buffer.
- * @param img_siz Size of image data. [must be >= (w*h)/4]
+ * @param img_siz Size of image data. [must be >= (w*h)/4 for 2bpp, (w*h)/2 for 4bpp]
+ * @param do2bitMode True for 2bpp; false for 4bpp.
 * @return rp_image, or nullptr on error.
 */
-rp_image *fromPVRTC_2bpp(int width, int height,
-	const uint8_t *RESTRICT img_buf, int img_siz);
-
-/**
- * Convert a PVRTC 2bpp image to rp_image.
- * @param width Image width.
- * @param height Image height.
- * @param img_buf PVRTC image buffer.
- * @param img_siz Size of image data. [must be >= (w*h)/2]
- * @return rp_image, or nullptr on error.
- */
-rp_image *fromPVRTC_4bpp(int width, int height,
-	const uint8_t *RESTRICT img_buf, int img_siz);
+rp_image *fromPVRTC(int width, int height,
+	const uint8_t *RESTRICT img_buf, int img_siz,
+	bool do2bitMode);
+#endif /* ENABLE_PVRTC */

 /* BC7 */

--- a/src/librptexture/decoder/ImageDecoder_PVRTC.cpp
+++ b/src/librptexture/decoder/ImageDecoder_PVRTC.cpp
@ -6,11 +6,14 @@
 * SPDX-License-Identifier: GPL-2.0-or-later                               *
 ***************************************************************************/

+#include "config.librptexture.h"
+
 #include "ImageDecoder.hpp"
 #include "ImageDecoder_p.hpp"

-#include "PixelConversion.hpp"
-using namespace LibRpTexture::PixelConversion;
+#ifdef ENABLE_PVRTC
+# include "PVRTDecompress.h"
+#endif /* ENABLE_PVRTC */

 // References:
 // - https://www.khronos.org/registry/OpenGL/extensions/IMG/IMG_texture_compression_pvrtc.txt
@ -21,288 +24,49 @@ using namespace LibRpTexture::PixelConversion;

 namespace LibRpTexture { namespace ImageDecoder {

-// PVRTC data block.
-union pvrtc_block {
-	struct {
-		// Modulation data.
-		uint32_t mod_data;
-
-		// Color B:
-		// - Bit 15: Opaque bit 'Q'
-		// - Bits 1-14:
-		//   - If Q == 1:  RGB554
-		//   - If Q == 0: ARGB3443
-		// - Bit 0: Mode bit
-		uint16_t colorB;
-
-		// Color A:
-		// - Bit 15: Opaque bit 'Q'
-		// - Bits 0-14:
-		//   - If Q == 1:  RGB555
-		//   - If Q == 0: ARGB3444
-		// NOTE: This format is the same as GCN RGB5A3.
-		uint16_t colorA;
-	};
-
-	uint64_t u64;
-};
-
 /**
- * Convert color A to ARGB32.
- * @param px16 Color A.
- * @return ARGB32.
- */
-static inline uint32_t colorAtoARGB32(uint16_t px16)
-{
-	// Color A uses the same format as GCN RGB5A3.
-	return RGB5A3_to_ARGB32(px16);
-}
-
-/**
- * Convert color B to ARGB32.
- * @param px16 Color B.
- * @return ARGB32.
- */
-static inline uint32_t colorBtoARGB32(uint16_t px16)
-{
-	// Color A is almost the same as GCN RGB5A3,
-	// except the blue channel is smaller.
-	uint32_t px32;
-
-	if (px16 & 0x8000) {
-		// BGR555: xRRRRRGG GGGBBBBx
-		// ARGB32: AAAAAAAA RRRRRRRR GGGGGGGG BBBBBBBB
-		px32  = 0xFF000000U;	// no alpha channel
-		px32 |= (((px16 << 3) & 0x0000F0) | ((px16 >> 1) & 0x00000F));	// B
-		px32 |= (((px16 << 6) & 0x00F800) | ((px16 << 1) & 0x000700));	// G
-		px32 |= (((px16 << 9) & 0xF80000) | ((px16 << 4) & 0x070000));	// R
-	} else {
-		// RGB4A3: xAAARRRR GGGGBBBx
-		// ARGB32: AAAAAAAA RRRRRRRR GGGGGGGG BBBBBBBB
-		px32  = ((px16 & 0x00F0) << 4);	// G
-		px32 |= ((px16 & 0x0F00) << 8);	// R
-		px32 |= (px32 << 4);		// Copy to the top nybble.
-
-		// Calculate the blue channel.
-		uint8_t b = ((px16 << 4) & 0xE0);
-		b |= (b >> 3);
-		b |= (b >> 3);
-
-		// Calculate the alpha channel.
-		uint8_t a = ((px16 >> 7) & 0xE0);
-		a |= (a >> 3);
-		a |= (a >> 3);
-
-		// Apply the alpha and blue channels.
-		px32 |= (a << 24);
-		px32 |=  b;
-	}
-
-	return px32;
-}
-
-// Temporary RGBA structure that allows us to clamp it later.
-// TODO: Use SSE2?
-struct ColorRGBA {
-	int B;
-	int G;
-	int R;
-	int A;
-};
-
-/**
- * Clamp a ColorRGBA struct and convert it to ARGB32.
- * @param color ColorRGBA struct.
- * @return ARGB32 value.
- */
-static inline uint32_t clamp_ColorRGBA(const ColorRGBA &color)
-{
-	uint32_t argb32 = 0;
-	if (color.B > 255) {
-		argb32 = 255;
-	} else if (color.B > 0) {
-		argb32 = color.B;
-	}
-	if (color.G > 255) {
-		argb32 |= (255 << 8);
-	} else if (color.G > 0) {
-		argb32 |= (color.G << 8);
-	}
-	if (color.R > 255) {
-		argb32 |= (255 << 16);
-	} else if (color.R > 0) {
-		argb32 |= (color.R << 16);
-	}
-	if (color.A > 255) {
-		argb32 |= (255 << 24);
-	} else if (color.A > 0) {
-		argb32 |= (color.A << 24);
-	}
-	return argb32;
-}
-
-/**
- * Mode 0 color interpolation.
- * @param colors Array containing two ARGB32 colors.
- * @param mod_data 2-bit modulation data.
- * @return Interpolated color.
- */
-static inline uint32_t interp_colors_mode0(const uint32_t color[2], unsigned int mod_data)
-{
-	if (mod_data == 0) {
-		// No modulation.
-		return color[0];
-	}
-
-	// TODO: Optimize using SSE.
-	argb32_t argb[2];
-	argb[0].u32 = color[0];
-	argb[1].u32 = color[1];
-
-	// Interpolation formula: Output = A + Mod*(B - A)
-	ColorRGBA rgba;
-	rgba.B = argb[1].b - argb[0].b;
-	rgba.G = argb[1].g - argb[0].g;
-	rgba.R = argb[1].r - argb[0].r;
-	switch (mod_data) {
-		default:
-			assert(!"Unhandled modulation data.");
-			return color[0];
-
-		case 1:
-			// Weight: 4/8
-			rgba.B = rgba.B / 2;
-			rgba.G = rgba.G / 2;
-			rgba.R = rgba.R / 2;
-
-			rgba.A = argb[1].a - argb[0].a;
-			rgba.A = rgba.A / 2;
-			break;
-
-		case 2:
-			// Weight: 4/8, punch-through alpha
-			// NOTE: Color values are kept as-is,
-			// even though A=0.
-			rgba.B = rgba.B / 2;
-			rgba.G = rgba.G / 2;
-			rgba.R = rgba.R / 2;
-			rgba.A = 0;
-			break;
-
-		case 3:
-			// Weight: 1
-			rgba.A = argb[1].a - argb[0].a;
-			break;
-	}
-
-	rgba.B += argb[0].b;
-	rgba.G += argb[0].g;
-	rgba.R += argb[0].r;
-	if (mod_data != 2) {
-		// TODO: Move into the switch/case?
-		rgba.A += argb[0].a;
-	}
-
-	// Clamp the color components.
-	return clamp_ColorRGBA(rgba);
-}
-
-/**
- * Mode 1 color interpolation.
- * @param colors Array containing two ARGB32 colors.
- * @param mod_data 2-bit modulation data.
- * @return Interpolated color.
- */
-static inline uint32_t interp_colors_mode1(const uint32_t color[2], unsigned int mod_data)
-{
-	if (mod_data == 0) {
-		// No modulation.
-		return color[0];
-	}
-
-	// TODO: Optimize using SSE.
-	argb32_t argb[2];
-	argb[0].u32 = color[0];
-	argb[1].u32 = color[1];
-
-	// Interpolation formula: Output = A + Mod*(B - A)
-	ColorRGBA rgba;
-	rgba.B = argb[1].b - argb[0].b;
-	rgba.G = argb[1].g - argb[0].g;
-	rgba.R = argb[1].r - argb[0].r;
-	rgba.A = argb[1].a - argb[0].a;
-	switch (mod_data) {
-		default:
-			assert(!"Unhandled modulation data.");
-			return color[0];
-
-		case 1:
-			// Weight: 3/8
-			rgba.B = rgba.B * 8 / 3;
-			rgba.G = rgba.G * 8 / 3;
-			rgba.R = rgba.R * 8 / 3;
-			rgba.A = rgba.A * 8 / 3;
-			break;
-
-		case 2:
-			// Weight: 5/8
-			rgba.B = rgba.B * 8 / 5;
-			rgba.G = rgba.G * 8 / 5;
-			rgba.R = rgba.R * 8 / 5;
-			rgba.A = rgba.A * 8 / 5;
-			break;
-
-		case 3:
-			// Weight: 1
-			break;
-	}
-
-	rgba.B += argb[0].b;
-	rgba.G += argb[0].g;
-	rgba.R += argb[0].r;
-	rgba.A += argb[0].a;
-
-	// Clamp the color components.
-	return clamp_ColorRGBA(rgba);
-}
-
-// Pixels are reordered (twiddled) in PVRTC. Bits of x coordinate are interleaved with bits of y.
-// TODO: Optimize into lookup table.
-// Reference: https://gist.github.com/andreysm/bf835e634de37c2ee48d
-#define TWIDTAB(x) ( (x&1)|((x&2)<<1)|((x&4)<<2)|((x&8)<<3)|((x&16)<<4)|((x&32)<<5)|((x&64)<<6)|((x&128)<<7)|((x&256)<<8)|((x&512)<<9) )
-#define TWIDOUT(x, y) ( TWIDTAB((y)) | (TWIDTAB((x)) << 1) )
-
-/**
- * Convert a PVRTC 2bpp image to rp_image.
+ * Convert a PVRTC 2bpp or 4bpp image to rp_image.
 * @param width Image width.
 * @param height Image height.
- * @param img_buf ETC1 image buffer.
+ * @param img_buf PVRTC image buffer.
- * @param img_siz Size of image data. [must be >= (w*h)/4]
+ * @param img_siz Size of image data. [must be >= (w*h)/4 for 2bpp, (w*h)/2 for 4bpp]
+ * @param do2bitMode True for 2bpp; false for 4bpp.
 * @return rp_image, or nullptr on error.
 */
-rp_image *fromPVRTC_2bpp(int width, int height,
-	const uint8_t *RESTRICT img_buf, int img_siz)
+rp_image *fromPVRTC(int width, int height,
+	const uint8_t *RESTRICT img_buf, int img_siz,
+	bool do2bitMode)
 {
 	// Verify parameters.
 	assert(img_buf != nullptr);
 	assert(width > 0);
 	assert(height > 0);
-	assert(img_siz >= ((width * height) / 4));
+
+	// Expected size to be read by the PowerVR Native SDK.
+	const uint32_t expected_size_in = ((width * height) / (do2bitMode ? 4 : 2));
+
+	assert(img_siz >= static_cast<int>(expected_size_in));
 	if (!img_buf || width <= 0 || height <= 0 ||
-	    img_siz < ((width * height) / 4))
+	    img_siz < static_cast<int>(expected_size_in))
 	{
 		return nullptr;
 	}

 	// PVRTC 2bpp uses 8x4 tiles.
-	assert(width % 8 == 0);
-	assert(height % 4 == 0);
-	if (width % 8 != 0 || height % 4 != 0)
-		return nullptr;
-
-	// Calculate the total number of tiles.
-	const unsigned int tilesX = (unsigned int)(width / 8);
-	const unsigned int tilesY = (unsigned int)(height / 4);
+	// PVRTC 4bpp uses 4x4 tiles.
+	if (do2bitMode) {
+		// PVRTC 2bpp
+		assert(width % 8 == 0);
+		assert(height % 4 == 0);
+		if (width % 8 != 0 || height % 4 != 0)
+			return nullptr;
+	} else {
+		// PVRTC 4bpp
+		assert(width % 4 == 0);
+		assert(height % 4 == 0);
+		if (width % 4 != 0 || height % 4 != 0)
+			return nullptr;
+	}

 	// Create an rp_image.
 	rp_image *img = new rp_image(width, height, rp_image::FORMAT_ARGB32);
@ -312,128 +76,20 @@ rp_image *fromPVRTC_2bpp(int width, int height,
 		return nullptr;
 	}

-	// NOTE: PVRTC block indexes are twiddled.
-	const pvrtc_block *const pvrtc_src = reinterpret_cast<const pvrtc_block*>(img_buf);
-
-	// Temporary tile buffer.
-	uint32_t tileBuf[8*4];
-
-	for (unsigned int y = 0; y < tilesY; y++) {
-	for (unsigned int x = 0; x < tilesX; x++) {
-		// TODO: Endianness conversion?
-		const pvrtc_block *src = &pvrtc_src[TWIDOUT(x, y)];
-
-		// Get the two color values.
-		uint32_t color[2];
-		color[0] = colorAtoARGB32(src->colorA);
-		color[1] = colorBtoARGB32(src->colorB);
-
-		uint32_t mod_data = src->mod_data;
-		if (!(src->colorB & 0x01)) {
-			// Modulation mode 0: Each bit is 0 for A, 1 for B.
-			for (unsigned int i = 0; i < 32; i++, mod_data >>= 1) {
-				tileBuf[i] = color[mod_data & 1];
-			}
-		} else {
-			// Modulation mode 1: Each bit represents two pixels,
-			// which allows interpolation to be used.
-			// TODO: Verify this. There's probably some other
-			// interpolation between the two pixels...
-			// TODO: Is the interpolation correct?
-			// NOTE: Should be checkerboard pattern, with interpolation.
-			for (unsigned int i = 0; i < 32; i += 2, mod_data >>= 2) {
-				const uint32_t interp = interp_colors_mode1(color, mod_data & 3);
-				tileBuf[i+0] = interp;
-				tileBuf[i+1] = interp;
-			}
-		}
-
-		// Blit the tile to the main image buffer.
-		ImageDecoderPrivate::BlitTile<uint32_t, 8, 4>(img, tileBuf, x, y);
-	} }
-
-	// Set the sBIT metadata.
-	static const rp_image::sBIT_t sBIT = {8,8,8,0,8};
-	img->set_sBIT(&sBIT);
-
-	// Image has been converted.
-	return img;
-}
-
-/**
- * Convert a PVRTC 4bpp image to rp_image.
- * @param width Image width.
- * @param height Image height.
- * @param img_buf ETC1 image buffer.
- * @param img_siz Size of image data. [must be >= (w*h)/2]
- * @return rp_image, or nullptr on error.
- */
-rp_image *fromPVRTC_4bpp(int width, int height,
-	const uint8_t *RESTRICT img_buf, int img_siz)
-{
-	// Verify parameters.
-	assert(img_buf != nullptr);
-	assert(width > 0);
-	assert(height > 0);
-	assert(img_siz >= ((width * height) / 2));
-	if (!img_buf || width <= 0 || height <= 0 ||
-	    img_siz < ((width * height) / 2))
-	{
-		return nullptr;
-	}
-
-	// PVRTC 4bpp uses 4x4 tiles.
-	assert(width % 4 == 0);
-	assert(height % 4 == 0);
-	if (width % 4 != 0 || height % 4 != 0)
-		return nullptr;
-
-	// Calculate the total number of tiles.
-	const unsigned int tilesX = (unsigned int)(width / 4);
-	const unsigned int tilesY = (unsigned int)(height / 4);
-
-	// Create an rp_image.
-	rp_image *img = new rp_image(width, height, rp_image::FORMAT_ARGB32);
-	if (!img->isValid()) {
-		// Could not allocate the image.
+	// Use the PowerVR Native SDK to decompress the texture.
+	// Return value is the size of the *input* data that was decompressed.
+	// TODO: Row padding?
+	uint32_t size = pvr::PVRTDecompressPVRTC(img_buf, do2bitMode, width, height,
+		static_cast<uint8_t*>(img->bits()));
+	assert(size == expected_size_in);
+	if (size != expected_size_in) {
+		// Read error...
 		delete img;
 		return nullptr;
 	}

-	// NOTE: PVRTC block indexes are twiddled.
-	const pvrtc_block *const pvrtc_src = reinterpret_cast<const pvrtc_block*>(img_buf);
-
-	// Temporary tile buffer.
-	uint32_t tileBuf[4*4];
-
-	for (unsigned int y = 0; y < tilesY; y++) {
-	for (unsigned int x = 0; x < tilesX; x++) {
-		// TODO: Endianness conversion?
-		const pvrtc_block *src = &pvrtc_src[TWIDOUT(x, y)];
-
-		// Get the two color values.
-		uint32_t color[2];
-		color[0] = colorAtoARGB32(src->colorA);
-		color[1] = colorBtoARGB32(src->colorB);
-
-		uint32_t mod_data = src->mod_data;
-		if (!(src->colorB & 0x01)) {
-			// Modulation mode 0.
-			for (unsigned int i = 0; i < 16; i++, mod_data >>= 2) {
-				tileBuf[i] = interp_colors_mode0(color, mod_data & 3);
-			}
-		} else {
-			// Modulation mode 1.
-			for (unsigned int i = 0; i < 16; i++, mod_data >>= 2) {
-				tileBuf[i] = interp_colors_mode1(color, mod_data & 3);
-			}
-		}
-
-		// Blit the tile to the main image buffer.
-		ImageDecoderPrivate::BlitTile<uint32_t, 4, 4>(img, tileBuf, x, y);
-	} }
-
 	// Set the sBIT metadata.
+	// TODO: Check for alpha?
 	static const rp_image::sBIT_t sBIT = {8,8,8,0,8};
 	img->set_sBIT(&sBIT);

--- a/src/librptexture/fileformat/PowerVR3.cpp
+++ b/src/librptexture/fileformat/PowerVR3.cpp
@ -11,6 +11,8 @@
 * - http://cdn.imgtec.com/sdk-documentation/PVR+File+Format.Specification.pdf
 */

+#include "config.librptexture.h"
+
 #include "PowerVR3.hpp"
 #include "FileFormat_p.hpp"

@ -309,16 +311,22 @@ const rp_image *PowerVR3Private::loadImage(int mip)
 	} else {
 		// Compressed format.
 		switch (pvr3Header.pixel_format) {
+#ifdef ENABLE_PVRTC
 			case PVR3_PXF_PVRTC_2bpp_RGB:
 			case PVR3_PXF_PVRTC_2bpp_RGBA:
 			case PVR3_PXF_PVRTCII_2bpp:
-				// 2bpp formats
+				// 2bpp formats (PVRTC)
 				expected_size = width * height / 4;
 				break;

 			case PVR3_PXF_PVRTC_4bpp_RGB:
 			case PVR3_PXF_PVRTC_4bpp_RGBA:
 			case PVR3_PXF_PVRTCII_4bpp:
+				// 4bpp formats (PVRTC)
+				expected_size = width * height / 2;
+				break;
+#endif /* ENABLE_PVRTC */
+
 			case PVR3_PXF_ETC1:
 			case PVR3_PXF_DXT1:
 			case PVR3_PXF_BC4:
@ -344,7 +352,7 @@ const rp_image *PowerVR3Private::loadImage(int mip)

 			default:
 				// TODO: ASTC, other formats that aren't actually compressed.
-				assert(!"Unsupported PowerVR3 compressed format.");
+				//assert(!"Unsupported PowerVR3 compressed format.");
 				return nullptr;
 		}
 	}
@ -433,12 +441,13 @@ const rp_image *PowerVR3Private::loadImage(int mip)
 	} else {
 		// Compressed format.
 		switch (pvr3Header.pixel_format) {
+#ifdef ENABLE_PVRTC
 			case PVR3_PXF_PVRTC_2bpp_RGB:
 			case PVR3_PXF_PVRTC_2bpp_RGBA:
 				// PVRTC, 2bpp.
 				// NOTE: RGB and RGBA use the same data format.
 				// TODO: Mask out the alpha channel for RGB?
-				img = ImageDecoder::fromPVRTC_2bpp(width, height, buf.get(), expected_size);
+				img = ImageDecoder::fromPVRTC(width, height, buf.get(), expected_size, true);
 				break;

 			case PVR3_PXF_PVRTC_4bpp_RGB:
@ -446,8 +455,9 @@ const rp_image *PowerVR3Private::loadImage(int mip)
 				// PVRTC, 4bpp.
 				// NOTE: RGB and RGBA use the same data format.
 				// TODO: Mask out the alpha channel for RGB?
-				img = ImageDecoder::fromPVRTC_4bpp(width, height, buf.get(), expected_size);
+				img = ImageDecoder::fromPVRTC(width, height, buf.get(), expected_size, false);
 				break;
+#endif /* ENABLE_PVRTC */

 			case PVR3_PXF_ETC1:
 				// ETC1-compressed texture.