// Mirror of https://github.com/wavemotion-dave/GimliDS.git
// (synced 2025-06-18 13:55:32 -04:00; 2325 lines, 57 KiB, C++).
/**
 * @file lzav.h
 *
 * @version 4.17
 *
 * @brief Self-contained inclusion file for the "LZAV" in-memory data
 * compression and decompression algorithms.
 *
 * The source code is written in ISO C99, with full C++ compliance enabled
 * conditionally and automatically, if compiled with a C++ compiler.
 *
 * Description is available at https://github.com/avaneev/lzav
 *
 * E-mail: aleksey.vaneev@gmail.com or info@voxengo.com
 *
 * LICENSE:
 *
 * Copyright (c) 2023-2025 Aleksey Vaneev
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

// Include guard. NOTE(review): the matching #endif is expected at the end of
// the header, beyond the part of the file reviewed here.
#ifndef LZAV_INCLUDED
#define LZAV_INCLUDED

#define LZAV_API_VER 0x107 ///< API version, unrelated to source code version.
#define LZAV_VER_STR "4.17" ///< LZAV source code version string.

/**
 * @def LZAV_FMT_MIN
 * @brief Minimal stream format id supported by the decompressor. A value of 2
 * can be defined externally, to reduce decompressor's code size.
 *
 * Defaults to 1 when not defined before this header is included.
 */

#if !defined( LZAV_FMT_MIN )
    #define LZAV_FMT_MIN 1
#endif // !defined( LZAV_FMT_MIN )

/**
 * @def LZAV_NS_CUSTOM
 * @brief If this macro is defined externally, all symbols will be placed
 * into the namespace specified by the macro, and won't be exported to the
 * global namespace. WARNING: if the defined value of the macro is empty, the
 * symbols will be placed into the global namespace anyway.
 */

/**
 * @def LZAV_NOEX
 * @brief Macro that defines the "noexcept" function specifier for C++
 * environment.
 *
 * Expands to `noexcept` for C++11 and later, to `throw()` for earlier C++,
 * and to nothing when compiled as C.
 */

/**
 * @def LZAV_NULL
 * @brief Macro that defines "nullptr" value, for C++ guidelines conformance.
 *
 * Expands to `nullptr` for C++11 and later, to `NULL` for earlier C++, and to
 * `0` when compiled as C.
 */

/**
 * @def LZAV_NS
 * @brief Macro that defines an actual implementation namespace in C++
 * environment, with export of relevant symbols to the global namespace
 * (if @ref LZAV_NS_CUSTOM is undefined).
 *
 * The macro is left undefined when compiled as C.
 */

#if defined( __cplusplus )

    #include <cstring>
    #include <cstdlib>

    #if __cplusplus >= 201103L

        #include <cstdint>

        #define LZAV_NOEX noexcept
        #define LZAV_NULL nullptr

    #else // __cplusplus >= 201103L

        #include <stdint.h>

        #define LZAV_NOEX throw()
        #define LZAV_NULL NULL

    #endif // __cplusplus >= 201103L

    #if defined( LZAV_NS_CUSTOM )
        #define LZAV_NS LZAV_NS_CUSTOM
    #else // defined( LZAV_NS_CUSTOM )
        #define LZAV_NS lzav
    #endif // defined( LZAV_NS_CUSTOM )

#else // defined( __cplusplus )

    #include <string.h>
    #include <stdlib.h>
    #include <stdint.h>

    #define LZAV_NOEX
    #define LZAV_NULL 0

#endif // defined( __cplusplus )

// Compile-time requirement: size_t must be able to hold any 32-bit value.
#if SIZE_MAX < 0xFFFFFFFFU

    #error LZAV: the platform or the compiler has incompatible size_t type.

#endif // size_t check

/**
 * @def LZAV_X86
 * @brief Macro is defined if `x86` or `x86_64` platform was detected.
 *
 * Detection relies on compiler-predefined macros only. `_M_AMD64` is ignored
 * when `_M_ARM64EC` is also defined.
 */

#if defined( i386 ) || defined( __i386 ) || defined( __i386__ ) || \
    defined( _X86_ ) || defined( __x86_64 ) || defined( __x86_64__ ) || \
    defined( __amd64 ) || defined( __amd64__ ) || defined( _M_IX86 ) || \
    ( defined( _M_AMD64 ) && !defined( _M_ARM64EC ))

    #define LZAV_X86

#endif // x86 platform check

/**
 * @def LZAV_LITTLE_ENDIAN
 * @brief Endianness definition macro, can be used as a logical constant.
 *
 * Equals 1 on little-endian targets and 0 on big-endian targets. The
 * compiler-provided `__BYTE_ORDER__` macro is consulted first, because some C
 * libraries (e.g. glibc's `<endian.h>`) define both `__LITTLE_ENDIAN` and
 * `__BIG_ENDIAN` as byte-order constants on every target, which makes a
 * plain `defined()` test on them unreliable. The legacy macro checks are used
 * only if `__BYTE_ORDER__` does not give an answer. If nothing matches, a
 * warning is issued and little-endian is assumed.
 */

#if defined( __BYTE_ORDER__ ) && defined( __ORDER_LITTLE_ENDIAN__ ) && \
    __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__

    #define LZAV_LITTLE_ENDIAN 1

#elif defined( __BYTE_ORDER__ ) && defined( __ORDER_BIG_ENDIAN__ ) && \
    __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__

    #define LZAV_LITTLE_ENDIAN 0

#elif defined( __LITTLE_ENDIAN__ ) || defined( __LITTLE_ENDIAN ) || \
    defined( _LITTLE_ENDIAN ) || \
    defined( LZAV_X86 ) || defined( _WIN32 ) || defined( _M_ARM ) || \
    defined( _M_ARM64EC )

    #define LZAV_LITTLE_ENDIAN 1

#elif defined( __BIG_ENDIAN__ ) || defined( __BIG_ENDIAN ) || \
    defined( _BIG_ENDIAN ) || \
    defined( __SYSC_ZARCH__ ) || defined( __zarch__ ) || \
    defined( __s390x__ ) || defined( __sparc ) || defined( __sparc__ )

    #define LZAV_LITTLE_ENDIAN 0

#else // defined( __BIG_ENDIAN__ )

    #warning LZAV: cannot determine endianness, assuming little-endian.

    #define LZAV_LITTLE_ENDIAN 1

#endif // defined( __BIG_ENDIAN__ )

/**
 * @def LZAV_PTR32
 * @brief Macro denotes that pointers are likely 32-bit (pointer overflow
 * checks are required).
 *
 * Defined when `SIZE_MAX` fits 32 bits and `UINTPTR_MAX` is either
 * unavailable or also fits 32 bits.
 */

#if SIZE_MAX <= 0xFFFFFFFFU && \
    ( !defined( UINTPTR_MAX ) || UINTPTR_MAX <= 0xFFFFFFFFU )

    #define LZAV_PTR32

#endif // 32-bit pointers check

/**
 * @def LZAV_ARCH64
 * @brief Macro that denotes availability of 64-bit instructions.
 *
 * Defined for LP64 data models, for known 64-bit targets, and whenever
 * @ref LZAV_PTR32 is not defined.
 */

#if defined( __LP64__ ) || defined( _LP64 ) || !defined( LZAV_PTR32 ) || \
    defined( __x86_64__ ) || defined( __aarch64__ ) || \
    defined( _M_X64 ) || defined( _M_ARM64 )

    #define LZAV_ARCH64

#endif // 64-bit availability check

/**
 * @def LZAV_GCC_BUILTINS
 * @brief Macro that denotes availability of GCC-style built-in functions.
 *
 * Defined for GCC, Clang, IBM XL C/C++, CompCert, and for the Intel compiler
 * (version 13.0 or later) when it does not operate in MSVC-compatible mode.
 */

#if defined( __GNUC__ ) || defined( __clang__ ) || \
    defined( __IBMC__ ) || defined( __IBMCPP__ ) || \
    defined( __COMPCERT__ ) || ( defined( __INTEL_COMPILER ) && \
    __INTEL_COMPILER >= 1300 && !defined( _MSC_VER ))

    #define LZAV_GCC_BUILTINS

#endif // GCC built-ins check

/**
 * @def LZAV_IEC32( x )
 * @brief In-place endianness-correction macro, for singular 32-bit variables.
 *
 * Expands to a no-op when @ref LZAV_LITTLE_ENDIAN is non-zero. Otherwise the
 * byte order of `x` is reversed and the result is stored back into `x`, using
 * a compiler intrinsic where one is available.
 *
 * @param x Value to correct in-place. Must be a modifiable 32-bit lvalue
 * without side effects: the generic fallback evaluates it several times.
 */

#if LZAV_LITTLE_ENDIAN

    #define LZAV_IEC32( x ) ((void) 0 )

#else // LZAV_LITTLE_ENDIAN

    #if defined( LZAV_GCC_BUILTINS )

        #define LZAV_IEC32( x ) (( x ) = __builtin_bswap32( x ))

    #elif defined( _MSC_VER )

        #define LZAV_IEC32( x ) (( x ) = _byteswap_ulong( x ))

    #else // defined( _MSC_VER )

        #define LZAV_IEC32( x ) (( x ) = (uint32_t) ( \
            ( x ) >> 24 | \
            (( x ) & 0x00FF0000 ) >> 8 | \
            (( x ) & 0x0000FF00 ) << 8 | \
            ( x ) << 24 ))

    #endif // defined( _MSC_VER )

#endif // LZAV_LITTLE_ENDIAN

/**
 * @def LZAV_LIKELY( x )
 * @brief Likelihood macro that is used for manually-guided
 * micro-optimization.
 * @param x Expression that is likely to be evaluated to 1.
 */

/**
 * @def LZAV_UNLIKELY( x )
 * @brief Unlikelihood macro that is used for manually-guided
 * micro-optimization.
 * @param x Expression that is unlikely to be evaluated to 1.
 */

// The branch hints map to `__builtin_expect` when GCC-style built-ins are
// available, except on Apple AArch64 targets, where (as with any other
// compiler) the macros expand to the bare expression.

#if defined( LZAV_GCC_BUILTINS ) && \
    !( defined( __aarch64__ ) && defined( __APPLE__ ))

    #define LZAV_LIKELY( x ) __builtin_expect( x, 1 )
    #define LZAV_UNLIKELY( x ) __builtin_expect( x, 0 )

#else // Likelihood macros

    #define LZAV_LIKELY( x ) ( x )
    #define LZAV_UNLIKELY( x ) ( x )

#endif // Likelihood macros

// MSVC without GCC-style built-ins: bring in the bit-scan intrinsics.
#if defined( _MSC_VER ) && !defined( LZAV_GCC_BUILTINS )
    #include <intrin.h> // For _BitScanForward.
#endif // defined( _MSC_VER ) && !defined( LZAV_GCC_BUILTINS )

// C++ only: open the implementation namespace and import the standard
// library names used by the implementation. NOTE(review): the namespace
// opened here is not closed within the reviewed part of the file.

#if defined( LZAV_NS )

namespace LZAV_NS {

using std :: memcpy;
using std :: memset;
using std :: malloc;
using std :: free;
using std :: size_t;

#if __cplusplus >= 201103L

    using std :: intptr_t;
    using std :: uint16_t;
    using std :: uint32_t;
    using uint8_t = unsigned char; ///< For C++ type aliasing compliance.

    #if defined( LZAV_ARCH64 )
        using std :: uint64_t;
    #endif // defined( LZAV_ARCH64 )

#endif // __cplusplus >= 201103L

// The enumeration is wrapped into its own namespace, which is then imported
// into the implementation namespace via a using-directive.

namespace enum_wrapper {

#endif // defined( LZAV_NS )

/**
 * @brief Decompression error codes.
 */

enum LZAV_ERROR
{
    LZAV_E_PARAMS = -1, ///< Incorrect function parameters.
    LZAV_E_SRCOOB = -2, ///< Source buffer OOB.
    LZAV_E_DSTOOB = -3, ///< Destination buffer OOB.
    LZAV_E_REFOOB = -4, ///< Back-reference OOB.
    LZAV_E_DSTLEN = -5, ///< Decompressed length mismatch.
    LZAV_E_UNKFMT = -6, ///< Unknown stream format.
    LZAV_E_PTROVR = -7 ///< Pointer overflow.
};

#if defined( LZAV_NS )

} // namespace enum_wrapper

using namespace enum_wrapper;

#endif // defined( LZAV_NS )

#define LZAV_HASH_C1 0x243F6A88 ///< Hash function constant 1.
#define LZAV_HASH_C2 0x85A308D3 ///< Hash function constant 2.

/**
 * @brief Enumeration used to define compression algorithm's parameters.
 */

enum LZAV_PARAM
{
    LZAV_WIN_LEN = ( 1 << 23 ), ///< LZ77 window length, in bytes.
    LZAV_REF_LEN = ( 15 + 255 + 254 ), ///< Max ref length, minus `mref`.
    LZAV_LIT_FIN = 6, ///< The number of literals required at finish.
    LZAV_OFS_MIN = 8, ///< The minimal reference offset to use.
    LZAV_OFS_TH1 = (( 1 << 10 ) - 1 ), ///< Reference offset threshold 1.
    LZAV_OFS_TH2 = (( 1 << 18 ) - 1 ), ///< Reference offset threshold 2.
    LZAV_FMT_CUR = 2 ///< Stream format identifier used by the compressor.
};

/**
|
|
* @brief Data match length finding function.
|
|
*
|
|
* Function finds the number of continuously-matching leading bytes between
|
|
* two buffers. This function is well-optimized for a wide variety of
|
|
* compilers and platforms.
|
|
*
|
|
* @param p1 Pointer to buffer 1.
|
|
* @param p2 Pointer to buffer 2.
|
|
* @param ml Maximal number of bytes to match.
|
|
* @return The number of matching leading bytes.
|
|
*/
|
|
|
|
static inline size_t lzav_match_len( const uint8_t* p1, const uint8_t* p2,
|
|
const size_t ml ) LZAV_NOEX
|
|
{
|
|
const uint8_t* const p1s = p1;
|
|
const uint8_t* const p1e = p1 + ml;
|
|
|
|
#if defined( LZAV_ARCH64 )
|
|
|
|
while( LZAV_LIKELY( p1 + 7 < p1e ))
|
|
{
|
|
uint64_t v1, v2, vd;
|
|
memcpy( &v1, p1, 8 );
|
|
memcpy( &v2, p2, 8 );
|
|
vd = v1 ^ v2;
|
|
|
|
if( vd != 0 )
|
|
{
|
|
#if defined( LZAV_GCC_BUILTINS )
|
|
|
|
#if LZAV_LITTLE_ENDIAN
|
|
return( (size_t) ( p1 - p1s + ( __builtin_ctzll( vd ) >> 3 )));
|
|
#else // LZAV_LITTLE_ENDIAN
|
|
return( (size_t) ( p1 - p1s + ( __builtin_clzll( vd ) >> 3 )));
|
|
#endif // LZAV_LITTLE_ENDIAN
|
|
|
|
#else // defined( LZAV_GCC_BUILTINS )
|
|
|
|
#if defined( _MSC_VER )
|
|
unsigned long i;
|
|
_BitScanForward64( &i, (unsigned __int64) vd );
|
|
return( (size_t) ( p1 - p1s + ( i >> 3 )));
|
|
#else // defined( _MSC_VER )
|
|
#if !LZAV_LITTLE_ENDIAN
|
|
const uint64_t sw = vd >> 32 | vd << 32;
|
|
const uint64_t sw2 =
|
|
( sw & (uint64_t) 0xFFFF0000FFFF0000 ) >> 16 |
|
|
( sw & (uint64_t) 0x0000FFFF0000FFFF ) << 16;
|
|
vd = ( sw2 & (uint64_t) 0xFF00FF00FF00FF00 ) >> 8 |
|
|
( sw2 & (uint64_t) 0x00FF00FF00FF00FF ) << 8;
|
|
#endif // !LZAV_LITTLE_ENDIAN
|
|
|
|
const uint64_t m = (uint64_t) 0x0101010101010101;
|
|
|
|
return( (size_t) ( p1 - p1s +
|
|
(((( vd ^ ( vd - 1 )) & ( m - 1 )) * m ) >> 56 )));
|
|
#endif // defined( _MSC_VER )
|
|
|
|
#endif // defined( LZAV_GCC_BUILTINS )
|
|
}
|
|
|
|
p1 += 8;
|
|
p2 += 8;
|
|
}
|
|
|
|
// At most 7 bytes left.
|
|
|
|
if( LZAV_LIKELY( p1 + 3 < p1e ))
|
|
{
|
|
|
|
#else // defined( LZAV_ARCH64 )
|
|
|
|
while( LZAV_LIKELY( p1 + 3 < p1e ))
|
|
{
|
|
|
|
#endif // defined( LZAV_ARCH64 )
|
|
|
|
uint32_t v1, v2, vd;
|
|
memcpy( &v1, p1, 4 );
|
|
memcpy( &v2, p2, 4 );
|
|
vd = v1 ^ v2;
|
|
|
|
if( vd != 0 )
|
|
{
|
|
#if defined( LZAV_GCC_BUILTINS )
|
|
|
|
#if LZAV_LITTLE_ENDIAN
|
|
return( (size_t) ( p1 - p1s + ( __builtin_ctz( vd ) >> 3 )));
|
|
#else // LZAV_LITTLE_ENDIAN
|
|
return( (size_t) ( p1 - p1s + ( __builtin_clz( vd ) >> 3 )));
|
|
#endif // LZAV_LITTLE_ENDIAN
|
|
|
|
#else // defined( LZAV_GCC_BUILTINS )
|
|
|
|
#if defined( _MSC_VER )
|
|
unsigned long i;
|
|
_BitScanForward( &i, (unsigned long) vd );
|
|
return( (size_t) ( p1 - p1s + ( i >> 3 )));
|
|
#else // defined( _MSC_VER )
|
|
LZAV_IEC32( vd );
|
|
const uint32_t m = 0x01010101;
|
|
|
|
return( (size_t) ( p1 - p1s +
|
|
(((( vd ^ ( vd - 1 )) & ( m - 1 )) * m ) >> 24 )));
|
|
#endif // defined( _MSC_VER )
|
|
|
|
#endif // defined( LZAV_GCC_BUILTINS )
|
|
}
|
|
|
|
p1 += 4;
|
|
p2 += 4;
|
|
}
|
|
|
|
// At most 3 bytes left.
|
|
|
|
if( p1 < p1e )
|
|
{
|
|
if( *p1 != p2[ 0 ])
|
|
{
|
|
return( (size_t) ( p1 - p1s ));
|
|
}
|
|
|
|
if( ++p1 < p1e )
|
|
{
|
|
if( *p1 != p2[ 1 ])
|
|
{
|
|
return( (size_t) ( p1 - p1s ));
|
|
}
|
|
|
|
if( ++p1 < p1e )
|
|
{
|
|
if( *p1 != p2[ 2 ])
|
|
{
|
|
return( (size_t) ( p1 - p1s ));
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
return( ml );
|
|
}
|
|
|
|
/**
|
|
* @brief Data match length finding function, reverse direction.
|
|
*
|
|
* @param p1 Origin pointer to buffer 1.
|
|
* @param p2 Origin pointer to buffer 2.
|
|
* @param ml Maximal number of bytes to back-match.
|
|
* @return The number of matching prior bytes, not including origin position.
|
|
*/
|
|
|
|
static inline size_t lzav_match_len_r( const uint8_t* p1, const uint8_t* p2,
|
|
const size_t ml ) LZAV_NOEX
|
|
{
|
|
if( LZAV_UNLIKELY( ml == 0 ))
|
|
{
|
|
return( 0 );
|
|
}
|
|
|
|
if( p1[ -1 ] != p2[ -1 ])
|
|
{
|
|
return( 0 );
|
|
}
|
|
|
|
if( LZAV_UNLIKELY( ml != 1 ))
|
|
{
|
|
const uint8_t* const p1s = p1;
|
|
const uint8_t* p1e = p1 - ml + 1;
|
|
p1--;
|
|
p2--;
|
|
|
|
while( LZAV_UNLIKELY( p1 > p1e ))
|
|
{
|
|
uint16_t v1, v2;
|
|
memcpy( &v1, p1 - 2, 2 );
|
|
memcpy( &v2, p2 - 2, 2 );
|
|
|
|
const uint32_t vd = (uint32_t) ( v1 ^ v2 );
|
|
|
|
if( vd != 0 )
|
|
{
|
|
#if LZAV_LITTLE_ENDIAN
|
|
return( (size_t) ( p1s - p1 + (( vd & 0xFF00 ) == 0 )));
|
|
#else // LZAV_LITTLE_ENDIAN
|
|
return( (size_t) ( p1s - p1 + (( vd & 0x00FF ) == 0 )));
|
|
#endif // LZAV_LITTLE_ENDIAN
|
|
}
|
|
|
|
p1 -= 2;
|
|
p2 -= 2;
|
|
}
|
|
|
|
p1e--;
|
|
|
|
if( p1 > p1e && p1[ -1 ] != p2[ -1 ])
|
|
{
|
|
return( (size_t) ( p1s - p1 ));
|
|
}
|
|
}
|
|
|
|
return( ml );
|
|
}
|
|
|
|
/**
|
|
* @brief Internal LZAV block header writing function (stream format 2).
|
|
*
|
|
* Internal function writes a block to the output buffer. This function can be
|
|
* used in custom compression algorithms.
|
|
*
|
|
* Stream format 2.
|
|
*
|
|
* "Raw" compressed stream consists of any quantity of unnumerated "blocks".
|
|
* A block starts with a header byte, followed by several optional bytes.
|
|
* Bits 4-5 of the header specify block's type.
|
|
*
|
|
* CC00LLLL: literal block (1-6 bytes). `LLLL` is literal length.
|
|
*
|
|
* OO01RRRR: 10-bit offset block (2-4 bytes). `RRRR` is reference length.
|
|
*
|
|
* OO10RRRR: 18-bit offset block (3-5 bytes).
|
|
*
|
|
* OO11RRRR: 23-bit offset block (4-6 bytes).
|
|
*
|
|
* If `LLLL` or `RRRR` equals 0, a value of 16 is assumed, and an additional
|
|
* length byte follows. If in a literal block this additional byte's highest
|
|
* bit is 1, one more length byte follows that defines higher bits of length
|
|
* (up to 4 bytes). In a reference block, additional 1-2 length bytes follow
|
|
* the offset bytes. `CC` is a reference offset carry value (additional 2
|
|
* lowest bits of offset of the next reference block). Block type 3 includes 3
|
|
* carry bits (highest bits of 4th byte).
|
|
*
|
|
* The overall compressed data is prefixed with a byte whose lower 4 bits
|
|
* contain minimal reference length (mref), and the highest 4 bits contain
|
|
* stream format identifier. Compressed data always finishes with
|
|
* @ref LZAV_LIT_FIN literals. The lzav_write_fin_2() function should be used
|
|
* to finalize compression.
|
|
*
|
|
* Except the last block, a literal block is always followed by a reference
|
|
* block.
|
|
*
|
|
* @param op Output buffer pointer.
|
|
* @param lc Literal length, in bytes.
|
|
* @param rc Reference length, in bytes, not lesser than mref.
|
|
* @param d Reference offset, in bytes. Should be lesser than
|
|
* @ref LZAV_WIN_LEN, and not lesser than `rc` since fast copy on
|
|
* decompression cannot provide consistency of copying of data that is not in
|
|
* the output yet.
|
|
* @param ipa Literals anchor pointer.
|
|
* @param cbpp Pointer to the pointer to the latest offset carry block header.
|
|
* Cannot be 0, but the contained pointer can be 0 (initial value).
|
|
* @param cshp Pointer to offset carry shift.
|
|
* @param mref Minimal reference length, in bytes, used by the compression
|
|
* algorithm.
|
|
* @return Incremented output buffer pointer.
|
|
*/
|
|
|
|
static inline uint8_t* lzav_write_blk_2( uint8_t* op, size_t lc, size_t rc,
|
|
size_t d, const uint8_t* ipa, uint8_t** const cbpp, int* const cshp,
|
|
const size_t mref ) LZAV_NOEX
|
|
{
|
|
// Perform offset carry to a previous block (`csh` may be zero).
|
|
|
|
const int csh = *cshp;
|
|
rc = rc + 1 - mref;
|
|
**cbpp |= (uint8_t) (( d << 8 ) >> csh );
|
|
d >>= csh;
|
|
|
|
if( LZAV_UNLIKELY( lc != 0 ))
|
|
{
|
|
// Write a literal block.
|
|
|
|
size_t cv; // Offset carry value in literal block.
|
|
cv = ( d & 3 ) << 6;
|
|
d >>= 2;
|
|
|
|
if( LZAV_LIKELY( lc < 9 ))
|
|
{
|
|
*op = (uint8_t) ( cv | lc );
|
|
op++;
|
|
|
|
memcpy( op, ipa, 8 );
|
|
op += lc;
|
|
}
|
|
else
|
|
if( LZAV_LIKELY( lc < 16 ))
|
|
{
|
|
*op = (uint8_t) ( cv | lc );
|
|
op++;
|
|
|
|
memcpy( op, ipa, 16 );
|
|
op += lc;
|
|
}
|
|
else
|
|
if( LZAV_LIKELY( lc < 16 + 128 ))
|
|
{
|
|
#if LZAV_LITTLE_ENDIAN
|
|
uint16_t ov = (uint16_t) (( lc - 16 ) << 8 | cv );
|
|
#else // LZAV_LITTLE_ENDIAN
|
|
uint16_t ov = (uint16_t) ( cv << 8 | ( lc - 16 ));
|
|
#endif // LZAV_LITTLE_ENDIAN
|
|
|
|
memcpy( op, &ov, 2 );
|
|
op += 2;
|
|
|
|
memcpy( op, ipa, 16 );
|
|
memcpy( op + 16, ipa + 16, 16 );
|
|
|
|
if( lc < 33 )
|
|
{
|
|
op += lc;
|
|
}
|
|
else
|
|
{
|
|
ipa += 32;
|
|
op += 32;
|
|
lc -= 32;
|
|
|
|
do
|
|
{
|
|
*op = *ipa;
|
|
ipa++;
|
|
op++;
|
|
} while( --lc != 0 );
|
|
}
|
|
}
|
|
else
|
|
{
|
|
*op = (uint8_t) cv;
|
|
op++;
|
|
|
|
size_t lcw = lc - 16;
|
|
|
|
while( lcw > 127 )
|
|
{
|
|
*op = (uint8_t) ( 0x80 | lcw );
|
|
lcw >>= 7;
|
|
op++;
|
|
}
|
|
|
|
*op = (uint8_t) lcw;
|
|
op++;
|
|
|
|
memcpy( op, ipa, lc );
|
|
op += lc;
|
|
}
|
|
}
|
|
|
|
// Write a reference block.
|
|
|
|
static const int ocsh[ 4 ] = { 0, 0, 0, 3 };
|
|
const size_t bt = (size_t) 1 + ( d > LZAV_OFS_TH1 ) + ( d > LZAV_OFS_TH2 );
|
|
|
|
if( LZAV_LIKELY( rc < 16 ))
|
|
{
|
|
uint32_t ov = (uint32_t) ( d << 6 | bt << 4 | rc );
|
|
LZAV_IEC32( ov );
|
|
memcpy( op, &ov, 4 );
|
|
|
|
op += bt;
|
|
*cshp = ocsh[ bt ];
|
|
*cbpp = op;
|
|
|
|
return( op + 1 );
|
|
}
|
|
|
|
uint32_t ov = (uint32_t) ( d << 6 | bt << 4 );
|
|
LZAV_IEC32( ov );
|
|
memcpy( op, &ov, 4 );
|
|
|
|
op += bt;
|
|
*cshp = ocsh[ bt ];
|
|
*cbpp = op;
|
|
|
|
if( LZAV_LIKELY( rc < 16 + 255 ))
|
|
{
|
|
op[ 1 ] = (uint8_t) ( rc - 16 );
|
|
return( op + 2 );
|
|
}
|
|
|
|
op[ 1 ] = (uint8_t) 255;
|
|
op[ 2 ] = (uint8_t) ( rc - 16 - 255 );
|
|
return( op + 3 );
|
|
}
|
|
|
|
/**
|
|
* @brief Internal LZAV finishing function (stream format 2).
|
|
*
|
|
* Internal function writes finishing literal block(s) to the output buffer.
|
|
* This function can be used in custom compression algorithms.
|
|
*
|
|
* Stream format 2.
|
|
*
|
|
* @param op Output buffer pointer.
|
|
* @param lc Literal length, in bytes. Not less than @ref LZAV_LIT_FIN.
|
|
* @param ipa Literals anchor pointer.
|
|
* @return Incremented output buffer pointer.
|
|
*/
|
|
|
|
static inline uint8_t* lzav_write_fin_2( uint8_t* op, size_t lc,
|
|
const uint8_t* ipa ) LZAV_NOEX
|
|
{
|
|
if( lc < 16 )
|
|
{
|
|
*op = (uint8_t) lc;
|
|
op++;
|
|
}
|
|
else
|
|
{
|
|
*op = 0;
|
|
op++;
|
|
|
|
size_t lcw = lc - 16;
|
|
|
|
while( lcw > 127 )
|
|
{
|
|
*op = (uint8_t) ( 0x80 | lcw );
|
|
lcw >>= 7;
|
|
op++;
|
|
}
|
|
|
|
*op = (uint8_t) lcw;
|
|
op++;
|
|
}
|
|
|
|
memcpy( op, ipa, lc );
|
|
return( op + lc );
|
|
}
|
|
|
|
/**
|
|
* @brief Function returns buffer size required for LZAV compression.
|
|
*
|
|
* @param srcl The length of the source data to be compressed.
|
|
* @return The required allocation size for destination compression buffer.
|
|
* Always a positive value.
|
|
*/
|
|
|
|
static inline int lzav_compress_bound( const int srcl ) LZAV_NOEX
|
|
{
|
|
if( srcl <= 0 )
|
|
{
|
|
return( 16 );
|
|
}
|
|
|
|
const int k = 16 + 127 + 1;
|
|
const int l2 = srcl / ( k + 6 );
|
|
|
|
return(( srcl - l2 * 6 + k - 1 ) / k * 2 - l2 + srcl + 16 );
|
|
}
|
|
|
|
/**
|
|
* @brief Function returns buffer size required for the higher-ratio LZAV
|
|
* compression.
|
|
*
|
|
* @param srcl The length of the source data to be compressed.
|
|
* @return The required allocation size for destination compression buffer.
|
|
* Always a positive value.
|
|
*/
|
|
|
|
static inline int lzav_compress_bound_hi( const int srcl ) LZAV_NOEX
|
|
{
|
|
if( srcl <= 0 )
|
|
{
|
|
return( 16 );
|
|
}
|
|
|
|
const int l2 = srcl / ( 16 + 5 );
|
|
|
|
return(( srcl - l2 * 5 + 15 ) / 16 * 2 - l2 + srcl + 16 );
|
|
}
|
|
|
|
/**
|
|
* @brief Hash-table initialization function.
|
|
*
|
|
* Function initializes the hash-table by replicating the contents of the
|
|
* specified tuple value.
|
|
*
|
|
* @param[out] ht Hash-table pointer.
|
|
* @param htsize Hash-table size. The size should be a power of 2 value, not
|
|
* lesser than 64 bytes.
|
|
* @param[in] initv Pointer to initialized 8-byte tuple.
|
|
*/
|
|
|
|
static inline void lzav_ht_init( uint8_t* const ht, const size_t htsize,
|
|
const uint32_t* const initv ) LZAV_NOEX
|
|
{
|
|
memcpy( ht, initv, 8 );
|
|
memcpy( ht + 8, initv, 8 );
|
|
memcpy( ht + 16, ht, 16 );
|
|
memcpy( ht + 32, ht, 16 );
|
|
memcpy( ht + 48, ht, 16 );
|
|
|
|
uint8_t* const hte = ht + htsize;
|
|
uint8_t* htc = ht + 64;
|
|
|
|
while( LZAV_LIKELY( htc != hte ))
|
|
{
|
|
memcpy( htc, ht, 16 );
|
|
memcpy( htc + 16, ht, 16 );
|
|
memcpy( htc + 32, ht, 16 );
|
|
memcpy( htc + 48, ht, 16 );
|
|
htc += 64;
|
|
}
|
|
}
|
|
|
|
/**
|
|
* @brief LZAV compression function, with external buffer option.
|
|
*
|
|
* Function performs in-memory data compression using the LZAV compression
|
|
* algorithm and stream format. The function produces a "raw" compressed data,
|
|
* without a header containing data length nor identifier nor checksum.
|
|
*
|
|
* Note that compression algorithm and its output on the same source data may
|
|
* differ between LZAV versions, and may differ between big- and little-endian
|
|
* systems. However, the decompression of a compressed data produced by any
|
|
* prior compressor version will remain possible.
|
|
*
|
|
* @param[in] src Source (uncompressed) data pointer, can be 0 if `srcl`
|
|
* equals 0. Address alignment is unimportant.
|
|
* @param[out] dst Destination (compressed data) buffer pointer. The allocated
|
|
* size should be at least lzav_compress_bound() bytes large. Address
|
|
* alignment is unimportant. Should be different to `src`.
|
|
* @param srcl Source data length, in bytes, can be 0: in this case the
|
|
* compressed length is assumed to be 0 as well.
|
|
* @param dstl Destination buffer's capacity, in bytes.
|
|
* @param ext_buf External buffer to use for hash-table, set to null for the
|
|
* function to manage memory itself (via standard `malloc`). Supplying a
|
|
* pre-allocated buffer is useful if compression is performed during
|
|
* application's operation often: this reduces memory allocation overhead and
|
|
* fragmentation. Note that the access to the supplied buffer is not
|
|
* implicitly thread-safe. Buffer's address must be aligned to 32 bits.
|
|
* @param ext_bufl The capacity of the `ext_buf`, in bytes, should be a
|
|
* power-of-2 value. Set to 0 if `ext_buf` is null. The capacity should not be
|
|
* lesser than `4*srcl`, and for default compression ratio should not be
|
|
* greater than 1 MiB. Same `ext_bufl` value can be used for any smaller
|
|
* source data. Using smaller `ext_bufl` values reduces the compression ratio
|
|
* and, at the same time, increases compression speed. This aspect can be
|
|
* utilized on memory-constrained and low-performance processors.
|
|
* @return The length of compressed data, in bytes. Returns 0 if `srcl` is
|
|
* lesser or equal to 0, or if `dstl` is too small, or if buffer pointers are
|
|
* invalid, or if not enough memory.
|
|
*/
|
|
|
|
static inline int lzav_compress( const void* const src, void* const dst,
|
|
const int srcl, const int dstl, void* const ext_buf,
|
|
const int ext_bufl ) LZAV_NOEX
|
|
{
|
|
if(( srcl <= 0 ) | ( src == LZAV_NULL ) | ( dst == LZAV_NULL ) |
|
|
( src == dst ) | ( dstl < lzav_compress_bound( srcl )))
|
|
{
|
|
return( 0 );
|
|
}
|
|
|
|
const size_t mref = 6; // Minimal reference length, in bytes.
|
|
const size_t mlen = LZAV_REF_LEN + mref;
|
|
|
|
uint8_t* op = (uint8_t*) dst; // Destination (compressed data) pointer.
|
|
*op = (uint8_t) ( LZAV_FMT_CUR << 4 | mref ); // Write prefix byte.
|
|
op++;
|
|
|
|
if( srcl < 16 )
|
|
{
|
|
// Handle a very short source data.
|
|
|
|
*op = (uint8_t) srcl;
|
|
op++;
|
|
|
|
memcpy( op, src, (size_t) srcl );
|
|
|
|
if( srcl > LZAV_LIT_FIN - 1 )
|
|
{
|
|
return( 2 + srcl );
|
|
}
|
|
|
|
memset( op + srcl, 0, (size_t) ( LZAV_LIT_FIN - srcl ));
|
|
return( 2 + LZAV_LIT_FIN );
|
|
}
|
|
|
|
uint32_t stack_buf[ 2048 ]; // On-stack hash-table.
|
|
void* alloc_buf = LZAV_NULL; // Hash-table allocated on heap.
|
|
uint8_t* ht = (uint8_t*) stack_buf; // The actual hash-table pointer.
|
|
|
|
size_t htsize; // Hash-table's size in bytes (power-of-2).
|
|
htsize = ( 1 << 7 ) * sizeof( uint32_t ) * 4;
|
|
|
|
if( ext_buf == LZAV_NULL )
|
|
{
|
|
while( htsize != ( 1 << 20 ) && ( htsize >> 2 ) < (size_t) srcl )
|
|
{
|
|
htsize <<= 1;
|
|
}
|
|
|
|
if( htsize > sizeof( stack_buf ))
|
|
{
|
|
alloc_buf = malloc( htsize );
|
|
|
|
if( alloc_buf == LZAV_NULL )
|
|
{
|
|
return( 0 );
|
|
}
|
|
|
|
ht = (uint8_t*) alloc_buf;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
size_t htsizem;
|
|
|
|
if( ext_bufl > (int) sizeof( stack_buf ))
|
|
{
|
|
htsizem = (size_t) ext_bufl;
|
|
ht = (uint8_t*) ext_buf;
|
|
}
|
|
else
|
|
{
|
|
htsizem = sizeof( stack_buf );
|
|
}
|
|
|
|
while(( htsize >> 2 ) < (size_t) srcl )
|
|
{
|
|
const size_t htsize2 = htsize << 1;
|
|
|
|
if( htsize2 > htsizem )
|
|
{
|
|
break;
|
|
}
|
|
|
|
htsize = htsize2;
|
|
}
|
|
}
|
|
|
|
// Initialize the hash-table. Each hash-table item consists of 2 tuples
|
|
// (4 initial match bytes; 32-bit source data offset). Set source data
|
|
// offset to avoid OOB in back-match.
|
|
|
|
uint32_t initv[ 2 ] = { 0, 16 };
|
|
|
|
if( LZAV_LIKELY( srcl > 19 ))
|
|
{
|
|
memcpy( initv, (const uint8_t*) src + 16, 4 );
|
|
}
|
|
|
|
lzav_ht_init( ht, htsize, initv );
|
|
|
|
const uint32_t hmask = (uint32_t) (( htsize - 1 ) ^ 15 ); // Hash mask.
|
|
const uint8_t* ip = (const uint8_t*) src; // Source data pointer.
|
|
const uint8_t* const ipe = ip + srcl - LZAV_LIT_FIN; // End pointer.
|
|
const uint8_t* const ipet = ipe - 15 + LZAV_LIT_FIN; // Hashing threshold,
|
|
// avoids I/O OOB.
|
|
const uint8_t* ipa = ip; // Literals anchor pointer.
|
|
|
|
uint8_t* cbp = op; // Pointer to the latest offset carry block header.
|
|
int csh = 0; // Offset carry shift.
|
|
|
|
intptr_t mavg = 100 << 21; // Running average of hash match rate (*2^15).
|
|
// Two-factor average: success (0-64) by average reference length.
|
|
uint32_t rndb = 0; // PRNG bit derived from the non-matching offset.
|
|
|
|
ip += 16; // Skip source bytes, to avoid OOB in back-match.
|
|
|
|
while( LZAV_LIKELY( ip < ipet ))
|
|
{
|
|
// Hash source data (endianness is minimally important for compression
|
|
// efficiency).
|
|
|
|
uint32_t iw1;
|
|
uint16_t iw2, ww2;
|
|
memcpy( &iw1, ip, 4 );
|
|
|
|
uint32_t Seed1 = LZAV_HASH_C1;
|
|
uint32_t hval = LZAV_HASH_C2;
|
|
memcpy( &iw2, ip + 4, 2 );
|
|
|
|
Seed1 ^= iw1;
|
|
hval ^= iw2;
|
|
hval *= Seed1;
|
|
hval >>= 12;
|
|
|
|
// Hash-table access.
|
|
|
|
uint32_t* const hp = (uint32_t*) ( ht + ( hval & hmask ));
|
|
const uint32_t ipo = (uint32_t) ( ip - (const uint8_t*) src );
|
|
const uint32_t hw1 = hp[ 0 ]; // Tuple 1's match word.
|
|
const uint8_t* wp; // At window pointer.
|
|
size_t d, ml, rc, lc;
|
|
|
|
// Find source data in hash-table tuples.
|
|
|
|
if( LZAV_LIKELY( iw1 != hw1 ))
|
|
{
|
|
if( LZAV_LIKELY( iw1 != hp[ 2 ]))
|
|
{
|
|
goto _no_match;
|
|
}
|
|
|
|
wp = (const uint8_t*) src + hp[ 3 ];
|
|
memcpy( &ww2, wp + 4, 2 );
|
|
|
|
if( LZAV_UNLIKELY( iw2 != ww2 ))
|
|
{
|
|
goto _no_match;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
wp = (const uint8_t*) src + hp[ 1 ];
|
|
memcpy( &ww2, wp + 4, 2 );
|
|
|
|
if( LZAV_UNLIKELY( iw2 != ww2 ))
|
|
{
|
|
if( LZAV_LIKELY( iw1 != hp[ 2 ]))
|
|
{
|
|
goto _no_match;
|
|
}
|
|
|
|
wp = (const uint8_t*) src + hp[ 3 ];
|
|
memcpy( &ww2, wp + 4, 2 );
|
|
|
|
if( LZAV_UNLIKELY( iw2 != ww2 ))
|
|
{
|
|
goto _no_match;
|
|
}
|
|
}
|
|
}
|
|
|
|
// Source data and hash-table entry matched.
|
|
|
|
d = (size_t) ( ip - wp ); // Reference offset (distance).
|
|
ml = (size_t) ( ipe - ip ); // Max reference match length. Make sure
|
|
// `LZAV_LIT_FIN` literals remain on finish.
|
|
|
|
if( LZAV_UNLIKELY( d - LZAV_OFS_MIN >
|
|
LZAV_WIN_LEN - LZAV_OFS_MIN - 1 ))
|
|
{
|
|
// Small offsets may be inefficient (wrap over 0 for efficiency).
|
|
|
|
goto _d_oob;
|
|
}
|
|
|
|
// Disallow reference copy overlap by using `d` as max match length.
|
|
|
|
ml = ( ml > d ? d : ml );
|
|
ml = ( ml > mlen ? mlen : ml );
|
|
|
|
if( LZAV_LIKELY( d > 273 ))
|
|
{
|
|
// Update a matching entry only if it is not an adjacent
|
|
// replication. Otherwise, source data consisting of same-byte
|
|
// runs won't compress well.
|
|
|
|
if( LZAV_LIKELY( iw1 == hw1 )) // Replace tuple, or insert.
|
|
{
|
|
hp[ 1 ] = ipo;
|
|
}
|
|
else
|
|
{
|
|
hp[ 2 ] = hw1;
|
|
hp[ 3 ] = hp[ 1 ];
|
|
hp[ 0 ] = iw1;
|
|
hp[ 1 ] = ipo;
|
|
}
|
|
}
|
|
|
|
rc = mref + lzav_match_len( ip + mref, wp + mref, ml - mref );
|
|
|
|
lc = (size_t) ( ip - ipa );
|
|
|
|
if( LZAV_UNLIKELY( lc != 0 ))
|
|
{
|
|
// Try to consume literals by finding a match at a back-position.
|
|
|
|
ml -= rc;
|
|
size_t bmc = ( lc > 16 ? 16 : lc );
|
|
|
|
if( LZAV_LIKELY( ml > bmc ))
|
|
{
|
|
ml = bmc;
|
|
}
|
|
|
|
bmc = lzav_match_len_r( ip, wp, ml );
|
|
|
|
if( LZAV_UNLIKELY( bmc != 0 ))
|
|
{
|
|
rc += bmc;
|
|
ip -= bmc;
|
|
lc -= bmc;
|
|
}
|
|
}
|
|
|
|
op = lzav_write_blk_2( op, lc, rc, d, ipa, &cbp, &csh, mref );
|
|
ip += rc;
|
|
ipa = ip;
|
|
mavg += ( (intptr_t) ( rc << 21 ) - mavg ) >> 10;
|
|
continue;
|
|
|
|
_d_oob:
|
|
ip++;
|
|
|
|
if( LZAV_LIKELY( d < LZAV_WIN_LEN ))
|
|
{
|
|
continue;
|
|
}
|
|
|
|
hp[ 1 + ( iw1 != hw1 ) * 2 ] = ipo;
|
|
continue;
|
|
|
|
_no_match:
|
|
hp[ 2 ] = iw1;
|
|
|
|
mavg -= mavg >> 11;
|
|
|
|
hp[ 3 ] = ipo;
|
|
|
|
if( mavg < ( 200 << 14 ) && ip != ipa ) // Speed-up threshold.
|
|
{
|
|
// Compression speed-up technique that keeps the number of hash
|
|
// evaluations around 45% of compressed data length. In some cases
|
|
// reduces the number of blocks by several percent.
|
|
|
|
ip += 1 + rndb; // Use PRNG bit to dither match positions.
|
|
rndb = ipo & 1; // Delay to decorrelate from current match.
|
|
|
|
if( LZAV_UNLIKELY( mavg < ( 130 << 14 )))
|
|
{
|
|
ip++;
|
|
|
|
if( LZAV_UNLIKELY( mavg < ( 100 << 14 )))
|
|
{
|
|
ip += (intptr_t) 100 - ( mavg >> 14 ); // Gradually faster.
|
|
}
|
|
}
|
|
}
|
|
|
|
ip++;
|
|
}
|
|
|
|
if( alloc_buf != LZAV_NULL )
|
|
{
|
|
free( alloc_buf );
|
|
}
|
|
|
|
return( (int) ( lzav_write_fin_2( op, (size_t) ( ipe - ipa +
|
|
LZAV_LIT_FIN ), ipa ) - (uint8_t*) dst ));
|
|
}
|
|
|
|
/**
|
|
* @brief Default LZAV compression function.
|
|
*
|
|
* Function performs in-memory data compression using the LZAV compression
|
|
* algorithm, with the default settings.
|
|
*
|
|
* See the lzav_compress() function for a more detailed description.
|
|
*
|
|
* @param[in] src Source (uncompressed) data pointer.
|
|
* @param[out] dst Destination (compressed data) buffer pointer. The allocated
|
|
* size should be at least lzav_compress_bound() bytes large.
|
|
* @param srcl Source data length, in bytes.
|
|
* @param dstl Destination buffer's capacity, in bytes.
|
|
* @return The length of compressed data, in bytes. Returns 0 if `srcl` is
|
|
* lesser or equal to 0, or if `dstl` is too small, or if not enough memory.
|
|
*/
|
|
|
|
static inline int lzav_compress_default( const void* const src,
|
|
void* const dst, const int srcl, const int dstl ) LZAV_NOEX
|
|
{
|
|
return( lzav_compress( src, dst, srcl, dstl, LZAV_NULL, 0 ));
|
|
}
|
|
|
|
/**
|
|
* @brief Higher-ratio LZAV compression function (much slower).
|
|
*
|
|
* Function performs in-memory data compression using the higher-ratio LZAV
|
|
* compression algorithm.
|
|
*
|
|
* @param[in] src Source (uncompressed) data pointer.
|
|
* @param[out] dst Destination (compressed data) buffer pointer. The allocated
|
|
* size should be at least lzav_compress_bound_hi() bytes large.
|
|
* @param srcl Source data length, in bytes.
|
|
* @param dstl Destination buffer's capacity, in bytes.
|
|
* @return The length of compressed data, in bytes. Returns 0 if `srcl` is
|
|
* lesser or equal to 0, or if `dstl` is too small, or if buffer pointers are
|
|
* invalid, or if not enough memory.
|
|
*/
|
|
|
|
static inline int lzav_compress_hi( const void* const src, void* const dst,
|
|
const int srcl, const int dstl ) LZAV_NOEX
|
|
{
|
|
if(( srcl <= 0 ) | ( src == LZAV_NULL ) | ( dst == LZAV_NULL ) |
|
|
( src == dst ) | ( dstl < lzav_compress_bound_hi( srcl )))
|
|
{
|
|
return( 0 );
|
|
}
|
|
|
|
const size_t mref = 5; // Minimal reference length, in bytes.
|
|
const size_t mlen = LZAV_REF_LEN + mref;
|
|
|
|
uint8_t* op = (uint8_t*) dst; // Destination (compressed data) pointer.
|
|
*op = (uint8_t) ( LZAV_FMT_CUR << 4 | mref ); // Write prefix byte.
|
|
op++;
|
|
|
|
if( srcl < 16 )
|
|
{
|
|
// Handle a very short source data.
|
|
|
|
*op = (uint8_t) srcl;
|
|
op++;
|
|
|
|
memcpy( op, src, (size_t) srcl );
|
|
|
|
if( srcl > LZAV_LIT_FIN - 1 )
|
|
{
|
|
return( 2 + srcl );
|
|
}
|
|
|
|
memset( op + srcl, 0, (size_t) ( LZAV_LIT_FIN - srcl ));
|
|
return( 2 + LZAV_LIT_FIN );
|
|
}
|
|
|
|
size_t htsize; // Hash-table's size in bytes (power-of-2).
|
|
htsize = ( 1 << 7 ) * sizeof( uint32_t ) * 2 * 8;
|
|
|
|
while( htsize != ( 1 << 23 ) && ( htsize >> 2 ) < (size_t) srcl )
|
|
{
|
|
htsize <<= 1;
|
|
}
|
|
|
|
uint8_t* const ht = (uint8_t*) malloc( htsize ); // The hash-table pointer.
|
|
|
|
if( ht == LZAV_NULL )
|
|
{
|
|
return( 0 );
|
|
}
|
|
|
|
// Initialize the hash-table. Each hash-table item consists of 8 tuples
|
|
// (4 initial match bytes; 32-bit source data offset). The last value of
|
|
// the last tuple is used as head tuple offset (an even value).
|
|
|
|
uint32_t initv[ 2 ] = { 0, 0 };
|
|
memcpy( initv, src, 4 );
|
|
|
|
lzav_ht_init( ht, htsize, initv );
|
|
|
|
const uint32_t hmask = (uint32_t) (( htsize - 1 ) ^ 63 ); // Hash mask.
|
|
const uint8_t* ip = (const uint8_t*) src; // Source data pointer.
|
|
const uint8_t* const ipe = ip + srcl - LZAV_LIT_FIN; // End pointer.
|
|
const uint8_t* const ipet = ipe - 15 + LZAV_LIT_FIN; // Hashing threshold,
|
|
// avoids I/O OOB.
|
|
const uint8_t* ipa = ip; // Literals anchor pointer.
|
|
|
|
uint8_t* cbp = op; // Pointer to the latest offset carry block header.
|
|
int csh = 0; // Offset carry shift.
|
|
|
|
size_t prc = 0; // Length of a previously found match.
|
|
size_t pd = 0; // Distance of a previously found match.
|
|
const uint8_t* pip = ip; // Source pointer of a previously found match.
|
|
|
|
while( LZAV_LIKELY( ip < ipet ))
|
|
{
|
|
// Hash source data (endianness is minimally important for compression
|
|
// efficiency).
|
|
|
|
uint32_t iw1;
|
|
memcpy( &iw1, ip, 4 );
|
|
|
|
uint32_t Seed1 = LZAV_HASH_C1;
|
|
uint32_t hval = LZAV_HASH_C2;
|
|
|
|
Seed1 ^= iw1;
|
|
hval ^= ip[ 4 ];
|
|
hval *= Seed1;
|
|
hval >>= 8;
|
|
|
|
// Hash-table access.
|
|
|
|
uint32_t* const hp = (uint32_t*) ( ht + ( hval & hmask ));
|
|
const uint32_t ipo = (uint32_t) ( ip - (const uint8_t*) src );
|
|
size_t ti0 = hp[ 15 ]; // Head tuple offset.
|
|
|
|
// Find source data in hash-table tuples, in up to 7 previous
|
|
// positions.
|
|
|
|
const uint8_t* wp = ip; // Best found window pointer.
|
|
const size_t mle = (size_t) ( ipe - ip ); // Match length bound.
|
|
size_t rc = 0; // Best found match length-4, 0 - not found.
|
|
size_t d; // Reference offset (distance).
|
|
size_t ti = ti0;
|
|
int i;
|
|
|
|
if( LZAV_LIKELY( mlen < mle ))
|
|
{
|
|
// Optimized match-finding.
|
|
|
|
for( i = 0; i < 7; i++ )
|
|
{
|
|
const uint32_t ww1 = hp[ ti ];
|
|
const uint8_t* const wp0 = (const uint8_t*) src + hp[ ti + 1 ];
|
|
d = (size_t) ( ip - wp0 );
|
|
ti = ( ti == 12 ? 0 : ti + 2 );
|
|
|
|
if( iw1 == ww1 )
|
|
{
|
|
d = ( d < 4 ? 4 : d );
|
|
|
|
const size_t rc0 = lzav_match_len( ip + 4, wp0 + 4,
|
|
( d > mlen ? mlen : d ) - 4 );
|
|
|
|
if( rc0 > rc )
|
|
{
|
|
wp = wp0;
|
|
rc = rc0;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
for( i = 0; i < 7; i++ )
|
|
{
|
|
const uint32_t ww1 = hp[ ti ];
|
|
const uint8_t* const wp0 = (const uint8_t*) src + hp[ ti + 1 ];
|
|
d = (size_t) ( ip - wp0 );
|
|
ti = ( ti == 12 ? 0 : ti + 2 );
|
|
|
|
if( iw1 == ww1 )
|
|
{
|
|
// Disallow reference copy overlap by using `d` as max
|
|
// match length. Fix `d` if it is lesser than 4 (this is
|
|
// safe as max `ip` is lesser than `ipe` by `mref` bytes).
|
|
|
|
d = ( d < 4 ? 4 : d );
|
|
|
|
// Make sure `LZAV_LIT_FIN` literals remain on finish.
|
|
|
|
size_t ml = ( mle > d ? d : mle );
|
|
ml = ( ml > mlen ? mlen : ml );
|
|
|
|
const size_t rc0 = lzav_match_len( ip + 4, wp0 + 4,
|
|
ml - 4 );
|
|
|
|
if( rc0 > rc )
|
|
{
|
|
wp = wp0;
|
|
rc = rc0;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
rc += 4;
|
|
d = (size_t) ( ip - wp );
|
|
|
|
if( LZAV_LIKELY( d != rc ))
|
|
{
|
|
// Update hash-table entry, if there was no match, or if the match
|
|
// is not an adjacent replication.
|
|
|
|
ti0 = ( ti0 == 0 ? 12 : ti0 - 2 );
|
|
hp[ ti0 ] = iw1;
|
|
hp[ ti0 + 1 ] = ipo;
|
|
hp[ 15 ] = (uint32_t) ti0;
|
|
}
|
|
|
|
if(( rc < mref + ( d > LZAV_OFS_TH2 )) |
|
|
( d - LZAV_OFS_MIN > LZAV_WIN_LEN - LZAV_OFS_MIN - 1 ))
|
|
{
|
|
ip++;
|
|
continue;
|
|
}
|
|
|
|
// Source data and hash-table entry match of suitable length.
|
|
|
|
const uint8_t* const ip0 = ip;
|
|
size_t lc = (size_t) ( ip - ipa );
|
|
|
|
if( LZAV_UNLIKELY( lc != 0 ))
|
|
{
|
|
// Try to consume literals by finding a match at back-position.
|
|
|
|
size_t ml = ( mle > d ? d : mle );
|
|
ml = ( ml > mlen ? mlen : ml );
|
|
ml -= rc;
|
|
|
|
const size_t wpo = (size_t) ( wp - (const uint8_t*) src );
|
|
|
|
if( LZAV_LIKELY( ml > lc ))
|
|
{
|
|
ml = lc;
|
|
}
|
|
|
|
if( LZAV_UNLIKELY( ml > wpo ))
|
|
{
|
|
ml = wpo;
|
|
}
|
|
|
|
const size_t bmc = lzav_match_len_r( ip, wp, ml );
|
|
|
|
if( LZAV_UNLIKELY( bmc != 0 ))
|
|
{
|
|
rc += bmc;
|
|
ip -= bmc;
|
|
lc -= bmc;
|
|
}
|
|
}
|
|
|
|
if( prc == 0 )
|
|
{
|
|
// Save match for a later comparison.
|
|
|
|
prc = rc;
|
|
pd = d;
|
|
pip = ip;
|
|
ip = ip0 + 1;
|
|
continue;
|
|
}
|
|
|
|
// Block size overhead estimation, and comparison with a previously
|
|
// found match.
|
|
|
|
const size_t plc = (size_t) ( pip - ipa );
|
|
const int lb = ( lc != 0 );
|
|
const int sh0 = 10 + csh;
|
|
const int sh = sh0 + lb * 2;
|
|
const size_t ov = lc + (size_t) lb + ( lc > 15 ) + 2 +
|
|
( d >= ( (size_t) 1 << sh )) +
|
|
( d >= ( (size_t) 1 << ( sh + 8 )));
|
|
|
|
const int plb = ( plc != 0 );
|
|
const int psh = sh0 + plb * 2;
|
|
const size_t pov = plc + (size_t) plb + ( plc > 15 ) + 2 +
|
|
( pd >= ( (size_t) 1 << psh )) +
|
|
( pd >= ( (size_t) 1 << ( psh + 8 )));
|
|
|
|
if( LZAV_LIKELY( prc * ov > rc * pov ))
|
|
{
|
|
const uint8_t* const nipa = pip + prc;
|
|
|
|
if( LZAV_UNLIKELY( nipa <= ip ))
|
|
{
|
|
// A winning previous match does not overlap a current match.
|
|
|
|
op = lzav_write_blk_2( op, plc, prc, pd, ipa, &cbp, &csh,
|
|
mref );
|
|
|
|
ipa = nipa;
|
|
prc = rc;
|
|
pd = d;
|
|
pip = ip;
|
|
ip = ip0 + 1;
|
|
continue;
|
|
}
|
|
|
|
rc = prc;
|
|
d = pd;
|
|
ip = pip;
|
|
lc = plc;
|
|
}
|
|
|
|
op = lzav_write_blk_2( op, lc, rc, d, ipa, &cbp, &csh, mref );
|
|
ip += rc;
|
|
ipa = ip;
|
|
prc = 0;
|
|
}
|
|
|
|
if( prc != 0 )
|
|
{
|
|
op = lzav_write_blk_2( op, (size_t) ( pip - ipa ), prc, pd, ipa, &cbp,
|
|
&csh, mref );
|
|
|
|
ipa = pip + prc;
|
|
}
|
|
|
|
free( ht );
|
|
|
|
return( (int) ( lzav_write_fin_2( op, (size_t) ( ipe - ipa +
|
|
LZAV_LIT_FIN ), ipa ) - (uint8_t*) dst ));
|
|
}
|
|
|
|
/**
 * @def LZAV_LOAD32( a )
 * @brief Defines `bv` and loads 32-bit unsigned value from memory, with
 * endianness-correction.
 *
 * The macro expands to a declaration of `bv`, so it can be used only once
 * per scope. The load is done via `memcpy`, hence `a` needs no alignment.
 *
 * @param a Memory address.
 */

/**
 * @def LZAV_SET_IPD_CV( x, v, sh )
 * @brief Sets `ipd` to point to back-reference, checks bounds, updates carry
 * bit variables.
 *
 * The macro defines the `d` constant (full reference offset: `x` shifted
 * left by the current `csh`, combined with the current `cv`) and the `md`
 * constant (number of bytes written to the destination so far), assigns
 * `ipd = op - d`, and jumps to the `_err_refoob` label if `d` is greater
 * than `md`. It relies on the `op`, `dst`, `cv`, `csh`, and `ipd` variables
 * being in scope. Expands to several statements, should be used as a
 * statement of a braced block only.
 *
 * @param x Reference offset.
 * @param v Next `cv` value.
 * @param sh Next `csh` value.
 */

/**
 * @def LZAV_SET_IPD( x )
 * @brief Sets `ipd` to point to back-reference, checks bounds, resets carry
 * bit variables.
 *
 * Equivalent to LZAV_SET_IPD_CV( x, 0, 0 ).
 *
 * @param x Reference offset.
 */

/**
 * @brief Internal LZAV decompression function (stream format 2).
 *
 * Function decompresses "raw" data previously compressed into the LZAV stream
 * format 2.
 *
 * This function should not be called directly since it does not check the
 * format identifier. It also reads the prefix byte at `src` before any
 * length check is made, and does not check its pointer arguments.
 *
 * @param[in] src Source (compressed) data pointer.
 * @param[out] dst Destination (decompressed data) buffer pointer.
 * @param srcl Source data length, in bytes.
 * @param dstl Expected destination data length, in bytes.
 * @param[out] pwl Pointer to variable that receives the number of bytes
 * written to the destination buffer (until error or end of buffer). It is
 * set to `dstl` on entry, and is replaced with the actual count on the error
 * paths that stop before the end of the destination buffer.
 * @return The length of decompressed data, in bytes, or any negative value if
 * some error happened.
 */

static inline int lzav_decompress_2( const void* const src, void* const dst,
	const int srcl, const int dstl, int* const pwl ) LZAV_NOEX
{
	const uint8_t* ip = (const uint8_t*) src; // Compressed data pointer.
	const uint8_t* const ipe = ip + srcl; // Compressed data boundary pointer.
	const uint8_t* const ipet = ipe - 6; // Block header read threshold.
	uint8_t* op = (uint8_t*) dst; // Destination (decompressed data) pointer.
	uint8_t* const ope = op + dstl; // Destination boundary pointer.
	uint8_t* const opet = ope - 63; // Threshold for fast copy to destination.
	*pwl = dstl; // Default: the whole destination is considered written.
	const size_t mref1 = (size_t) ( *ip & 15 ) - 1; // Minimal ref length - 1.
	size_t bh; // Current block header, updated in each branch.
	size_t cv = 0; // Reference offset carry value.
	int csh = 0; // Reference offset carry shift.

	// The macros below stay defined after this function (they are also used
	// by the stream format 1 decompressor), and are undefined later in the
	// file. No comments are placed inside the definitions, as a `//` comment
	// would swallow a line continuation.

	#define LZAV_LOAD32( a ) \
		uint32_t bv; \
		memcpy( &bv, a, 4 ); \
		LZAV_IEC32( bv )

	#define LZAV_SET_IPD_CV( x, v, sh ) \
		const size_t d = ( x ) << csh | cv; \
		csh = ( sh ); \
		const size_t md = (size_t) ( op - (uint8_t*) dst ); \
		cv = ( v ); \
		ipd = op - d; \
		if( LZAV_UNLIKELY( d > md )) \
			goto _err_refoob

	#define LZAV_SET_IPD( x ) \
		LZAV_SET_IPD_CV( x, 0, 0 )

	ip++; // Advance beyond prefix byte.

	// The source is too short to contain a single block header with its
	// read-ahead margin.

	if( LZAV_UNLIKELY( ip >= ipet ))
	{
		goto _err_srcoob;
	}

	bh = *ip;

	// On every iteration `bh` holds the header byte located at `ip`.

	while( LZAV_LIKELY( ip < ipet ))
	{
		const uint8_t* ipd; // Source data pointer.
		size_t cc; // Byte copy count.
		size_t bt; // Block type.

		if( LZAV_UNLIKELY(( bh & 0x30 ) == 0 )) // Block type 0.
		{
			// Literal block: the 2 upper header bits are offset carry bits,
			// the 4 lower bits are the literal count (0 - extended count).

			size_t ncv = bh >> 6; // Additional offset carry bits.
			ip++;
			cc = bh & 15;

			if( LZAV_LIKELY( cc != 0 )) // True, if no additional length byte.
			{
				ipd = ip;
				ncv <<= csh;
				ip += cc;

				if( LZAV_LIKELY(( op < opet ) & ( ipd < ipe - 22 ))) // 15+6+1
				{
					// Fast path: a fixed 16-byte copy (`cc` is 15 at most);
					// the thresholds above keep the excess read and write
					// within the buffers.

					cv |= ncv;
					bh = *ip;
					csh += 2;
					memcpy( op, ipd, 16 );
					op += cc;

					goto _refblk; // Reference block follows, if not EOS.
				}
			}
			else
			{
				// Extended literal count: 7 bits per byte, the upper bit of
				// a byte tells that one more byte follows.

				bh = *ip;
				ncv <<= csh;
				cc = bh & 0x7F;
				ip++;

				if( LZAV_UNLIKELY(( bh & 0x80 ) != 0 ))
				{
					int sh = 7;

					do
					{
						bh = *ip;
						ip++;
						cc |= ( bh & 0x7F ) << sh;

						if( sh == 28 ) // No more than 4 additional bytes.
						{
							break;
						}

						sh += 7;

					} while(( bh & 0x80 ) != 0 );

					cc &= 0x7FFFFFFF; // For malformed data.
				}

				cc += 16;
				ipd = ip;
				ip += cc;

				uint8_t* const opcc = op + cc;

			#if defined( LZAV_PTR32 )
				// Detect a wrap-around of the advanced pointers.

				if( LZAV_UNLIKELY(( ip < ipd ) | ( opcc < op )))
				{
					goto _err_ptrovr;
				}
			#endif // defined( LZAV_PTR32 )

				if( LZAV_LIKELY(( opcc < opet ) & ( ip < ipe - 70 ))) // 63+6+1
				{
					// Fast path: copy in whole 64-byte steps; may write and
					// read up to 63 bytes beyond `cc`, which the thresholds
					// above allow for.

					do
					{
						memcpy( op, ipd, 16 );
						memcpy( op + 16, ipd + 16, 16 );
						memcpy( op + 32, ipd + 32, 16 );
						memcpy( op + 48, ipd + 48, 16 );
						op += 64;
						ipd += 64;
					} while( op < opcc );

					cv |= ncv;
					bh = *ip;
					csh += 2;
					op = opcc;

					goto _refblk; // Reference block follows, if not EOS.
				}
			}

			// Slow path, near the end of either buffer: exact checks and an
			// exact-length copy.

			uint8_t* const opcc = op + cc;

			if( LZAV_UNLIKELY( opcc > ope ))
			{
				if( LZAV_UNLIKELY( ip > ipe ))
				{
					goto _err_srcoob_lit;
				}

				goto _err_dstoob_lit;
			}

			if( LZAV_LIKELY( ip < ipe ))
			{
				cv |= ncv;
				bh = *ip;
				csh += 2;
				memcpy( op, ipd, cc );
				op = opcc;
				continue;
			}

			if( LZAV_UNLIKELY( ip != ipe ))
			{
				goto _err_srcoob_lit;
			}

			// Literals end exactly at the source's end: the final block.

			memcpy( op, ipd, cc );
			op = opcc;
			break;

		_err_srcoob_lit:
			// Literals run beyond the source: copy what is available, as
			// much as fits the destination.

			cc = (size_t) ( ipe - ipd );

			if( cc < (size_t) ( ope - op ))
			{
				memcpy( op, ipd, cc );
				*pwl = (int) ( op + cc - (uint8_t*) dst );
			}
			else
			{
				// The destination gets filled up to its end, `*pwl` stays
				// equal to `dstl`.

				memcpy( op, ipd, (size_t) ( ope - op ));
			}

			return( LZAV_E_SRCOOB );

		_err_dstoob_lit:
			// Literals do not fit the destination: fill it up to its end,
			// `*pwl` stays equal to `dstl`.

			memcpy( op, ipd, (size_t) ( ope - op ));
			return( LZAV_E_DSTOOB );
		}

	_refblk:
		// Reference block: bits 4-5 of the header give the block type, which
		// equals the number of offset bytes that follow the header.

		bt = ( bh >> 4 ) & 3;
		ip++;
		const int bt8 = (int) ( bt << 3 );

	#if defined( LZAV_X86 )

		// Table-based variant: `om` masks the lower `bt` bytes of the loaded
		// value, `ocsh` is the next carry shift (3 for block type 3 only).

		static const uint32_t om[ 4 ] = { 0, 0xFF, 0xFFFF, 0xFFFFFF };
		static const int ocsh[ 4 ] = { 0, 0, 0, 3 };

		LZAV_LOAD32( ip );
		ip += bt;
		const uint32_t o = bv & om[ bt ];
		bv >>= bt8;

		const int wcsh = ocsh[ bt ];

		// The 2 upper header bits are the lowest offset bits, followed by 21
		// bits of `o`; the bits of `o` above these become the next carry
		// value.

		LZAV_SET_IPD_CV( bh >> 6 | ( o & 0x1FFFFF ) << 2, o >> 21, wcsh );

	#else // defined( LZAV_X86 )

		// Memory accesses on RISC are less efficient here.

		LZAV_LOAD32( ip );
		const uint32_t om = ( (uint32_t) 1 << bt8 ) - 1;
		ip += bt;
		const size_t o = bv & om;
		bv >>= bt8;

		// Same offset and carry composition as in the table-based variant.

		LZAV_SET_IPD_CV( bh >> 6 | ( o & 0x1FFFFF ) << 2, o >> 21,
			( bt == 3 ? 3 : 0 ));

	#endif // defined( LZAV_X86 )

		// After the shift, the lowest byte of `bv` is the byte that follows
		// the offset bytes.

		cc = bh & 15;

		if( LZAV_LIKELY( cc != 0 )) // True, if no additional length byte.
		{
			cc += mref1;
			bh = bv & 0xFF; // Next block's header.

			if( LZAV_LIKELY( op < opet ))
			{
				// Fast path: fixed-size copies, done in steps no larger than
				// the offset `d`, so that the source and destination of each
				// `memcpy` call do not overlap.

				if( LZAV_LIKELY( d > 15 ))
				{
					memcpy( op, ipd, 16 );
					memcpy( op + 16, ipd + 16, 4 );
					op += cc;
					continue;
				}

				if( LZAV_LIKELY( d > 7 ))
				{
					memcpy( op, ipd, 8 );
					memcpy( op + 8, ipd + 8, 8 );
					op += cc;
					continue;
				}

				if( d > 3 )
				{
					memcpy( op, ipd, 4 );
					memcpy( op + 4, ipd + 4, 4 );
					op += cc;
					continue;
				}

				// Offsets lesser than 4 are treated as an error.

				goto _err_refoob;
			}

			// Slow path, near the destination's end: exact-length copy,
			// overlapping references are rejected.

			if( LZAV_UNLIKELY( cc > d ))
			{
				goto _err_refoob;
			}

			uint8_t* const opcc = op + cc;

			if( LZAV_UNLIKELY( opcc > ope ))
			{
				goto _err_dstoob_ref;
			}

			memcpy( op, ipd, cc );
			op = opcc;
			continue;
		}
		else
		{
			// Extended reference length: one additional byte, and one more
			// byte if the first one equals 255.

			bh = bv & 0xFF;
			ip++;
			cc = 16 + mref1 + bh;

			if( LZAV_UNLIKELY( bh == 255 ))
			{
				cc += *ip;
				ip++;
			}

			uint8_t* const opcc = op + cc;
			bh = *ip; // Next block's header.

			if( LZAV_LIKELY(( opcc < opet ) & ( d > 15 )))
			{
				// Fast path: copy in whole 64-byte steps made of 16-byte
				// pieces; may write up to 63 bytes beyond `cc`.

				do
				{
					memcpy( op, ipd, 16 );
					memcpy( op + 16, ipd + 16, 16 );
					memcpy( op + 32, ipd + 32, 16 );
					memcpy( op + 48, ipd + 48, 16 );
					op += 64;
					ipd += 64;
				} while( op < opcc );

				op = opcc;
				continue;
			}

			// Slow path: exact-length copy, overlapping references are
			// rejected.

			if( LZAV_UNLIKELY( cc > d ))
			{
				goto _err_refoob;
			}

			if( LZAV_UNLIKELY( opcc > ope ))
			{
				goto _err_dstoob_ref;
			}

			memcpy( op, ipd, cc );
			op = opcc;
			continue;
		}

	_err_dstoob_ref:
		// Reference does not fit the destination: fill it up to its end,
		// `*pwl` stays equal to `dstl`.

		memcpy( op, ipd, (size_t) ( ope - op ));
		return( LZAV_E_DSTOOB );
	}

	// The decompressed length must match the expected length exactly.

	if( LZAV_UNLIKELY( op != ope ))
	{
		goto _err_dstlen;
	}

	return( (int) ( op - (uint8_t*) dst ));

	// Common error exits: report the number of bytes written so far.

_err_srcoob:
	*pwl = (int) ( op - (uint8_t*) dst );
	return( LZAV_E_SRCOOB );

_err_refoob:
	*pwl = (int) ( op - (uint8_t*) dst );
	return( LZAV_E_REFOOB );

_err_dstlen:
	*pwl = (int) ( op - (uint8_t*) dst );
	return( LZAV_E_DSTLEN );

#if defined( LZAV_PTR32 )
_err_ptrovr:
	*pwl = (int) ( op - (uint8_t*) dst );
	return( LZAV_E_PTROVR );
#endif // defined( LZAV_PTR32 )
}
|
|
|
|
#if LZAV_FMT_MIN < 2
|
|
|
|
/**
|
|
* @def LZAV_LOAD16( a )
|
|
* @brief Defines `bv` and loads 16-bit unsigned value from memory, with
|
|
* endianness-correction.
|
|
*
|
|
* @param a Memory address.
|
|
*/
|
|
|
|
/**
|
|
* @def LZAV_MEMMOVE( d, s, c )
|
|
* @brief Stack-based `memmove` function which gets optimized into SIMD
|
|
* instructions.
|
|
*
|
|
* @param d Destination address.
|
|
* @param s Source address.
|
|
* @param c Byte copy count (must be a constant).
|
|
*/
|
|
|
|
/**
 * @brief Internal LZAV decompression function (stream format 1).
 *
 * Function decompresses "raw" data previously compressed into the LZAV stream
 * format 1.
 *
 * This function should not be called directly since it does not check the
 * format identifier. It also reads the prefix byte at `src` before any
 * length check is made, and does not check its pointer arguments.
 *
 * The function body uses the LZAV_LOAD32(), LZAV_SET_IPD_CV(), and
 * LZAV_SET_IPD() macros that are defined within the stream format 2
 * decompressor above, and remain defined at this point.
 *
 * @param[in] src Source (compressed) data pointer.
 * @param[out] dst Destination (decompressed data) buffer pointer.
 * @param srcl Source data length, in bytes.
 * @param dstl Expected destination data length, in bytes.
 * @return The length of decompressed data, in bytes, or any negative value if
 * some error happened.
 */

static inline int lzav_decompress_1( const void* const src, void* const dst,
	const int srcl, const int dstl ) LZAV_NOEX
{
	const uint8_t* ip = (const uint8_t*) src; // Compressed data pointer.
	const uint8_t* const ipe = ip + srcl; // Compressed data boundary pointer.
	const uint8_t* const ipet = ipe - 5; // Block header read threshold.
	uint8_t* op = (uint8_t*) dst; // Destination (decompressed data) pointer.
	uint8_t* const ope = op + dstl; // Destination boundary pointer.
	uint8_t* const opet = ope - 63; // Threshold for fast copy to destination.
	const size_t mref1 = (size_t) ( *ip & 15 ) - 1; // Minimal ref length - 1.
	size_t bh = 0; // Current block header, updated in each branch.
	size_t cv = 0; // Reference offset carry value.
	int csh = 0; // Reference offset carry shift.

	// LZAV_LOAD16() expands to a declaration of `bv`, which receives a
	// 16-bit value whose lower byte is the byte at the address `a`.

	#if LZAV_LITTLE_ENDIAN
		#define LZAV_LOAD16( a ) \
			uint16_t bv; \
			memcpy( &bv, a, 2 )
	#else // LZAV_LITTLE_ENDIAN
		#define LZAV_LOAD16( a ) \
			uint16_t bv = (uint16_t) ( *( a ) | *( a + 1 ) << 8 )
	#endif // LZAV_LITTLE_ENDIAN

	// LZAV_MEMMOVE() copies via a temporary array, hence the source and
	// destination regions may overlap.

	#define LZAV_MEMMOVE( d, s, c ) \
		{ uint8_t tmp[ c ]; memcpy( tmp, s, c ); memcpy( d, tmp, c ); } (void) 0

	ip++; // Advance beyond prefix byte.

	// The source is too short to contain a single block header with its
	// read-ahead margin.

	if( LZAV_UNLIKELY( ip >= ipet ))
	{
		goto _err_srcoob;
	}

	bh = *ip;

	while( LZAV_LIKELY( ip < ipet ))
	{
		const uint8_t* ipd; // Source data pointer.
		size_t cc; // Byte copy count.

		if( LZAV_UNLIKELY(( bh & 0x30 ) == 0 )) // Block type 0.
		{
			// Literal block: the 2 upper header bits become the offset carry
			// value, the 4 lower bits are the literal count (0 - extended).

			cv = bh >> 6;
			csh = 2;
			ip++;
			cc = bh & 15;

			if( LZAV_LIKELY( cc != 0 )) // True, if no additional length byte.
			{
				ipd = ip;
				ip += cc;

				if( LZAV_LIKELY(( op < opet ) & ( ipd < ipe - 15 - 6 )))
				{
					// Fast path: a fixed 16-byte copy (`cc` is 15 at most).

					bh = *ip;
					memcpy( op, ipd, 16 );
					op += cc;
					goto _refblk; // Reference block follows, if not EOS.
				}
			}
			else
			{
				LZAV_LOAD16( ip );

				// Extended literal count: the first byte, plus the second
				// byte if the first one equals 255. With `lb` equal to 0 the
				// mask is 0x100, which zeroes the second byte's value.

				const size_t l2 = (size_t) ( bv & 0xFF );
				cc = 16;
				ip++;
				const int lb = ( l2 == 255 );
				cc += l2 + (( bv >> 8 ) & ( 0x100 - lb ));
				ip += lb;

				ipd = ip;
				ip += cc;

				if( LZAV_LIKELY(( op < opet ) & ( ipd < ipe - 63 - 1 )))
				{
					// Fast path: a fixed 64-byte copy; any remainder is
					// copied by the byte-wise loop below.

					memcpy( op, ipd, 16 );
					memcpy( op + 16, ipd + 16, 16 );
					memcpy( op + 32, ipd + 32, 16 );
					memcpy( op + 48, ipd + 48, 16 );

					if( LZAV_LIKELY( cc < 65 ))
					{
						bh = *ip;
						op += cc;
						continue;
					}

					ipd += 64;
					op += 64;
					cc -= 64;
				}
			}

			// Slow path: literals may end exactly at the source's end (the
			// final block), but not beyond it.

			if( LZAV_LIKELY( ip < ipe ))
			{
				bh = *ip;
			}
			else
			if( LZAV_UNLIKELY( ip != ipe ))
			{
				goto _err_srcoob;
			}

			if( LZAV_UNLIKELY( op + cc > ope ))
			{
				goto _err_dstoob;
			}

			// This and other alike copy-blocks are transformed into fast SIMD
			// instructions, by a modern compiler. Direct use of `memcpy` is
			// slower due to shortness of data remaining to copy, on average.

			while( cc != 0 )
			{
				*op = *ipd;
				ipd++;
				op++;
				cc--;
			}

			continue;
		}

	_refblk:
		// Reference block: bits 4-5 of the header select the block type, the
		// 4 lower bits are the reference length (0 - extended length).

		cc = bh & 15;

		if( LZAV_UNLIKELY(( bh & 32 ) == 0 )) // True, if block type 1.
		{
			// One offset byte; carry variables are reset.

			LZAV_SET_IPD( bh >> 6 | (size_t) ip[ 1 ] << 2 );
			ip += 2;
			bh = *ip;
		}
		else // Block type 2 or 3.
		{
			if( LZAV_LIKELY(( bh & 16 ) == 0 )) // True, if block type 2.
			{
				// Two offset bytes; carry variables are reset.

				LZAV_LOAD16( ip + 1 );
				LZAV_SET_IPD( bh >> 6 | (size_t) bv << 2 );
				ip += 3;
				bh = *ip;
			}
			else // Block type 3.
			{
				// Three offset bytes; the 2 upper header bits become the
				// next carry value. The 4th loaded byte is the byte that
				// follows the offset bytes.

				LZAV_LOAD32( ip + 1 );
				LZAV_SET_IPD_CV( bv & 0xFFFFFF, bh >> 6, 2 );
				ip += 4;
				bh = bv >> 24;
			}
		}

		if( LZAV_LIKELY( cc != 0 )) // True, if no additional length byte.
		{
			cc += mref1;

			if( LZAV_LIKELY( op < opet ))
			{
				// Fast path: a fixed 20-byte overlap-tolerant copy.

				LZAV_MEMMOVE( op, ipd, 16 );
				LZAV_MEMMOVE( op + 16, ipd + 16, 4 );

				op += cc;
				continue;
			}
		}
		else
		{
			// Extended reference length: `bh` holds the additional length
			// byte here, the next block's header follows it.

			cc = 16 + mref1 + bh;
			ip++;
			bh = *ip;

			if( LZAV_LIKELY( op < opet ))
			{
				// Fast path: a fixed 64-byte overlap-tolerant copy; any
				// remainder is copied by the byte-wise loop below.

				LZAV_MEMMOVE( op, ipd, 16 );
				LZAV_MEMMOVE( op + 16, ipd + 16, 16 );
				LZAV_MEMMOVE( op + 32, ipd + 32, 16 );
				LZAV_MEMMOVE( op + 48, ipd + 48, 16 );

				if( LZAV_LIKELY( cc < 65 ))
				{
					op += cc;
					continue;
				}

				ipd += 64;
				op += 64;
				cc -= 64;
			}
		}

		if( LZAV_UNLIKELY( op + cc > ope ))
		{
			goto _err_dstoob;
		}

		// Byte-wise forward copy of the reference's remaining bytes.

		while( cc != 0 )
		{
			*op = *ipd;
			ipd++;
			op++;
			cc--;
		}
	}

	// The decompressed length must match the expected length exactly.

	if( LZAV_UNLIKELY( op != ope ))
	{
		goto _err_dstlen;
	}

	return( (int) ( op - (uint8_t*) dst ));

_err_srcoob:
	return( LZAV_E_SRCOOB );

_err_dstoob:
	return( LZAV_E_DSTOOB );

_err_refoob:
	return( LZAV_E_REFOOB );

_err_dstlen:
	return( LZAV_E_DSTLEN );
}
|
|
|
|
#undef LZAV_LOAD16
|
|
#undef LZAV_MEMMOVE
|
|
|
|
#endif // LZAV_FMT_MIN < 2
|
|
|
|
#undef LZAV_LOAD32
|
|
#undef LZAV_SET_IPD_CV
|
|
#undef LZAV_SET_IPD
|
|
|
|
/**
|
|
* @brief LZAV decompression function (partial).
|
|
*
|
|
* Function decompresses "raw" data previously compressed into the LZAV stream
|
|
* format, for partial or recovery decompression. For example, this function
|
|
* can be used to decompress only an initial segment of a larger data block.
|
|
*
|
|
* @param[in] src Source (compressed) data pointer, can be 0 if `srcl` is 0.
|
|
* Address alignment is unimportant.
|
|
* @param[out] dst Destination (decompressed data) buffer pointer. Address
|
|
* alignment is unimportant. Should be different to `src`.
|
|
* @param srcl Source data length, in bytes, can be 0.
|
|
* @param dstl Destination buffer length, in bytes, can be 0.
|
|
* @return The length of decompressed data, in bytes. Always a non-negative
|
|
* value (error codes are not returned).
|
|
*/
|
|
|
|
static inline int lzav_decompress_partial( const void* const src,
|
|
void* const dst, const int srcl, const int dstl ) LZAV_NOEX
|
|
{
|
|
if( srcl <= 0 || src == LZAV_NULL || dst == LZAV_NULL || src == dst ||
|
|
dstl <= 0 )
|
|
{
|
|
return( 0 );
|
|
}
|
|
|
|
const int fmt = *(const uint8_t*) src >> 4;
|
|
int dl = 0;
|
|
|
|
if( fmt == 2 )
|
|
{
|
|
lzav_decompress_2( src, dst, srcl, dstl, &dl );
|
|
}
|
|
|
|
return( dl );
|
|
}
|
|
|
|
/**
|
|
* @brief LZAV decompression function.
|
|
*
|
|
* Function decompresses "raw" data previously compressed into the LZAV stream
|
|
* format.
|
|
*
|
|
* Note that while the function does perform checks to avoid OOB memory
|
|
* accesses, and checks for decompressed data length equality, this is not a
|
|
* strict guarantee of a valid decompression. In cases when the compressed
|
|
* data is stored in a long-term storage without embedded data integrity
|
|
* mechanisms (e.g., a database without RAID 1 guarantee, a binary container
|
|
* without a digital signature nor CRC), then a checksum (hash) of the
|
|
* original uncompressed data should be stored, and then evaluated against
|
|
* that of the decompressed data. Also, a separate checksum (hash) of
|
|
* application-defined header, which contains uncompressed and compressed data
|
|
* lengths, should be checked before decompression. A high-performance
|
|
* "komihash" hash function can be used to obtain a hash value of the data.
|
|
*
|
|
* @param[in] src Source (compressed) data pointer, can be 0 if `srcl` is 0.
|
|
* Address alignment is unimportant.
|
|
* @param[out] dst Destination (decompressed data) buffer pointer. Address
|
|
* alignment is unimportant. Should be different to `src`.
|
|
* @param srcl Source data length, in bytes, can be 0.
|
|
* @param dstl Expected destination data length, in bytes, can be 0. Should
|
|
* not be confused with the actual size of the destination buffer (which may
|
|
* be larger).
|
|
* @return The length of decompressed data, in bytes, or any negative value if
|
|
* some error happened. Always returns a negative value if the resulting
|
|
* decompressed data length differs from `dstl`. This means that error result
|
|
* handling requires just a check for a negative return value (see the
|
|
* LZAV_ERROR enum for possible values).
|
|
*/
|
|
|
|
static inline int lzav_decompress( const void* const src, void* const dst,
|
|
const int srcl, const int dstl ) LZAV_NOEX
|
|
{
|
|
if( srcl < 0 )
|
|
{
|
|
return( LZAV_E_PARAMS );
|
|
}
|
|
|
|
if( srcl == 0 )
|
|
{
|
|
return( dstl == 0 ? 0 : LZAV_E_PARAMS );
|
|
}
|
|
|
|
if( src == LZAV_NULL || dst == LZAV_NULL || src == dst || dstl <= 0 )
|
|
{
|
|
return( LZAV_E_PARAMS );
|
|
}
|
|
|
|
const int fmt = *(const uint8_t*) src >> 4;
|
|
|
|
if( fmt == 2 )
|
|
{
|
|
int tmp;
|
|
return( lzav_decompress_2( src, dst, srcl, dstl, &tmp ));
|
|
}
|
|
|
|
#if LZAV_FMT_MIN < 2
|
|
if( fmt == 1 )
|
|
{
|
|
return( lzav_decompress_1( src, dst, srcl, dstl ));
|
|
}
|
|
#endif // LZAV_FMT_MIN < 2
|
|
|
|
return( LZAV_E_UNKFMT );
|
|
}
|
|
|
|
#if defined( LZAV_NS )
|
|
|
|
} // namespace LZAV_NS
|
|
|
|
#if !defined( LZAV_NS_CUSTOM )
|
|
|
|
// Unnamed namespace with using-declarations for the LZAV public functions,
// and a using-directive for the `enum_wrapper` namespace, which make these
// names usable without the `LZAV_NS` qualifier. This block is compiled only
// if `LZAV_NS` is defined, and `LZAV_NS_CUSTOM` is not.

namespace {

using namespace LZAV_NS :: enum_wrapper;
using LZAV_NS :: lzav_compress_bound;
using LZAV_NS :: lzav_compress_bound_hi;
using LZAV_NS :: lzav_compress;
using LZAV_NS :: lzav_compress_default;
using LZAV_NS :: lzav_compress_hi;
using LZAV_NS :: lzav_decompress_partial;
using LZAV_NS :: lzav_decompress;

} // namespace
|
|
|
|
#endif // !defined( LZAV_NS_CUSTOM )
|
|
|
|
#endif // defined( LZAV_NS )
|
|
|
|
// Defines for Doxygen.
|
|
|
|
#if !defined( LZAV_NS_CUSTOM )
|
|
#define LZAV_NS_CUSTOM
|
|
#endif // !defined( LZAV_NS_CUSTOM )
|
|
|
|
#undef LZAV_NS_CUSTOM
|
|
#undef LZAV_NOEX
|
|
#undef LZAV_NULL
|
|
#undef LZAV_X86
|
|
#undef LZAV_GCC_BUILTINS
|
|
#undef LZAV_IEC32
|
|
#undef LZAV_LIKELY
|
|
#undef LZAV_UNLIKELY
|
|
#undef LZAV_HASH_C1
|
|
#undef LZAV_HASH_C2
|
|
|
|
#endif // LZAV_INCLUDED
|