mirror of
https://github.com/Feodor2/Mypal68.git
synced 2025-06-18 14:55:44 -04:00
The switch for sse1
This commit is contained in:
parent
7b56d348b6
commit
af8ad28b05
@ -459,7 +459,7 @@ SOURCES += [
|
||||
|
||||
# Are we targeting x86-32 or x86-64? If so, we want to include SSE2 code for
|
||||
# nsTextFragment.cpp
|
||||
if CONFIG['INTEL_ARCHITECTURE']:
|
||||
if CONFIG['INTEL_ARCHITECTURE'] and not CONFIG['THE_SSE1']:
|
||||
SOURCES += ['nsTextFragmentSSE2.cpp']
|
||||
SOURCES['nsTextFragmentSSE2.cpp'].flags += CONFIG['SSE2_FLAGS']
|
||||
|
||||
|
@ -23,7 +23,7 @@ UNIFIED_SOURCES += [
|
||||
]
|
||||
|
||||
# Are we targeting x86 or x64? If so, build SSE2 files.
|
||||
if CONFIG['INTEL_ARCHITECTURE']:
|
||||
if CONFIG['INTEL_ARCHITECTURE'] and not CONFIG['THE_SSE1']:
|
||||
DEFINES['USE_SSE2'] = True
|
||||
|
||||
include('/ipc/chromium/chromium-config.mozbuild')
|
||||
|
@ -130,7 +130,7 @@ if CONFIG['CPU_ARCH'] == 'aarch64' or CONFIG['BUILD_ARM_NEON']:
|
||||
]
|
||||
|
||||
# Are we targeting x86 or x64? If so, build SSE2 files.
|
||||
if CONFIG['INTEL_ARCHITECTURE']:
|
||||
if CONFIG['INTEL_ARCHITECTURE'] and not CONFIG['THE_SSE1']:
|
||||
SOURCES += ['AudioNodeEngineSSE2.cpp']
|
||||
DEFINES['USE_SSE2'] = True
|
||||
SOURCES['AudioNodeEngineSSE2.cpp'].flags += CONFIG['SSE2_FLAGS']
|
||||
|
@ -136,7 +136,7 @@ if CONFIG['MOZ_ENABLE_SKIA']:
|
||||
]
|
||||
|
||||
# Are we targeting x86 or x64? If so, build SSE2 files.
|
||||
if CONFIG['INTEL_ARCHITECTURE']:
|
||||
if CONFIG['INTEL_ARCHITECTURE'] and not CONFIG['THE_SSE1']:
|
||||
SOURCES += [
|
||||
'BlurSSE2.cpp',
|
||||
'FilterProcessingSSE2.cpp',
|
||||
|
@ -79,10 +79,10 @@
|
||||
# undef far
|
||||
#endif
|
||||
|
||||
#if defined(_MSC_VER) && !defined(_M_ARM) && !defined(_M_ARM64)
|
||||
#if defined(_MSC_VER) && !defined(_M_ARM) && !defined(_M_ARM64) && !defined(THE_SSE1)
|
||||
# include <intrin.h>
|
||||
# define ANGLE_USE_SSE
|
||||
#elif defined(__GNUC__) && (defined(__x86_64__) || defined(__i386__))
|
||||
#elif defined(__GNUC__) && (defined(__x86_64__) || defined(__i386__)) && !defined(THE_SSE1)
|
||||
# include <x86intrin.h>
|
||||
# define ANGLE_USE_SSE
|
||||
#endif
|
||||
|
@ -1,6 +1,6 @@
|
||||
AllowCompilerWarnings()
|
||||
|
||||
if CONFIG['INTEL_ARCHITECTURE']:
|
||||
if CONFIG['INTEL_ARCHITECTURE'] and not CONFIG['THE_SSE1']:
|
||||
CXXFLAGS += CONFIG['SSE2_FLAGS']
|
||||
DEFINES['__NDK_FPABI__'] = ''
|
||||
DEFINES['ANGLE_SKIP_DXGI_1_2_CHECK'] = True
|
||||
|
@ -86,8 +86,10 @@ use_sse2 = False
|
||||
use_vmx = False
|
||||
use_arm_simd_gcc = False
|
||||
use_arm_neon_gcc = False
|
||||
if CONFIG['INTEL_ARCHITECTURE']:
|
||||
if CONFIG['INTEL_ARCHITECTURE'] and not CONFIG['THE_SSE1']:
|
||||
use_sse2 = True
|
||||
|
||||
if CONFIG['INTEL_ARCHITECTURE']:
|
||||
if CONFIG['CPU_ARCH'] == 'x86':
|
||||
if CONFIG['CC_TYPE'] == 'clang-cl':
|
||||
use_mmx = True
|
||||
|
@ -490,7 +490,7 @@ static bool AttemptVideoScale(TextureSourceBasic* aSource,
|
||||
const gfx::Rect& aRect,
|
||||
const gfx::Rect& aClipRect, DrawTarget* aDest,
|
||||
const DrawTarget* aBuffer) {
|
||||
#ifdef MOZILLA_SSE_HAVE_CPUID_DETECTION
|
||||
#if defined(MOZILLA_SSE_HAVE_CPUID_DETECTION) && !defined(THE_SSE1)
|
||||
if (!mozilla::supports_ssse3()) return false;
|
||||
if (aNewTransform
|
||||
.IsTranslation()) // unscaled painting should take the regular path
|
||||
@ -563,7 +563,7 @@ static bool AttemptVideoConvertAndScale(
|
||||
WrappingTextureSourceYCbCrBasic* wrappingSource =
|
||||
aSource->AsWrappingTextureSourceYCbCrBasic();
|
||||
if (!wrappingSource) return false;
|
||||
#ifdef MOZILLA_SSE_HAVE_CPUID_DETECTION
|
||||
#if defined(MOZILLA_SSE_HAVE_CPUID_DETECTION) && !defined(THE_SSE1)
|
||||
if (!mozilla::supports_ssse3()) // libyuv requests SSSE3 for fast YUV
|
||||
// conversion.
|
||||
return false;
|
||||
|
@ -23,8 +23,10 @@ if CONFIG['CC_TYPE'] in ('clang', 'gcc'):
|
||||
use_sse1 = False
|
||||
use_sse2 = False
|
||||
use_altivec = False
|
||||
if CONFIG['INTEL_ARCHITECTURE']:
|
||||
if CONFIG['INTEL_ARCHITECTURE'] and not CONFIG['THE_SSE1']:
|
||||
use_sse2 = True
|
||||
|
||||
if CONFIG['INTEL_ARCHITECTURE']:
|
||||
if CONFIG['CC_TYPE'] == 'clang-cl':
|
||||
if CONFIG['OS_ARCH'] != 'WINNT' or CONFIG['CPU_ARCH'] != 'x86_64':
|
||||
use_sse1 = True
|
||||
|
@ -442,6 +442,7 @@ static void qcms_transform_data_graya_out_precache(qcms_transform *transform, un
|
||||
}
|
||||
}
|
||||
|
||||
#if (defined(__POWERPC__) || defined(__powerpc__) && !defined(__NO_FPRS__))
|
||||
static void qcms_transform_data_rgb_out_lut_precache(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length)
|
||||
{
|
||||
unsigned int i;
|
||||
@ -511,6 +512,7 @@ static void qcms_transform_data_rgba_out_lut_precache(qcms_transform *transform,
|
||||
dest += RGBA_OUTPUT_COMPONENTS;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
// Not used
|
||||
/*
|
||||
@ -1011,87 +1013,6 @@ void qcms_transform_release(qcms_transform *t)
|
||||
transform_free(t);
|
||||
}
|
||||
|
||||
#ifdef X86
|
||||
// Determine if we can build with SSE2 (this was partly copied from jmorecfg.h in
|
||||
// mozilla/jpeg)
|
||||
// -------------------------------------------------------------------------
|
||||
#if defined(_M_IX86) && defined(_MSC_VER)
|
||||
#define HAS_CPUID
|
||||
/* Get us a CPUID function. Avoid clobbering EBX because sometimes it's the PIC
|
||||
register - I'm not sure if that ever happens on windows, but cpuid isn't
|
||||
on the critical path so we just preserve the register to be safe and to be
|
||||
consistent with the non-windows version. */
|
||||
static void cpuid(uint32_t fxn, uint32_t *a, uint32_t *b, uint32_t *c, uint32_t *d) {
|
||||
uint32_t a_, b_, c_, d_;
|
||||
__asm {
|
||||
xchg ebx, esi
|
||||
mov eax, fxn
|
||||
cpuid
|
||||
mov a_, eax
|
||||
mov b_, ebx
|
||||
mov c_, ecx
|
||||
mov d_, edx
|
||||
xchg ebx, esi
|
||||
}
|
||||
*a = a_;
|
||||
*b = b_;
|
||||
*c = c_;
|
||||
*d = d_;
|
||||
}
|
||||
#elif (defined(__GNUC__) || defined(__SUNPRO_C)) && (defined(__i386__) || defined(__i386))
|
||||
#define HAS_CPUID
|
||||
/* Get us a CPUID function. We can't use ebx because it's the PIC register on
|
||||
some platforms, so we use ESI instead and save ebx to avoid clobbering it. */
|
||||
static void cpuid(uint32_t fxn, uint32_t *a, uint32_t *b, uint32_t *c, uint32_t *d) {
|
||||
|
||||
uint32_t a_, b_, c_, d_;
|
||||
__asm__ __volatile__ ("xchgl %%ebx, %%esi; cpuid; xchgl %%ebx, %%esi;"
|
||||
: "=a" (a_), "=S" (b_), "=c" (c_), "=d" (d_) : "a" (fxn));
|
||||
*a = a_;
|
||||
*b = b_;
|
||||
*c = c_;
|
||||
*d = d_;
|
||||
}
|
||||
#endif
|
||||
|
||||
// -------------------------Runtime SSEx Detection-----------------------------
|
||||
|
||||
/* MMX is always supported per
|
||||
* Gecko v1.9.1 minimum CPU requirements */
|
||||
#define SSE1_EDX_MASK (1UL << 25)
|
||||
#define SSE2_EDX_MASK (1UL << 26)
|
||||
#define SSE3_ECX_MASK (1UL << 0)
|
||||
|
||||
static int sse_version_available(void)
|
||||
{
|
||||
#if defined(__x86_64__) || defined(__x86_64) || defined(_M_AMD64)
|
||||
/* we know at build time that 64-bit CPUs always have SSE2
|
||||
* this tells the compiler that non-SSE2 branches will never be
|
||||
* taken (i.e. OK to optimze away the SSE1 and non-SIMD code */
|
||||
return 2;
|
||||
#elif defined(HAS_CPUID)
|
||||
static int sse_version = -1;
|
||||
uint32_t a, b, c, d;
|
||||
uint32_t function = 0x00000001;
|
||||
|
||||
if (sse_version == -1) {
|
||||
sse_version = 0;
|
||||
cpuid(function, &a, &b, &c, &d);
|
||||
if (c & SSE3_ECX_MASK)
|
||||
sse_version = 3;
|
||||
else if (d & SSE2_EDX_MASK)
|
||||
sse_version = 2;
|
||||
else if (d & SSE1_EDX_MASK)
|
||||
sse_version = 1;
|
||||
}
|
||||
|
||||
return sse_version;
|
||||
#else
|
||||
return 0;
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
static const struct matrix bradford_matrix = {{ { 0.8951f, 0.2664f,-0.1614f},
|
||||
{-0.7502f, 1.7135f, 0.0367f},
|
||||
{ 0.0389f,-0.0685f, 1.0296f}},
|
||||
@ -1291,24 +1212,21 @@ qcms_transform* qcms_transform_create(
|
||||
return NULL;
|
||||
}
|
||||
if (precache) {
|
||||
#ifdef X86
|
||||
if (sse_version_available() >= 2) {
|
||||
if (in_type == QCMS_DATA_RGB_8)
|
||||
transform->transform_fn = qcms_transform_data_rgb_out_lut_sse2;
|
||||
else
|
||||
transform->transform_fn = qcms_transform_data_rgba_out_lut_sse2;
|
||||
#if defined(X86)
|
||||
#if !defined(THE_SSE1)
|
||||
if (in_type == QCMS_DATA_RGB_8)
|
||||
transform->transform_fn = qcms_transform_data_rgb_out_lut_sse2;
|
||||
else
|
||||
transform->transform_fn = qcms_transform_data_rgba_out_lut_sse2;
|
||||
|
||||
#if !(defined(_MSC_VER) && defined(_M_AMD64))
|
||||
/* Microsoft Compiler for x64 doesn't support MMX.
|
||||
* SSE code uses MMX so that we disable on x64 */
|
||||
} else
|
||||
if (sse_version_available() >= 1) {
|
||||
if (in_type == QCMS_DATA_RGB_8)
|
||||
transform->transform_fn = qcms_transform_data_rgb_out_lut_sse1;
|
||||
else
|
||||
transform->transform_fn = qcms_transform_data_rgba_out_lut_sse1;
|
||||
#elif !(defined(_MSC_VER) && defined(_M_AMD64)) || defined(THE_SSE1)
|
||||
/* Microsoft Compiler for x64 doesn't support MMX.
|
||||
* SSE code uses MMX so that we disable on x64 */
|
||||
if (in_type == QCMS_DATA_RGB_8)
|
||||
transform->transform_fn = qcms_transform_data_rgb_out_lut_sse1;
|
||||
else
|
||||
transform->transform_fn = qcms_transform_data_rgba_out_lut_sse1;
|
||||
#endif
|
||||
} else
|
||||
#endif
|
||||
#if (defined(__POWERPC__) || defined(__powerpc__) && !defined(__NO_FPRS__))
|
||||
if (have_altivec()) {
|
||||
@ -1316,14 +1234,14 @@ qcms_transform* qcms_transform_create(
|
||||
transform->transform_fn = qcms_transform_data_rgb_out_lut_altivec;
|
||||
else
|
||||
transform->transform_fn = qcms_transform_data_rgba_out_lut_altivec;
|
||||
} else
|
||||
} else {
|
||||
|
||||
if (in_type == QCMS_DATA_RGB_8)
|
||||
transform->transform_fn = qcms_transform_data_rgb_out_lut_precache;
|
||||
else
|
||||
transform->transform_fn = qcms_transform_data_rgba_out_lut_precache;
|
||||
}
|
||||
#endif
|
||||
{
|
||||
if (in_type == QCMS_DATA_RGB_8)
|
||||
transform->transform_fn = qcms_transform_data_rgb_out_lut_precache;
|
||||
else
|
||||
transform->transform_fn = qcms_transform_data_rgba_out_lut_precache;
|
||||
}
|
||||
} else {
|
||||
if (in_type == QCMS_DATA_RGB_8)
|
||||
transform->transform_fn = qcms_transform_data_rgb_out_lut;
|
||||
|
@ -160,7 +160,7 @@ elif CONFIG['MOZ_WIDGET_TOOLKIT'] == 'windows':
|
||||
]
|
||||
|
||||
# Are we targeting x86 or x64? If so, build gfxAlphaRecoverySSE2.cpp.
|
||||
if CONFIG['INTEL_ARCHITECTURE']:
|
||||
if CONFIG['INTEL_ARCHITECTURE'] and not CONFIG['THE_SSE1']:
|
||||
SOURCES += ['gfxAlphaRecoverySSE2.cpp']
|
||||
# The file uses SSE2 intrinsics, so it needs special compile flags on some
|
||||
# compilers.
|
||||
|
@ -15,12 +15,13 @@ UNIFIED_SOURCES += [
|
||||
'yuv_row_table.cpp',
|
||||
]
|
||||
|
||||
if CONFIG['INTEL_ARCHITECTURE']:
|
||||
if CONFIG['INTEL_ARCHITECTURE'] and not CONFIG['THE_SSE1']:
|
||||
# These files use MMX and SSE2 intrinsics, so they need special compile flags
|
||||
# on some compilers.
|
||||
SOURCES += ['yuv_convert_sse2.cpp']
|
||||
SOURCES['yuv_convert_sse2.cpp'].flags += CONFIG['SSE2_FLAGS']
|
||||
|
||||
if CONFIG['INTEL_ARCHITECTURE']:
|
||||
# MSVC doesn't support MMX when targeting AMD64.
|
||||
if CONFIG['CC_TYPE'] == 'clang-cl':
|
||||
if CONFIG['CPU_ARCH'] == 'x86':
|
||||
@ -33,7 +34,7 @@ if CONFIG['INTEL_ARCHITECTURE']:
|
||||
|
||||
if CONFIG['CC_TYPE'] == 'clang-cl':
|
||||
if CONFIG['CPU_ARCH'] == 'x86_64' or \
|
||||
(CONFIG['CPU_ARCH'] == 'x86' and CONFIG['CC_TYPE'] == 'clang-cl'):
|
||||
(CONFIG['CPU_ARCH'] == 'x86' and CONFIG['CC_TYPE'] == 'clang-cl' and not CONFIG['THE_SSE1']):
|
||||
SOURCES += [
|
||||
'yuv_row_win64.cpp',
|
||||
]
|
||||
|
@ -579,11 +579,16 @@ case "$target" in
|
||||
dnl more recent, so set that explicitly here unless another
|
||||
dnl target arch has already been set.
|
||||
changequote(,)
|
||||
if test "$THE_SSE1" = 1; then
|
||||
SSE2_FLAGS="-arch:SSE"
|
||||
else
|
||||
SSE2_FLAGS="-arch:SSE2"
|
||||
fi
|
||||
if test -z `echo $CFLAGS | grep -i [-/]arch:` ; then
|
||||
CFLAGS="$CFLAGS -arch:SSE2"
|
||||
CFLAGS="$CFLAGS $SSE2_FLAGS"
|
||||
fi
|
||||
if test -z `echo $CXXFLAGS | grep -i [-/]arch:` ; then
|
||||
CXXFLAGS="$CXXFLAGS -arch:SSE2"
|
||||
CXXFLAGS="$CXXFLAGS $SSE2_FLAGS"
|
||||
fi
|
||||
changequote([,])
|
||||
fi
|
||||
|
@ -69,11 +69,19 @@
|
||||
#define HAVE_MSA 0
|
||||
#define HAVE_NEON 0
|
||||
#define HAVE_SSE 1
|
||||
#ifndef THE_SSE1
|
||||
#define HAVE_SSE2 1
|
||||
#define HAVE_SSE3 1
|
||||
#define HAVE_SSE4_1 1
|
||||
#define HAVE_SSE4_2 1
|
||||
#define HAVE_SSSE3 1
|
||||
#else
|
||||
#define HAVE_SSE2 0
|
||||
#define HAVE_SSE3 0
|
||||
#define HAVE_SSE4_1 0
|
||||
#define HAVE_SSE4_2 0
|
||||
#define HAVE_SSSE3 0
|
||||
#endif
|
||||
#define HAVE_VSX 0
|
||||
#define HAVE_WXWIDGETS 0
|
||||
#define INCLUDE_INSTALL_DIR INSTALLDIR/include
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -52,51 +52,71 @@ extern "C" {
|
||||
#endif
|
||||
|
||||
void apply_selfguided_restoration_c(const uint8_t *dat, int width, int height, int stride, int eps, const int *xqd, uint8_t *dst, int dst_stride, int32_t *tmpbuf, int bit_depth, int highbd);
|
||||
#ifndef THE_SSE1
|
||||
void apply_selfguided_restoration_sse4_1(const uint8_t *dat, int width, int height, int stride, int eps, const int *xqd, uint8_t *dst, int dst_stride, int32_t *tmpbuf, int bit_depth, int highbd);
|
||||
void apply_selfguided_restoration_avx2(const uint8_t *dat, int width, int height, int stride, int eps, const int *xqd, uint8_t *dst, int dst_stride, int32_t *tmpbuf, int bit_depth, int highbd);
|
||||
#endif
|
||||
RTCD_EXTERN void (*apply_selfguided_restoration)(const uint8_t *dat, int width, int height, int stride, int eps, const int *xqd, uint8_t *dst, int dst_stride, int32_t *tmpbuf, int bit_depth, int highbd);
|
||||
|
||||
void av1_build_compound_diffwtd_mask_c(uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const uint8_t *src0, int src0_stride, const uint8_t *src1, int src1_stride, int h, int w);
|
||||
#ifndef THE_SSE1
|
||||
void av1_build_compound_diffwtd_mask_sse4_1(uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const uint8_t *src0, int src0_stride, const uint8_t *src1, int src1_stride, int h, int w);
|
||||
void av1_build_compound_diffwtd_mask_avx2(uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const uint8_t *src0, int src0_stride, const uint8_t *src1, int src1_stride, int h, int w);
|
||||
#endif
|
||||
RTCD_EXTERN void (*av1_build_compound_diffwtd_mask)(uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const uint8_t *src0, int src0_stride, const uint8_t *src1, int src1_stride, int h, int w);
|
||||
|
||||
void av1_build_compound_diffwtd_mask_d16_c(uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const CONV_BUF_TYPE *src0, int src0_stride, const CONV_BUF_TYPE *src1, int src1_stride, int h, int w, ConvolveParams *conv_params, int bd);
|
||||
#ifndef THE_SSE1
|
||||
void av1_build_compound_diffwtd_mask_d16_sse4_1(uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const CONV_BUF_TYPE *src0, int src0_stride, const CONV_BUF_TYPE *src1, int src1_stride, int h, int w, ConvolveParams *conv_params, int bd);
|
||||
void av1_build_compound_diffwtd_mask_d16_avx2(uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const CONV_BUF_TYPE *src0, int src0_stride, const CONV_BUF_TYPE *src1, int src1_stride, int h, int w, ConvolveParams *conv_params, int bd);
|
||||
#endif
|
||||
RTCD_EXTERN void (*av1_build_compound_diffwtd_mask_d16)(uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const CONV_BUF_TYPE *src0, int src0_stride, const CONV_BUF_TYPE *src1, int src1_stride, int h, int w, ConvolveParams *conv_params, int bd);
|
||||
|
||||
void av1_build_compound_diffwtd_mask_highbd_c(uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const uint8_t *src0, int src0_stride, const uint8_t *src1, int src1_stride, int h, int w, int bd);
|
||||
#ifndef THE_SSE1
|
||||
void av1_build_compound_diffwtd_mask_highbd_ssse3(uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const uint8_t *src0, int src0_stride, const uint8_t *src1, int src1_stride, int h, int w, int bd);
|
||||
void av1_build_compound_diffwtd_mask_highbd_avx2(uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const uint8_t *src0, int src0_stride, const uint8_t *src1, int src1_stride, int h, int w, int bd);
|
||||
#endif
|
||||
RTCD_EXTERN void (*av1_build_compound_diffwtd_mask_highbd)(uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const uint8_t *src0, int src0_stride, const uint8_t *src1, int src1_stride, int h, int w, int bd);
|
||||
|
||||
void av1_convolve_2d_copy_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
#ifndef THE_SSE1
|
||||
void av1_convolve_2d_copy_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_2d_copy_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
#endif
|
||||
RTCD_EXTERN void (*av1_convolve_2d_copy_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
|
||||
void av1_convolve_2d_scale_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params);
|
||||
#ifndef THE_SSE1
|
||||
void av1_convolve_2d_scale_sse4_1(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params);
|
||||
#endif
|
||||
RTCD_EXTERN void (*av1_convolve_2d_scale)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params);
|
||||
|
||||
void av1_convolve_2d_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
#ifndef THE_SSE1
|
||||
void av1_convolve_2d_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_2d_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
#endif
|
||||
RTCD_EXTERN void (*av1_convolve_2d_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
|
||||
void av1_convolve_horiz_rs_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const int16_t *x_filters, int x0_qn, int x_step_qn);
|
||||
#ifndef THE_SSE1
|
||||
void av1_convolve_horiz_rs_sse4_1(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const int16_t *x_filters, int x0_qn, int x_step_qn);
|
||||
#endif
|
||||
RTCD_EXTERN void (*av1_convolve_horiz_rs)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const int16_t *x_filters, int x0_qn, int x_step_qn);
|
||||
|
||||
void av1_convolve_x_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
#ifndef THE_SSE1
|
||||
void av1_convolve_x_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_x_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
#endif
|
||||
RTCD_EXTERN void (*av1_convolve_x_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
|
||||
void av1_convolve_y_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
#ifndef THE_SSE1
|
||||
void av1_convolve_y_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_y_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
#endif
|
||||
RTCD_EXTERN void (*av1_convolve_y_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
|
||||
void av1_dr_prediction_z1_c(uint8_t *dst, ptrdiff_t stride, int bw, int bh, const uint8_t *above, const uint8_t *left, int upsample_above, int dx, int dy);
|
||||
@ -109,15 +129,21 @@ void av1_dr_prediction_z3_c(uint8_t *dst, ptrdiff_t stride, int bw, int bh, cons
|
||||
#define av1_dr_prediction_z3 av1_dr_prediction_z3_c
|
||||
|
||||
void av1_filter_intra_edge_c(uint8_t *p, int sz, int strength);
|
||||
#ifndef THE_SSE1
|
||||
void av1_filter_intra_edge_sse4_1(uint8_t *p, int sz, int strength);
|
||||
#endif
|
||||
RTCD_EXTERN void (*av1_filter_intra_edge)(uint8_t *p, int sz, int strength);
|
||||
|
||||
void av1_filter_intra_edge_high_c(uint16_t *p, int sz, int strength);
|
||||
#ifndef THE_SSE1
|
||||
void av1_filter_intra_edge_high_sse4_1(uint16_t *p, int sz, int strength);
|
||||
#endif
|
||||
RTCD_EXTERN void (*av1_filter_intra_edge_high)(uint16_t *p, int sz, int strength);
|
||||
|
||||
void av1_filter_intra_predictor_c(uint8_t *dst, ptrdiff_t stride, TX_SIZE tx_size, const uint8_t *above, const uint8_t *left, int mode);
|
||||
#ifndef THE_SSE1
|
||||
void av1_filter_intra_predictor_sse4_1(uint8_t *dst, ptrdiff_t stride, TX_SIZE tx_size, const uint8_t *above, const uint8_t *left, int mode);
|
||||
#endif
|
||||
RTCD_EXTERN void (*av1_filter_intra_predictor)(uint8_t *dst, ptrdiff_t stride, TX_SIZE tx_size, const uint8_t *above, const uint8_t *left, int mode);
|
||||
|
||||
void av1_highbd_convolve8_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps);
|
||||
@ -130,17 +156,23 @@ void av1_highbd_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8
|
||||
#define av1_highbd_convolve8_vert av1_highbd_convolve8_vert_c
|
||||
|
||||
void av1_highbd_convolve_2d_copy_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
#ifndef THE_SSE1
|
||||
void av1_highbd_convolve_2d_copy_sr_sse2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_2d_copy_sr_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
#endif
|
||||
RTCD_EXTERN void (*av1_highbd_convolve_2d_copy_sr)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
|
||||
void av1_highbd_convolve_2d_scale_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd);
|
||||
#ifndef THE_SSE1
|
||||
void av1_highbd_convolve_2d_scale_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd);
|
||||
#endif
|
||||
RTCD_EXTERN void (*av1_highbd_convolve_2d_scale)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd);
|
||||
|
||||
void av1_highbd_convolve_2d_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
#ifndef THE_SSE1
|
||||
void av1_highbd_convolve_2d_sr_ssse3(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_2d_sr_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
#endif
|
||||
RTCD_EXTERN void (*av1_highbd_convolve_2d_sr)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
|
||||
void av1_highbd_convolve_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps);
|
||||
@ -150,17 +182,23 @@ void av1_highbd_convolve_copy_c(const uint8_t *src, ptrdiff_t src_stride, uint8_
|
||||
#define av1_highbd_convolve_copy av1_highbd_convolve_copy_c
|
||||
|
||||
void av1_highbd_convolve_horiz_rs_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const int16_t *x_filters, int x0_qn, int x_step_qn, int bd);
|
||||
#ifndef THE_SSE1
|
||||
void av1_highbd_convolve_horiz_rs_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const int16_t *x_filters, int x0_qn, int x_step_qn, int bd);
|
||||
#endif
|
||||
RTCD_EXTERN void (*av1_highbd_convolve_horiz_rs)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const int16_t *x_filters, int x0_qn, int x_step_qn, int bd);
|
||||
|
||||
void av1_highbd_convolve_x_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
#ifndef THE_SSE1
|
||||
void av1_highbd_convolve_x_sr_ssse3(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_x_sr_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
#endif
|
||||
RTCD_EXTERN void (*av1_highbd_convolve_x_sr)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
|
||||
void av1_highbd_convolve_y_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
#ifndef THE_SSE1
|
||||
void av1_highbd_convolve_y_sr_ssse3(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_y_sr_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
#endif
|
||||
RTCD_EXTERN void (*av1_highbd_convolve_y_sr)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
|
||||
void av1_highbd_dr_prediction_z1_c(uint16_t *dst, ptrdiff_t stride, int bw, int bh, const uint16_t *above, const uint16_t *left, int upsample_above, int dx, int dy, int bd);
|
||||
@ -173,33 +211,47 @@ void av1_highbd_dr_prediction_z3_c(uint16_t *dst, ptrdiff_t stride, int bw, int
|
||||
#define av1_highbd_dr_prediction_z3 av1_highbd_dr_prediction_z3_c
|
||||
|
||||
void av1_highbd_inv_txfm_add_c(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
|
||||
#ifndef THE_SSE1
|
||||
void av1_highbd_inv_txfm_add_sse4_1(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
|
||||
void av1_highbd_inv_txfm_add_avx2(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
|
||||
#endif
|
||||
/* Function pointer for av1_highbd_inv_txfm_add. setup_rtcd_internal() sets it
 * to av1_highbd_inv_txfm_add_c; the _sse4_1 and _avx2 prototypes above are
 * declared only when THE_SSE1 is not defined. */
RTCD_EXTERN void (*av1_highbd_inv_txfm_add)(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
|
||||
|
||||
void av1_highbd_inv_txfm_add_16x16_c(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
|
||||
#ifndef THE_SSE1
|
||||
void av1_highbd_inv_txfm_add_16x16_sse4_1(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
|
||||
#endif
|
||||
/* Function pointer for av1_highbd_inv_txfm_add_16x16. setup_rtcd_internal()
 * sets it to av1_highbd_inv_txfm_add_16x16_c; the _sse4_1 prototype above is
 * declared only when THE_SSE1 is not defined. */
RTCD_EXTERN void (*av1_highbd_inv_txfm_add_16x16)(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
|
||||
|
||||
void av1_highbd_inv_txfm_add_16x8_c(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
|
||||
#ifndef THE_SSE1
|
||||
void av1_highbd_inv_txfm_add_16x8_sse4_1(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
|
||||
#endif
|
||||
/* Function pointer for av1_highbd_inv_txfm_add_16x8. setup_rtcd_internal()
 * sets it to av1_highbd_inv_txfm_add_16x8_c; the _sse4_1 prototype above is
 * declared only when THE_SSE1 is not defined. */
RTCD_EXTERN void (*av1_highbd_inv_txfm_add_16x8)(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
|
||||
|
||||
void av1_highbd_inv_txfm_add_32x32_c(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
|
||||
#ifndef THE_SSE1
|
||||
void av1_highbd_inv_txfm_add_32x32_sse4_1(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
|
||||
void av1_highbd_inv_txfm_add_32x32_avx2(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
|
||||
#endif
|
||||
/* Function pointer for av1_highbd_inv_txfm_add_32x32. setup_rtcd_internal()
 * sets it to av1_highbd_inv_txfm_add_32x32_c; the _sse4_1 and _avx2 prototypes
 * above are declared only when THE_SSE1 is not defined. */
RTCD_EXTERN void (*av1_highbd_inv_txfm_add_32x32)(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
|
||||
|
||||
void av1_highbd_inv_txfm_add_4x4_c(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
|
||||
#ifndef THE_SSE1
|
||||
void av1_highbd_inv_txfm_add_4x4_sse4_1(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
|
||||
#endif
|
||||
/* Function pointer for av1_highbd_inv_txfm_add_4x4. setup_rtcd_internal()
 * sets it to av1_highbd_inv_txfm_add_4x4_c; the _sse4_1 prototype above is
 * declared only when THE_SSE1 is not defined. */
RTCD_EXTERN void (*av1_highbd_inv_txfm_add_4x4)(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
|
||||
|
||||
void av1_highbd_inv_txfm_add_8x16_c(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
|
||||
#ifndef THE_SSE1
|
||||
void av1_highbd_inv_txfm_add_8x16_sse4_1(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
|
||||
#endif
|
||||
/* Function pointer for av1_highbd_inv_txfm_add_8x16. setup_rtcd_internal()
 * sets it to av1_highbd_inv_txfm_add_8x16_c; the _sse4_1 prototype above is
 * declared only when THE_SSE1 is not defined. */
RTCD_EXTERN void (*av1_highbd_inv_txfm_add_8x16)(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
|
||||
|
||||
void av1_highbd_inv_txfm_add_8x8_c(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
|
||||
#ifndef THE_SSE1
|
||||
void av1_highbd_inv_txfm_add_8x8_sse4_1(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
|
||||
#endif
|
||||
/* Function pointer for av1_highbd_inv_txfm_add_8x8. setup_rtcd_internal()
 * sets it to av1_highbd_inv_txfm_add_8x8_c; the _sse4_1 prototype above is
 * declared only when THE_SSE1 is not defined. */
RTCD_EXTERN void (*av1_highbd_inv_txfm_add_8x8)(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
|
||||
|
||||
void av1_highbd_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, int bd);
|
||||
@ -209,32 +261,44 @@ void av1_highbd_iwht4x4_1_add_c(const tran_low_t *input, uint8_t *dest, int dest
|
||||
#define av1_highbd_iwht4x4_1_add av1_highbd_iwht4x4_1_add_c
|
||||
|
||||
void av1_highbd_jnt_convolve_2d_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
#ifndef THE_SSE1
|
||||
void av1_highbd_jnt_convolve_2d_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_2d_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
#endif
|
||||
/* Function pointer for av1_highbd_jnt_convolve_2d. setup_rtcd_internal() sets
 * it to av1_highbd_jnt_convolve_2d_c; the _sse4_1 and _avx2 prototypes above
 * are declared only when THE_SSE1 is not defined. */
RTCD_EXTERN void (*av1_highbd_jnt_convolve_2d)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
|
||||
void av1_highbd_jnt_convolve_2d_copy_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
#ifndef THE_SSE1
|
||||
void av1_highbd_jnt_convolve_2d_copy_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_2d_copy_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
#endif
|
||||
/* Function pointer for av1_highbd_jnt_convolve_2d_copy. setup_rtcd_internal()
 * sets it to av1_highbd_jnt_convolve_2d_copy_c; the _sse4_1 and _avx2
 * prototypes above are declared only when THE_SSE1 is not defined. */
RTCD_EXTERN void (*av1_highbd_jnt_convolve_2d_copy)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
|
||||
void av1_highbd_jnt_convolve_x_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
#ifndef THE_SSE1
|
||||
void av1_highbd_jnt_convolve_x_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_x_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
#endif
|
||||
/* Function pointer for av1_highbd_jnt_convolve_x. setup_rtcd_internal() sets
 * it to av1_highbd_jnt_convolve_x_c; the _sse4_1 and _avx2 prototypes above
 * are declared only when THE_SSE1 is not defined. */
RTCD_EXTERN void (*av1_highbd_jnt_convolve_x)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
|
||||
void av1_highbd_jnt_convolve_y_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
#ifndef THE_SSE1
|
||||
void av1_highbd_jnt_convolve_y_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_y_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
#endif
|
||||
/* Function pointer for av1_highbd_jnt_convolve_y. setup_rtcd_internal() sets
 * it to av1_highbd_jnt_convolve_y_c; the _sse4_1 and _avx2 prototypes above
 * are declared only when THE_SSE1 is not defined. */
RTCD_EXTERN void (*av1_highbd_jnt_convolve_y)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
|
||||
void av1_highbd_warp_affine_c(const int32_t *mat, const uint16_t *ref, int width, int height, int stride, uint16_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, int bd, ConvolveParams *conv_params, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta);
|
||||
#ifndef THE_SSE1
|
||||
void av1_highbd_warp_affine_sse4_1(const int32_t *mat, const uint16_t *ref, int width, int height, int stride, uint16_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, int bd, ConvolveParams *conv_params, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta);
|
||||
#endif
|
||||
/* Function pointer for av1_highbd_warp_affine. setup_rtcd_internal() sets it
 * to av1_highbd_warp_affine_c; the _sse4_1 prototype above is declared only
 * when THE_SSE1 is not defined. */
RTCD_EXTERN void (*av1_highbd_warp_affine)(const int32_t *mat, const uint16_t *ref, int width, int height, int stride, uint16_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, int bd, ConvolveParams *conv_params, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta);
|
||||
|
||||
void av1_highbd_wiener_convolve_add_src_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, const ConvolveParams *conv_params, int bps);
|
||||
#ifndef THE_SSE1
|
||||
void av1_highbd_wiener_convolve_add_src_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, const ConvolveParams *conv_params, int bps);
|
||||
void av1_highbd_wiener_convolve_add_src_avx2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, const ConvolveParams *conv_params, int bps);
|
||||
#endif
|
||||
/* Function pointer for av1_highbd_wiener_convolve_add_src.
 * setup_rtcd_internal() sets it to av1_highbd_wiener_convolve_add_src_c; the
 * _ssse3 and _avx2 prototypes above are declared only when THE_SSE1 is not
 * defined. */
RTCD_EXTERN void (*av1_highbd_wiener_convolve_add_src)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, const ConvolveParams *conv_params, int bps);
|
||||
|
||||
void av1_inv_txfm2d_add_16x16_c(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd);
|
||||
@ -268,7 +332,9 @@ void av1_inv_txfm2d_add_4x16_c(const int32_t *input, uint16_t *output, int strid
|
||||
#define av1_inv_txfm2d_add_4x16 av1_inv_txfm2d_add_4x16_c
|
||||
|
||||
void av1_inv_txfm2d_add_4x4_c(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd);
|
||||
#ifndef THE_SSE1
|
||||
void av1_inv_txfm2d_add_4x4_sse4_1(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd);
|
||||
#endif
|
||||
/* Function pointer for av1_inv_txfm2d_add_4x4. setup_rtcd_internal() sets it
 * to av1_inv_txfm2d_add_4x4_c; the _sse4_1 prototype above is declared only
 * when THE_SSE1 is not defined. */
RTCD_EXTERN void (*av1_inv_txfm2d_add_4x4)(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd);
|
||||
|
||||
void av1_inv_txfm2d_add_4x8_c(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd);
|
||||
@ -293,135 +359,183 @@ void av1_inv_txfm2d_add_8x4_c(const int32_t *input, uint16_t *output, int stride
|
||||
#define av1_inv_txfm2d_add_8x4 av1_inv_txfm2d_add_8x4_c
|
||||
|
||||
void av1_inv_txfm2d_add_8x8_c(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd);
|
||||
#ifndef THE_SSE1
|
||||
void av1_inv_txfm2d_add_8x8_sse4_1(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd);
|
||||
#endif
|
||||
/* Function pointer for av1_inv_txfm2d_add_8x8. setup_rtcd_internal() sets it
 * to av1_inv_txfm2d_add_8x8_c; the _sse4_1 prototype above is declared only
 * when THE_SSE1 is not defined. */
RTCD_EXTERN void (*av1_inv_txfm2d_add_8x8)(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd);
|
||||
|
||||
void av1_inv_txfm_add_c(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
|
||||
#ifndef THE_SSE1
|
||||
void av1_inv_txfm_add_ssse3(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
|
||||
void av1_inv_txfm_add_avx2(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
|
||||
#endif
|
||||
/* Function pointer for av1_inv_txfm_add. setup_rtcd_internal() sets it to
 * av1_inv_txfm_add_c; the _ssse3 and _avx2 prototypes above are declared only
 * when THE_SSE1 is not defined. */
RTCD_EXTERN void (*av1_inv_txfm_add)(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
|
||||
|
||||
void av1_jnt_convolve_2d_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
#ifndef THE_SSE1
|
||||
void av1_jnt_convolve_2d_ssse3(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_2d_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
#endif
|
||||
/* Function pointer for av1_jnt_convolve_2d. setup_rtcd_internal() sets it to
 * av1_jnt_convolve_2d_c; the _ssse3 and _avx2 prototypes above are declared
 * only when THE_SSE1 is not defined. */
RTCD_EXTERN void (*av1_jnt_convolve_2d)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
|
||||
void av1_jnt_convolve_2d_copy_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
#ifndef THE_SSE1
|
||||
void av1_jnt_convolve_2d_copy_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_2d_copy_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
#endif
|
||||
/* Function pointer for av1_jnt_convolve_2d_copy. setup_rtcd_internal() sets it
 * to av1_jnt_convolve_2d_copy_c; the _sse2 and _avx2 prototypes above are
 * declared only when THE_SSE1 is not defined. */
RTCD_EXTERN void (*av1_jnt_convolve_2d_copy)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
|
||||
void av1_jnt_convolve_x_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
#ifndef THE_SSE1
|
||||
void av1_jnt_convolve_x_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_x_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
#endif
|
||||
/* Function pointer for av1_jnt_convolve_x. setup_rtcd_internal() sets it to
 * av1_jnt_convolve_x_c; the _sse2 and _avx2 prototypes above are declared only
 * when THE_SSE1 is not defined. */
RTCD_EXTERN void (*av1_jnt_convolve_x)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
|
||||
void av1_jnt_convolve_y_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
#ifndef THE_SSE1
|
||||
void av1_jnt_convolve_y_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_y_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
#endif
|
||||
/* Function pointer for av1_jnt_convolve_y. setup_rtcd_internal() sets it to
 * av1_jnt_convolve_y_c; the _sse2 and _avx2 prototypes above are declared only
 * when THE_SSE1 is not defined. */
RTCD_EXTERN void (*av1_jnt_convolve_y)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
|
||||
int av1_selfguided_restoration_c(const uint8_t *dgd8, int width, int height,
|
||||
int dgd_stride, int32_t *flt0, int32_t *flt1, int flt_stride,
|
||||
int sgr_params_idx, int bit_depth, int highbd);
|
||||
#ifndef THE_SSE1
|
||||
int av1_selfguided_restoration_sse4_1(const uint8_t *dgd8, int width, int height,
|
||||
int dgd_stride, int32_t *flt0, int32_t *flt1, int flt_stride,
|
||||
int sgr_params_idx, int bit_depth, int highbd);
|
||||
int av1_selfguided_restoration_avx2(const uint8_t *dgd8, int width, int height,
|
||||
int dgd_stride, int32_t *flt0, int32_t *flt1, int flt_stride,
|
||||
int sgr_params_idx, int bit_depth, int highbd);
|
||||
#endif
|
||||
/* Function pointer for av1_selfguided_restoration (returns int).
 * setup_rtcd_internal() sets it to av1_selfguided_restoration_c; the _sse4_1
 * and _avx2 prototypes above are declared only when THE_SSE1 is not defined. */
RTCD_EXTERN int (*av1_selfguided_restoration)(const uint8_t *dgd8, int width, int height,
|
||||
int dgd_stride, int32_t *flt0, int32_t *flt1, int flt_stride,
|
||||
int sgr_params_idx, int bit_depth, int highbd);
|
||||
|
||||
void av1_upsample_intra_edge_c(uint8_t *p, int sz);
|
||||
#ifndef THE_SSE1
|
||||
void av1_upsample_intra_edge_sse4_1(uint8_t *p, int sz);
|
||||
#endif
|
||||
/* Function pointer for av1_upsample_intra_edge. setup_rtcd_internal() sets it
 * to av1_upsample_intra_edge_c; the _sse4_1 prototype above is declared only
 * when THE_SSE1 is not defined. */
RTCD_EXTERN void (*av1_upsample_intra_edge)(uint8_t *p, int sz);
|
||||
|
||||
void av1_upsample_intra_edge_high_c(uint16_t *p, int sz, int bd);
|
||||
#ifndef THE_SSE1
|
||||
void av1_upsample_intra_edge_high_sse4_1(uint16_t *p, int sz, int bd);
|
||||
#endif
|
||||
/* Function pointer for av1_upsample_intra_edge_high. setup_rtcd_internal()
 * sets it to av1_upsample_intra_edge_high_c; the _sse4_1 prototype above is
 * declared only when THE_SSE1 is not defined. */
RTCD_EXTERN void (*av1_upsample_intra_edge_high)(uint16_t *p, int sz, int bd);
|
||||
|
||||
void av1_warp_affine_c(const int32_t *mat, const uint8_t *ref, int width, int height, int stride, uint8_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, ConvolveParams *conv_params, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta);
|
||||
#ifndef THE_SSE1
|
||||
void av1_warp_affine_sse4_1(const int32_t *mat, const uint8_t *ref, int width, int height, int stride, uint8_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, ConvolveParams *conv_params, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta);
|
||||
#endif
|
||||
/* Function pointer for av1_warp_affine. setup_rtcd_internal() sets it to
 * av1_warp_affine_c; the _sse4_1 prototype above is declared only when
 * THE_SSE1 is not defined. */
RTCD_EXTERN void (*av1_warp_affine)(const int32_t *mat, const uint8_t *ref, int width, int height, int stride, uint8_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, ConvolveParams *conv_params, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta);
|
||||
|
||||
void av1_wiener_convolve_add_src_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, const ConvolveParams *conv_params);
|
||||
#ifndef THE_SSE1
|
||||
void av1_wiener_convolve_add_src_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, const ConvolveParams *conv_params);
|
||||
void av1_wiener_convolve_add_src_avx2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, const ConvolveParams *conv_params);
|
||||
#endif
|
||||
/* Function pointer for av1_wiener_convolve_add_src. setup_rtcd_internal() sets
 * it to av1_wiener_convolve_add_src_c; the _sse2 and _avx2 prototypes above
 * are declared only when THE_SSE1 is not defined. */
RTCD_EXTERN void (*av1_wiener_convolve_add_src)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, const ConvolveParams *conv_params);
|
||||
|
||||
void cdef_filter_block_c(uint8_t *dst8, uint16_t *dst16, int dstride, const uint16_t *in, int pri_strength, int sec_strength, int dir, int pri_damping, int sec_damping, int bsize, int max, int coeff_shift);
|
||||
#ifndef THE_SSE1
|
||||
void cdef_filter_block_sse2(uint8_t *dst8, uint16_t *dst16, int dstride, const uint16_t *in, int pri_strength, int sec_strength, int dir, int pri_damping, int sec_damping, int bsize, int max, int coeff_shift);
|
||||
void cdef_filter_block_ssse3(uint8_t *dst8, uint16_t *dst16, int dstride, const uint16_t *in, int pri_strength, int sec_strength, int dir, int pri_damping, int sec_damping, int bsize, int max, int coeff_shift);
|
||||
void cdef_filter_block_sse4_1(uint8_t *dst8, uint16_t *dst16, int dstride, const uint16_t *in, int pri_strength, int sec_strength, int dir, int pri_damping, int sec_damping, int bsize, int max, int coeff_shift);
|
||||
void cdef_filter_block_avx2(uint8_t *dst8, uint16_t *dst16, int dstride, const uint16_t *in, int pri_strength, int sec_strength, int dir, int pri_damping, int sec_damping, int bsize, int max, int coeff_shift);
|
||||
#endif
|
||||
/* Function pointer for cdef_filter_block. setup_rtcd_internal() sets it to
 * cdef_filter_block_c; the _sse2, _ssse3, _sse4_1 and _avx2 prototypes above
 * are declared only when THE_SSE1 is not defined. */
RTCD_EXTERN void (*cdef_filter_block)(uint8_t *dst8, uint16_t *dst16, int dstride, const uint16_t *in, int pri_strength, int sec_strength, int dir, int pri_damping, int sec_damping, int bsize, int max, int coeff_shift);
|
||||
|
||||
int cdef_find_dir_c(const uint16_t *img, int stride, int32_t *var, int coeff_shift);
|
||||
#ifndef THE_SSE1
|
||||
int cdef_find_dir_sse2(const uint16_t *img, int stride, int32_t *var, int coeff_shift);
|
||||
int cdef_find_dir_ssse3(const uint16_t *img, int stride, int32_t *var, int coeff_shift);
|
||||
int cdef_find_dir_sse4_1(const uint16_t *img, int stride, int32_t *var, int coeff_shift);
|
||||
int cdef_find_dir_avx2(const uint16_t *img, int stride, int32_t *var, int coeff_shift);
|
||||
#endif
|
||||
/* Function pointer for cdef_find_dir (returns int). setup_rtcd_internal() sets
 * it to cdef_find_dir_c; the _sse2, _ssse3, _sse4_1 and _avx2 prototypes above
 * are declared only when THE_SSE1 is not defined. */
RTCD_EXTERN int (*cdef_find_dir)(const uint16_t *img, int stride, int32_t *var, int coeff_shift);
|
||||
|
||||
cfl_subsample_hbd_fn cfl_get_luma_subsampling_420_hbd_c(TX_SIZE tx_size);
|
||||
#ifndef THE_SSE1
|
||||
cfl_subsample_hbd_fn cfl_get_luma_subsampling_420_hbd_ssse3(TX_SIZE tx_size);
|
||||
cfl_subsample_hbd_fn cfl_get_luma_subsampling_420_hbd_avx2(TX_SIZE tx_size);
|
||||
#endif
|
||||
/* Function pointer for cfl_get_luma_subsampling_420_hbd; the pointed-to
 * function takes a TX_SIZE and returns a cfl_subsample_hbd_fn.
 * setup_rtcd_internal() sets it to cfl_get_luma_subsampling_420_hbd_c; the
 * _ssse3 and _avx2 prototypes above are declared only when THE_SSE1 is not
 * defined. */
RTCD_EXTERN cfl_subsample_hbd_fn (*cfl_get_luma_subsampling_420_hbd)(TX_SIZE tx_size);
|
||||
|
||||
cfl_subsample_lbd_fn cfl_get_luma_subsampling_420_lbd_c(TX_SIZE tx_size);
|
||||
#ifndef THE_SSE1
|
||||
cfl_subsample_lbd_fn cfl_get_luma_subsampling_420_lbd_ssse3(TX_SIZE tx_size);
|
||||
cfl_subsample_lbd_fn cfl_get_luma_subsampling_420_lbd_avx2(TX_SIZE tx_size);
|
||||
#endif
|
||||
/* Function pointer for cfl_get_luma_subsampling_420_lbd; the pointed-to
 * function takes a TX_SIZE and returns a cfl_subsample_lbd_fn.
 * setup_rtcd_internal() sets it to cfl_get_luma_subsampling_420_lbd_c; the
 * _ssse3 and _avx2 prototypes above are declared only when THE_SSE1 is not
 * defined. */
RTCD_EXTERN cfl_subsample_lbd_fn (*cfl_get_luma_subsampling_420_lbd)(TX_SIZE tx_size);
|
||||
|
||||
cfl_subsample_hbd_fn cfl_get_luma_subsampling_422_hbd_c(TX_SIZE tx_size);
|
||||
#ifndef THE_SSE1
|
||||
cfl_subsample_hbd_fn cfl_get_luma_subsampling_422_hbd_ssse3(TX_SIZE tx_size);
|
||||
cfl_subsample_hbd_fn cfl_get_luma_subsampling_422_hbd_avx2(TX_SIZE tx_size);
|
||||
#endif
|
||||
/* Function pointer for cfl_get_luma_subsampling_422_hbd; the pointed-to
 * function takes a TX_SIZE and returns a cfl_subsample_hbd_fn.
 * setup_rtcd_internal() sets it to cfl_get_luma_subsampling_422_hbd_c; the
 * _ssse3 and _avx2 prototypes above are declared only when THE_SSE1 is not
 * defined. */
RTCD_EXTERN cfl_subsample_hbd_fn (*cfl_get_luma_subsampling_422_hbd)(TX_SIZE tx_size);
|
||||
|
||||
cfl_subsample_lbd_fn cfl_get_luma_subsampling_422_lbd_c(TX_SIZE tx_size);
|
||||
#ifndef THE_SSE1
|
||||
cfl_subsample_lbd_fn cfl_get_luma_subsampling_422_lbd_ssse3(TX_SIZE tx_size);
|
||||
cfl_subsample_lbd_fn cfl_get_luma_subsampling_422_lbd_avx2(TX_SIZE tx_size);
|
||||
#endif
|
||||
/* Function pointer for cfl_get_luma_subsampling_422_lbd; the pointed-to
 * function takes a TX_SIZE and returns a cfl_subsample_lbd_fn.
 * setup_rtcd_internal() sets it to cfl_get_luma_subsampling_422_lbd_c; the
 * _ssse3 and _avx2 prototypes above are declared only when THE_SSE1 is not
 * defined. */
RTCD_EXTERN cfl_subsample_lbd_fn (*cfl_get_luma_subsampling_422_lbd)(TX_SIZE tx_size);
|
||||
|
||||
cfl_subsample_hbd_fn cfl_get_luma_subsampling_444_hbd_c(TX_SIZE tx_size);
|
||||
#ifndef THE_SSE1
|
||||
cfl_subsample_hbd_fn cfl_get_luma_subsampling_444_hbd_ssse3(TX_SIZE tx_size);
|
||||
cfl_subsample_hbd_fn cfl_get_luma_subsampling_444_hbd_avx2(TX_SIZE tx_size);
|
||||
#endif
|
||||
/* Function pointer for cfl_get_luma_subsampling_444_hbd; the pointed-to
 * function takes a TX_SIZE and returns a cfl_subsample_hbd_fn.
 * setup_rtcd_internal() sets it to cfl_get_luma_subsampling_444_hbd_c; the
 * _ssse3 and _avx2 prototypes above are declared only when THE_SSE1 is not
 * defined. */
RTCD_EXTERN cfl_subsample_hbd_fn (*cfl_get_luma_subsampling_444_hbd)(TX_SIZE tx_size);
|
||||
|
||||
cfl_subsample_lbd_fn cfl_get_luma_subsampling_444_lbd_c(TX_SIZE tx_size);
|
||||
#ifndef THE_SSE1
|
||||
cfl_subsample_lbd_fn cfl_get_luma_subsampling_444_lbd_ssse3(TX_SIZE tx_size);
|
||||
cfl_subsample_lbd_fn cfl_get_luma_subsampling_444_lbd_avx2(TX_SIZE tx_size);
|
||||
#endif
|
||||
/* Function pointer for cfl_get_luma_subsampling_444_lbd; the pointed-to
 * function takes a TX_SIZE and returns a cfl_subsample_lbd_fn.
 * setup_rtcd_internal() sets it to cfl_get_luma_subsampling_444_lbd_c; the
 * _ssse3 and _avx2 prototypes above are declared only when THE_SSE1 is not
 * defined. */
RTCD_EXTERN cfl_subsample_lbd_fn (*cfl_get_luma_subsampling_444_lbd)(TX_SIZE tx_size);
|
||||
|
||||
void copy_rect8_16bit_to_16bit_c(uint16_t *dst, int dstride, const uint16_t *src, int sstride, int v, int h);
|
||||
#ifndef THE_SSE1
|
||||
void copy_rect8_16bit_to_16bit_sse2(uint16_t *dst, int dstride, const uint16_t *src, int sstride, int v, int h);
|
||||
void copy_rect8_16bit_to_16bit_ssse3(uint16_t *dst, int dstride, const uint16_t *src, int sstride, int v, int h);
|
||||
void copy_rect8_16bit_to_16bit_sse4_1(uint16_t *dst, int dstride, const uint16_t *src, int sstride, int v, int h);
|
||||
void copy_rect8_16bit_to_16bit_avx2(uint16_t *dst, int dstride, const uint16_t *src, int sstride, int v, int h);
|
||||
#endif
|
||||
/* Function pointer for copy_rect8_16bit_to_16bit. setup_rtcd_internal() sets
 * it to copy_rect8_16bit_to_16bit_c; the _sse2, _ssse3, _sse4_1 and _avx2
 * prototypes above are declared only when THE_SSE1 is not defined. */
RTCD_EXTERN void (*copy_rect8_16bit_to_16bit)(uint16_t *dst, int dstride, const uint16_t *src, int sstride, int v, int h);
|
||||
|
||||
void copy_rect8_8bit_to_16bit_c(uint16_t *dst, int dstride, const uint8_t *src, int sstride, int v, int h);
|
||||
#ifndef THE_SSE1
|
||||
void copy_rect8_8bit_to_16bit_sse2(uint16_t *dst, int dstride, const uint8_t *src, int sstride, int v, int h);
|
||||
void copy_rect8_8bit_to_16bit_ssse3(uint16_t *dst, int dstride, const uint8_t *src, int sstride, int v, int h);
|
||||
void copy_rect8_8bit_to_16bit_sse4_1(uint16_t *dst, int dstride, const uint8_t *src, int sstride, int v, int h);
|
||||
void copy_rect8_8bit_to_16bit_avx2(uint16_t *dst, int dstride, const uint8_t *src, int sstride, int v, int h);
|
||||
#endif
|
||||
/* Function pointer for copy_rect8_8bit_to_16bit. setup_rtcd_internal() sets it
 * to copy_rect8_8bit_to_16bit_c; the _sse2, _ssse3, _sse4_1 and _avx2
 * prototypes above are declared only when THE_SSE1 is not defined. */
RTCD_EXTERN void (*copy_rect8_8bit_to_16bit)(uint16_t *dst, int dstride, const uint8_t *src, int sstride, int v, int h);
|
||||
|
||||
cfl_predict_hbd_fn get_predict_hbd_fn_c(TX_SIZE tx_size);
|
||||
#ifndef THE_SSE1
|
||||
cfl_predict_hbd_fn get_predict_hbd_fn_ssse3(TX_SIZE tx_size);
|
||||
cfl_predict_hbd_fn get_predict_hbd_fn_avx2(TX_SIZE tx_size);
|
||||
#endif
|
||||
/* Function pointer for get_predict_hbd_fn; the pointed-to function takes a
 * TX_SIZE and returns a cfl_predict_hbd_fn. setup_rtcd_internal() sets it to
 * get_predict_hbd_fn_c; the _ssse3 and _avx2 prototypes above are declared
 * only when THE_SSE1 is not defined. */
RTCD_EXTERN cfl_predict_hbd_fn (*get_predict_hbd_fn)(TX_SIZE tx_size);
|
||||
|
||||
cfl_predict_lbd_fn get_predict_lbd_fn_c(TX_SIZE tx_size);
|
||||
#ifndef THE_SSE1
|
||||
cfl_predict_lbd_fn get_predict_lbd_fn_ssse3(TX_SIZE tx_size);
|
||||
cfl_predict_lbd_fn get_predict_lbd_fn_avx2(TX_SIZE tx_size);
|
||||
#endif
|
||||
/* Function pointer for get_predict_lbd_fn; the pointed-to function takes a
 * TX_SIZE and returns a cfl_predict_lbd_fn. setup_rtcd_internal() sets it to
 * get_predict_lbd_fn_c; the _ssse3 and _avx2 prototypes above are declared
 * only when THE_SSE1 is not defined. */
RTCD_EXTERN cfl_predict_lbd_fn (*get_predict_lbd_fn)(TX_SIZE tx_size);
|
||||
|
||||
cfl_subtract_average_fn get_subtract_average_fn_c(TX_SIZE tx_size);
|
||||
#ifndef THE_SSE1
|
||||
cfl_subtract_average_fn get_subtract_average_fn_sse2(TX_SIZE tx_size);
|
||||
cfl_subtract_average_fn get_subtract_average_fn_avx2(TX_SIZE tx_size);
|
||||
#endif
|
||||
/* Function pointer for get_subtract_average_fn; the pointed-to function takes
 * a TX_SIZE and returns a cfl_subtract_average_fn. setup_rtcd_internal() sets
 * it to get_subtract_average_fn_c; the _sse2 and _avx2 prototypes above are
 * declared only when THE_SSE1 is not defined. */
RTCD_EXTERN cfl_subtract_average_fn (*get_subtract_average_fn)(TX_SIZE tx_size);
|
||||
|
||||
void av1_rtcd(void);
|
||||
@ -430,171 +544,173 @@ void av1_rtcd(void);
|
||||
#include "aom_ports/x86.h"
|
||||
static void setup_rtcd_internal(void)
|
||||
{
|
||||
apply_selfguided_restoration = apply_selfguided_restoration_c;
|
||||
av1_build_compound_diffwtd_mask = av1_build_compound_diffwtd_mask_c;
|
||||
av1_build_compound_diffwtd_mask_d16 = av1_build_compound_diffwtd_mask_d16_c;
|
||||
av1_build_compound_diffwtd_mask_highbd = av1_build_compound_diffwtd_mask_highbd_c;
|
||||
av1_convolve_2d_copy_sr = av1_convolve_2d_copy_sr_c;
|
||||
av1_convolve_2d_scale = av1_convolve_2d_scale_c;
|
||||
av1_convolve_2d_sr = av1_convolve_2d_sr_c;
|
||||
av1_convolve_horiz_rs = av1_convolve_horiz_rs_c;
|
||||
av1_convolve_x_sr = av1_convolve_x_sr_c;
|
||||
av1_convolve_y_sr = av1_convolve_y_sr_c;
|
||||
av1_filter_intra_edge = av1_filter_intra_edge_c;
|
||||
av1_filter_intra_edge_high = av1_filter_intra_edge_high_c;
|
||||
av1_filter_intra_predictor = av1_filter_intra_predictor_c;
|
||||
av1_highbd_convolve_2d_copy_sr = av1_highbd_convolve_2d_copy_sr_c;
|
||||
av1_highbd_convolve_2d_scale = av1_highbd_convolve_2d_scale_c;
|
||||
av1_highbd_convolve_2d_sr = av1_highbd_convolve_2d_sr_c;
|
||||
av1_highbd_convolve_horiz_rs = av1_highbd_convolve_horiz_rs_c;
|
||||
av1_highbd_convolve_x_sr = av1_highbd_convolve_x_sr_c;
|
||||
av1_highbd_convolve_y_sr = av1_highbd_convolve_y_sr_c;
|
||||
av1_highbd_inv_txfm_add = av1_highbd_inv_txfm_add_c;
|
||||
av1_highbd_inv_txfm_add_16x16 = av1_highbd_inv_txfm_add_16x16_c;
|
||||
av1_highbd_inv_txfm_add_16x8 = av1_highbd_inv_txfm_add_16x8_c;
|
||||
av1_highbd_inv_txfm_add_32x32 = av1_highbd_inv_txfm_add_32x32_c;
|
||||
av1_highbd_inv_txfm_add_4x4 = av1_highbd_inv_txfm_add_4x4_c;
|
||||
av1_highbd_inv_txfm_add_8x16 = av1_highbd_inv_txfm_add_8x16_c;
|
||||
av1_highbd_inv_txfm_add_8x8 = av1_highbd_inv_txfm_add_8x8_c;
|
||||
av1_highbd_jnt_convolve_2d = av1_highbd_jnt_convolve_2d_c;
|
||||
av1_highbd_jnt_convolve_2d_copy = av1_highbd_jnt_convolve_2d_copy_c;
|
||||
av1_highbd_jnt_convolve_x = av1_highbd_jnt_convolve_x_c;
|
||||
av1_highbd_jnt_convolve_y = av1_highbd_jnt_convolve_y_c;
|
||||
av1_highbd_warp_affine = av1_highbd_warp_affine_c;
|
||||
av1_highbd_wiener_convolve_add_src = av1_highbd_wiener_convolve_add_src_c;
|
||||
av1_inv_txfm2d_add_4x4 = av1_inv_txfm2d_add_4x4_c;
|
||||
av1_inv_txfm2d_add_8x8 = av1_inv_txfm2d_add_8x8_c;
|
||||
av1_inv_txfm_add = av1_inv_txfm_add_c;
|
||||
av1_jnt_convolve_2d = av1_jnt_convolve_2d_c;
|
||||
av1_jnt_convolve_2d_copy = av1_jnt_convolve_2d_copy_c;
|
||||
av1_jnt_convolve_x = av1_jnt_convolve_x_c;
|
||||
av1_jnt_convolve_y = av1_jnt_convolve_y_c;
|
||||
av1_selfguided_restoration = av1_selfguided_restoration_c;
|
||||
av1_upsample_intra_edge_high = av1_upsample_intra_edge_high_c;
|
||||
av1_upsample_intra_edge = av1_upsample_intra_edge_c;
|
||||
av1_warp_affine = av1_warp_affine_c;
|
||||
av1_wiener_convolve_add_src = av1_wiener_convolve_add_src_c;
|
||||
cdef_filter_block = cdef_filter_block_c;
|
||||
cdef_find_dir = cdef_find_dir_c;
|
||||
cfl_get_luma_subsampling_420_hbd = cfl_get_luma_subsampling_420_hbd_c;
|
||||
cfl_get_luma_subsampling_420_lbd = cfl_get_luma_subsampling_420_lbd_c;
|
||||
cfl_get_luma_subsampling_422_hbd = cfl_get_luma_subsampling_422_hbd_c;
|
||||
cfl_get_luma_subsampling_422_lbd = cfl_get_luma_subsampling_422_lbd_c;
|
||||
cfl_get_luma_subsampling_444_hbd = cfl_get_luma_subsampling_444_hbd_c;
|
||||
cfl_get_luma_subsampling_444_lbd = cfl_get_luma_subsampling_444_lbd_c;
|
||||
copy_rect8_16bit_to_16bit = copy_rect8_16bit_to_16bit_c;
|
||||
copy_rect8_8bit_to_16bit = copy_rect8_8bit_to_16bit_c;
|
||||
get_predict_hbd_fn = get_predict_hbd_fn_c;
|
||||
get_predict_lbd_fn = get_predict_lbd_fn_c;
|
||||
get_subtract_average_fn = get_subtract_average_fn_c;
|
||||
#ifndef THE_SSE1
|
||||
int flags = x86_simd_caps();
|
||||
|
||||
(void)flags;
|
||||
|
||||
apply_selfguided_restoration = apply_selfguided_restoration_c;
|
||||
if (flags & HAS_SSE4_1) apply_selfguided_restoration = apply_selfguided_restoration_sse4_1;
|
||||
if (flags & HAS_AVX2) apply_selfguided_restoration = apply_selfguided_restoration_avx2;
|
||||
av1_build_compound_diffwtd_mask = av1_build_compound_diffwtd_mask_c;
|
||||
if (flags & HAS_SSE4_1) av1_build_compound_diffwtd_mask = av1_build_compound_diffwtd_mask_sse4_1;
|
||||
if (flags & HAS_AVX2) av1_build_compound_diffwtd_mask = av1_build_compound_diffwtd_mask_avx2;
|
||||
av1_build_compound_diffwtd_mask_d16 = av1_build_compound_diffwtd_mask_d16_c;
|
||||
if (flags & HAS_SSE4_1) av1_build_compound_diffwtd_mask_d16 = av1_build_compound_diffwtd_mask_d16_sse4_1;
|
||||
if (flags & HAS_AVX2) av1_build_compound_diffwtd_mask_d16 = av1_build_compound_diffwtd_mask_d16_avx2;
|
||||
av1_build_compound_diffwtd_mask_highbd = av1_build_compound_diffwtd_mask_highbd_c;
|
||||
if (flags & HAS_SSSE3) av1_build_compound_diffwtd_mask_highbd = av1_build_compound_diffwtd_mask_highbd_ssse3;
|
||||
if (flags & HAS_AVX2) av1_build_compound_diffwtd_mask_highbd = av1_build_compound_diffwtd_mask_highbd_avx2;
|
||||
av1_convolve_2d_copy_sr = av1_convolve_2d_copy_sr_c;
|
||||
if (flags & HAS_SSE2) av1_convolve_2d_copy_sr = av1_convolve_2d_copy_sr_sse2;
|
||||
if (flags & HAS_AVX2) av1_convolve_2d_copy_sr = av1_convolve_2d_copy_sr_avx2;
|
||||
av1_convolve_2d_scale = av1_convolve_2d_scale_c;
|
||||
if (flags & HAS_SSE4_1) av1_convolve_2d_scale = av1_convolve_2d_scale_sse4_1;
|
||||
av1_convolve_2d_sr = av1_convolve_2d_sr_c;
|
||||
if (flags & HAS_SSE2) av1_convolve_2d_sr = av1_convolve_2d_sr_sse2;
|
||||
if (flags & HAS_AVX2) av1_convolve_2d_sr = av1_convolve_2d_sr_avx2;
|
||||
av1_convolve_horiz_rs = av1_convolve_horiz_rs_c;
|
||||
if (flags & HAS_SSE4_1) av1_convolve_horiz_rs = av1_convolve_horiz_rs_sse4_1;
|
||||
av1_convolve_x_sr = av1_convolve_x_sr_c;
|
||||
if (flags & HAS_SSE2) av1_convolve_x_sr = av1_convolve_x_sr_sse2;
|
||||
if (flags & HAS_AVX2) av1_convolve_x_sr = av1_convolve_x_sr_avx2;
|
||||
av1_convolve_y_sr = av1_convolve_y_sr_c;
|
||||
if (flags & HAS_SSE2) av1_convolve_y_sr = av1_convolve_y_sr_sse2;
|
||||
if (flags & HAS_AVX2) av1_convolve_y_sr = av1_convolve_y_sr_avx2;
|
||||
av1_filter_intra_edge = av1_filter_intra_edge_c;
|
||||
if (flags & HAS_SSE4_1) av1_filter_intra_edge = av1_filter_intra_edge_sse4_1;
|
||||
av1_filter_intra_edge_high = av1_filter_intra_edge_high_c;
|
||||
if (flags & HAS_SSE4_1) av1_filter_intra_edge_high = av1_filter_intra_edge_high_sse4_1;
|
||||
av1_filter_intra_predictor = av1_filter_intra_predictor_c;
|
||||
if (flags & HAS_SSE4_1) av1_filter_intra_predictor = av1_filter_intra_predictor_sse4_1;
|
||||
av1_highbd_convolve_2d_copy_sr = av1_highbd_convolve_2d_copy_sr_c;
|
||||
if (flags & HAS_SSE2) av1_highbd_convolve_2d_copy_sr = av1_highbd_convolve_2d_copy_sr_sse2;
|
||||
if (flags & HAS_AVX2) av1_highbd_convolve_2d_copy_sr = av1_highbd_convolve_2d_copy_sr_avx2;
|
||||
av1_highbd_convolve_2d_scale = av1_highbd_convolve_2d_scale_c;
|
||||
if (flags & HAS_SSE4_1) av1_highbd_convolve_2d_scale = av1_highbd_convolve_2d_scale_sse4_1;
|
||||
av1_highbd_convolve_2d_sr = av1_highbd_convolve_2d_sr_c;
|
||||
if (flags & HAS_SSSE3) av1_highbd_convolve_2d_sr = av1_highbd_convolve_2d_sr_ssse3;
|
||||
if (flags & HAS_AVX2) av1_highbd_convolve_2d_sr = av1_highbd_convolve_2d_sr_avx2;
|
||||
av1_highbd_convolve_horiz_rs = av1_highbd_convolve_horiz_rs_c;
|
||||
if (flags & HAS_SSE4_1) av1_highbd_convolve_horiz_rs = av1_highbd_convolve_horiz_rs_sse4_1;
|
||||
av1_highbd_convolve_x_sr = av1_highbd_convolve_x_sr_c;
|
||||
if (flags & HAS_SSSE3) av1_highbd_convolve_x_sr = av1_highbd_convolve_x_sr_ssse3;
|
||||
if (flags & HAS_AVX2) av1_highbd_convolve_x_sr = av1_highbd_convolve_x_sr_avx2;
|
||||
av1_highbd_convolve_y_sr = av1_highbd_convolve_y_sr_c;
|
||||
if (flags & HAS_SSSE3) av1_highbd_convolve_y_sr = av1_highbd_convolve_y_sr_ssse3;
|
||||
if (flags & HAS_AVX2) av1_highbd_convolve_y_sr = av1_highbd_convolve_y_sr_avx2;
|
||||
av1_highbd_inv_txfm_add = av1_highbd_inv_txfm_add_c;
|
||||
if (flags & HAS_SSE4_1) av1_highbd_inv_txfm_add = av1_highbd_inv_txfm_add_sse4_1;
|
||||
if (flags & HAS_AVX2) av1_highbd_inv_txfm_add = av1_highbd_inv_txfm_add_avx2;
|
||||
av1_highbd_inv_txfm_add_16x16 = av1_highbd_inv_txfm_add_16x16_c;
|
||||
if (flags & HAS_SSE4_1) av1_highbd_inv_txfm_add_16x16 = av1_highbd_inv_txfm_add_16x16_sse4_1;
|
||||
av1_highbd_inv_txfm_add_16x8 = av1_highbd_inv_txfm_add_16x8_c;
|
||||
if (flags & HAS_SSE4_1) av1_highbd_inv_txfm_add_16x8 = av1_highbd_inv_txfm_add_16x8_sse4_1;
|
||||
av1_highbd_inv_txfm_add_32x32 = av1_highbd_inv_txfm_add_32x32_c;
|
||||
if (flags & HAS_SSE4_1) av1_highbd_inv_txfm_add_32x32 = av1_highbd_inv_txfm_add_32x32_sse4_1;
|
||||
if (flags & HAS_AVX2) av1_highbd_inv_txfm_add_32x32 = av1_highbd_inv_txfm_add_32x32_avx2;
|
||||
av1_highbd_inv_txfm_add_4x4 = av1_highbd_inv_txfm_add_4x4_c;
|
||||
if (flags & HAS_SSE4_1) av1_highbd_inv_txfm_add_4x4 = av1_highbd_inv_txfm_add_4x4_sse4_1;
|
||||
av1_highbd_inv_txfm_add_8x16 = av1_highbd_inv_txfm_add_8x16_c;
|
||||
if (flags & HAS_SSE4_1) av1_highbd_inv_txfm_add_8x16 = av1_highbd_inv_txfm_add_8x16_sse4_1;
|
||||
av1_highbd_inv_txfm_add_8x8 = av1_highbd_inv_txfm_add_8x8_c;
|
||||
if (flags & HAS_SSE4_1) av1_highbd_inv_txfm_add_8x8 = av1_highbd_inv_txfm_add_8x8_sse4_1;
|
||||
av1_highbd_jnt_convolve_2d = av1_highbd_jnt_convolve_2d_c;
|
||||
if (flags & HAS_SSE4_1) av1_highbd_jnt_convolve_2d = av1_highbd_jnt_convolve_2d_sse4_1;
|
||||
if (flags & HAS_AVX2) av1_highbd_jnt_convolve_2d = av1_highbd_jnt_convolve_2d_avx2;
|
||||
av1_highbd_jnt_convolve_2d_copy = av1_highbd_jnt_convolve_2d_copy_c;
|
||||
if (flags & HAS_SSE4_1) av1_highbd_jnt_convolve_2d_copy = av1_highbd_jnt_convolve_2d_copy_sse4_1;
|
||||
if (flags & HAS_AVX2) av1_highbd_jnt_convolve_2d_copy = av1_highbd_jnt_convolve_2d_copy_avx2;
|
||||
av1_highbd_jnt_convolve_x = av1_highbd_jnt_convolve_x_c;
|
||||
if (flags & HAS_SSE4_1) av1_highbd_jnt_convolve_x = av1_highbd_jnt_convolve_x_sse4_1;
|
||||
if (flags & HAS_AVX2) av1_highbd_jnt_convolve_x = av1_highbd_jnt_convolve_x_avx2;
|
||||
av1_highbd_jnt_convolve_y = av1_highbd_jnt_convolve_y_c;
|
||||
if (flags & HAS_SSE4_1) av1_highbd_jnt_convolve_y = av1_highbd_jnt_convolve_y_sse4_1;
|
||||
if (flags & HAS_AVX2) av1_highbd_jnt_convolve_y = av1_highbd_jnt_convolve_y_avx2;
|
||||
av1_highbd_warp_affine = av1_highbd_warp_affine_c;
|
||||
if (flags & HAS_SSE4_1) av1_highbd_warp_affine = av1_highbd_warp_affine_sse4_1;
|
||||
av1_highbd_wiener_convolve_add_src = av1_highbd_wiener_convolve_add_src_c;
|
||||
if (flags & HAS_SSSE3) av1_highbd_wiener_convolve_add_src = av1_highbd_wiener_convolve_add_src_ssse3;
|
||||
if (flags & HAS_AVX2) av1_highbd_wiener_convolve_add_src = av1_highbd_wiener_convolve_add_src_avx2;
|
||||
av1_inv_txfm2d_add_4x4 = av1_inv_txfm2d_add_4x4_c;
|
||||
if (flags & HAS_SSE4_1) av1_inv_txfm2d_add_4x4 = av1_inv_txfm2d_add_4x4_sse4_1;
|
||||
av1_inv_txfm2d_add_8x8 = av1_inv_txfm2d_add_8x8_c;
|
||||
if (flags & HAS_SSE4_1) av1_inv_txfm2d_add_8x8 = av1_inv_txfm2d_add_8x8_sse4_1;
|
||||
av1_inv_txfm_add = av1_inv_txfm_add_c;
|
||||
if (flags & HAS_SSSE3) av1_inv_txfm_add = av1_inv_txfm_add_ssse3;
|
||||
if (flags & HAS_AVX2) av1_inv_txfm_add = av1_inv_txfm_add_avx2;
|
||||
av1_jnt_convolve_2d = av1_jnt_convolve_2d_c;
|
||||
if (flags & HAS_SSSE3) av1_jnt_convolve_2d = av1_jnt_convolve_2d_ssse3;
|
||||
if (flags & HAS_AVX2) av1_jnt_convolve_2d = av1_jnt_convolve_2d_avx2;
|
||||
av1_jnt_convolve_2d_copy = av1_jnt_convolve_2d_copy_c;
|
||||
if (flags & HAS_SSE2) av1_jnt_convolve_2d_copy = av1_jnt_convolve_2d_copy_sse2;
|
||||
if (flags & HAS_AVX2) av1_jnt_convolve_2d_copy = av1_jnt_convolve_2d_copy_avx2;
|
||||
av1_jnt_convolve_x = av1_jnt_convolve_x_c;
|
||||
if (flags & HAS_SSE2) av1_jnt_convolve_x = av1_jnt_convolve_x_sse2;
|
||||
if (flags & HAS_AVX2) av1_jnt_convolve_x = av1_jnt_convolve_x_avx2;
|
||||
av1_jnt_convolve_y = av1_jnt_convolve_y_c;
|
||||
if (flags & HAS_SSE2) av1_jnt_convolve_y = av1_jnt_convolve_y_sse2;
|
||||
if (flags & HAS_AVX2) av1_jnt_convolve_y = av1_jnt_convolve_y_avx2;
|
||||
av1_selfguided_restoration = av1_selfguided_restoration_c;
|
||||
if (flags & HAS_SSE4_1) av1_selfguided_restoration = av1_selfguided_restoration_sse4_1;
|
||||
if (flags & HAS_AVX2) av1_selfguided_restoration = av1_selfguided_restoration_avx2;
|
||||
av1_upsample_intra_edge = av1_upsample_intra_edge_c;
|
||||
if (flags & HAS_SSE4_1) av1_upsample_intra_edge = av1_upsample_intra_edge_sse4_1;
|
||||
av1_upsample_intra_edge_high = av1_upsample_intra_edge_high_c;
|
||||
if (flags & HAS_SSE4_1) av1_upsample_intra_edge_high = av1_upsample_intra_edge_high_sse4_1;
|
||||
av1_warp_affine = av1_warp_affine_c;
|
||||
if (flags & HAS_SSE4_1) av1_warp_affine = av1_warp_affine_sse4_1;
|
||||
av1_wiener_convolve_add_src = av1_wiener_convolve_add_src_c;
|
||||
if (flags & HAS_SSE2) av1_wiener_convolve_add_src = av1_wiener_convolve_add_src_sse2;
|
||||
if (flags & HAS_AVX2) av1_wiener_convolve_add_src = av1_wiener_convolve_add_src_avx2;
|
||||
cdef_filter_block = cdef_filter_block_c;
|
||||
if (flags & HAS_SSE2) cdef_filter_block = cdef_filter_block_sse2;
|
||||
if (flags & HAS_SSSE3) cdef_filter_block = cdef_filter_block_ssse3;
|
||||
if (flags & HAS_SSE4_1) cdef_filter_block = cdef_filter_block_sse4_1;
|
||||
if (flags & HAS_AVX2) cdef_filter_block = cdef_filter_block_avx2;
|
||||
cdef_find_dir = cdef_find_dir_c;
|
||||
if (flags & HAS_SSE2) cdef_find_dir = cdef_find_dir_sse2;
|
||||
if (flags & HAS_SSSE3) cdef_find_dir = cdef_find_dir_ssse3;
|
||||
if (flags & HAS_SSE4_1) cdef_find_dir = cdef_find_dir_sse4_1;
|
||||
if (flags & HAS_AVX2) cdef_find_dir = cdef_find_dir_avx2;
|
||||
cfl_get_luma_subsampling_420_hbd = cfl_get_luma_subsampling_420_hbd_c;
|
||||
if (flags & HAS_SSSE3) cfl_get_luma_subsampling_420_hbd = cfl_get_luma_subsampling_420_hbd_ssse3;
|
||||
if (flags & HAS_AVX2) cfl_get_luma_subsampling_420_hbd = cfl_get_luma_subsampling_420_hbd_avx2;
|
||||
cfl_get_luma_subsampling_420_lbd = cfl_get_luma_subsampling_420_lbd_c;
|
||||
if (flags & HAS_SSSE3) cfl_get_luma_subsampling_420_lbd = cfl_get_luma_subsampling_420_lbd_ssse3;
|
||||
if (flags & HAS_AVX2) cfl_get_luma_subsampling_420_lbd = cfl_get_luma_subsampling_420_lbd_avx2;
|
||||
cfl_get_luma_subsampling_422_hbd = cfl_get_luma_subsampling_422_hbd_c;
|
||||
if (flags & HAS_SSSE3) cfl_get_luma_subsampling_422_hbd = cfl_get_luma_subsampling_422_hbd_ssse3;
|
||||
if (flags & HAS_AVX2) cfl_get_luma_subsampling_422_hbd = cfl_get_luma_subsampling_422_hbd_avx2;
|
||||
cfl_get_luma_subsampling_422_lbd = cfl_get_luma_subsampling_422_lbd_c;
|
||||
if (flags & HAS_SSSE3) cfl_get_luma_subsampling_422_lbd = cfl_get_luma_subsampling_422_lbd_ssse3;
|
||||
if (flags & HAS_AVX2) cfl_get_luma_subsampling_422_lbd = cfl_get_luma_subsampling_422_lbd_avx2;
|
||||
cfl_get_luma_subsampling_444_hbd = cfl_get_luma_subsampling_444_hbd_c;
|
||||
if (flags & HAS_SSSE3) cfl_get_luma_subsampling_444_hbd = cfl_get_luma_subsampling_444_hbd_ssse3;
|
||||
if (flags & HAS_AVX2) cfl_get_luma_subsampling_444_hbd = cfl_get_luma_subsampling_444_hbd_avx2;
|
||||
cfl_get_luma_subsampling_444_lbd = cfl_get_luma_subsampling_444_lbd_c;
|
||||
if (flags & HAS_SSSE3) cfl_get_luma_subsampling_444_lbd = cfl_get_luma_subsampling_444_lbd_ssse3;
|
||||
if (flags & HAS_AVX2) cfl_get_luma_subsampling_444_lbd = cfl_get_luma_subsampling_444_lbd_avx2;
|
||||
copy_rect8_16bit_to_16bit = copy_rect8_16bit_to_16bit_c;
|
||||
if (flags & HAS_SSE2) copy_rect8_16bit_to_16bit = copy_rect8_16bit_to_16bit_sse2;
|
||||
if (flags & HAS_SSSE3) copy_rect8_16bit_to_16bit = copy_rect8_16bit_to_16bit_ssse3;
|
||||
if (flags & HAS_SSE4_1) copy_rect8_16bit_to_16bit = copy_rect8_16bit_to_16bit_sse4_1;
|
||||
if (flags & HAS_AVX2) copy_rect8_16bit_to_16bit = copy_rect8_16bit_to_16bit_avx2;
|
||||
copy_rect8_8bit_to_16bit = copy_rect8_8bit_to_16bit_c;
|
||||
if (flags & HAS_SSE2) copy_rect8_8bit_to_16bit = copy_rect8_8bit_to_16bit_sse2;
|
||||
if (flags & HAS_SSSE3) copy_rect8_8bit_to_16bit = copy_rect8_8bit_to_16bit_ssse3;
|
||||
if (flags & HAS_SSE4_1) copy_rect8_8bit_to_16bit = copy_rect8_8bit_to_16bit_sse4_1;
|
||||
if (flags & HAS_AVX2) copy_rect8_8bit_to_16bit = copy_rect8_8bit_to_16bit_avx2;
|
||||
get_predict_hbd_fn = get_predict_hbd_fn_c;
|
||||
if (flags & HAS_SSSE3) get_predict_hbd_fn = get_predict_hbd_fn_ssse3;
|
||||
if (flags & HAS_AVX2) get_predict_hbd_fn = get_predict_hbd_fn_avx2;
|
||||
get_predict_lbd_fn = get_predict_lbd_fn_c;
|
||||
if (flags & HAS_SSSE3) get_predict_lbd_fn = get_predict_lbd_fn_ssse3;
|
||||
if (flags & HAS_AVX2) get_predict_lbd_fn = get_predict_lbd_fn_avx2;
|
||||
get_subtract_average_fn = get_subtract_average_fn_c;
|
||||
if (flags & HAS_SSE2) get_subtract_average_fn = get_subtract_average_fn_sse2;
|
||||
if (flags & HAS_AVX2) get_subtract_average_fn = get_subtract_average_fn_avx2;
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
|
@ -35,6 +35,8 @@ if CONFIG['CPU_ARCH'] == 'x86_64':
|
||||
elif CONFIG['CPU_ARCH'] == 'x86':
|
||||
EXPORTS.aom += files['IA32_EXPORTS']
|
||||
SOURCES += files['IA32_SOURCES']
|
||||
if not CONFIG['THE_SSE1']:
|
||||
SOURCES += files['IA32_SSE2_SOURCES']
|
||||
USE_YASM = True
|
||||
if CONFIG['OS_TARGET'] == 'WINNT':
|
||||
if CONFIG['CC_TYPE'] == 'gcc':
|
||||
|
@ -247,6 +247,60 @@ files = {
|
||||
'../../third_party/aom/aom_dsp/loopfilter.c',
|
||||
'../../third_party/aom/aom_dsp/subtract.c',
|
||||
'../../third_party/aom/aom_dsp/x86/aom_asm_stubs.c',
|
||||
'../../third_party/aom/aom_mem/aom_mem.c',
|
||||
'../../third_party/aom/aom_ports/emms.asm',
|
||||
'../../third_party/aom/aom_ports/x86_abi_support.asm',
|
||||
'../../third_party/aom/aom_scale/aom_scale_rtcd.c',
|
||||
'../../third_party/aom/aom_scale/generic/aom_scale.c',
|
||||
'../../third_party/aom/aom_scale/generic/gen_scalers.c',
|
||||
'../../third_party/aom/aom_scale/generic/yv12config.c',
|
||||
'../../third_party/aom/aom_scale/generic/yv12extend.c',
|
||||
'../../third_party/aom/aom_util/aom_thread.c',
|
||||
'../../third_party/aom/aom_util/debug_util.c',
|
||||
'../../third_party/aom/av1/av1_dx_iface.c',
|
||||
'../../third_party/aom/av1/common/alloccommon.c',
|
||||
'../../third_party/aom/av1/common/av1_inv_txfm1d.c',
|
||||
'../../third_party/aom/av1/common/av1_inv_txfm2d.c',
|
||||
'../../third_party/aom/av1/common/av1_loopfilter.c',
|
||||
'../../third_party/aom/av1/common/av1_rtcd.c',
|
||||
'../../third_party/aom/av1/common/av1_txfm.c',
|
||||
'../../third_party/aom/av1/common/blockd.c',
|
||||
'../../third_party/aom/av1/common/cdef.c',
|
||||
'../../third_party/aom/av1/common/cdef_block.c',
|
||||
'../../third_party/aom/av1/common/cfl.c',
|
||||
'../../third_party/aom/av1/common/convolve.c',
|
||||
'../../third_party/aom/av1/common/debugmodes.c',
|
||||
'../../third_party/aom/av1/common/entropy.c',
|
||||
'../../third_party/aom/av1/common/entropymode.c',
|
||||
'../../third_party/aom/av1/common/entropymv.c',
|
||||
'../../third_party/aom/av1/common/frame_buffers.c',
|
||||
'../../third_party/aom/av1/common/idct.c',
|
||||
'../../third_party/aom/av1/common/mvref_common.c',
|
||||
'../../third_party/aom/av1/common/obu_util.c',
|
||||
'../../third_party/aom/av1/common/odintrin.c',
|
||||
'../../third_party/aom/av1/common/pred_common.c',
|
||||
'../../third_party/aom/av1/common/quant_common.c',
|
||||
'../../third_party/aom/av1/common/reconinter.c',
|
||||
'../../third_party/aom/av1/common/reconintra.c',
|
||||
'../../third_party/aom/av1/common/resize.c',
|
||||
'../../third_party/aom/av1/common/restoration.c',
|
||||
'../../third_party/aom/av1/common/scale.c',
|
||||
'../../third_party/aom/av1/common/scan.c',
|
||||
'../../third_party/aom/av1/common/seg_common.c',
|
||||
'../../third_party/aom/av1/common/thread_common.c',
|
||||
'../../third_party/aom/av1/common/tile_common.c',
|
||||
'../../third_party/aom/av1/common/timing.c',
|
||||
'../../third_party/aom/av1/common/txb_common.c',
|
||||
'../../third_party/aom/av1/common/warped_motion.c',
|
||||
'../../third_party/aom/av1/decoder/decodeframe.c',
|
||||
'../../third_party/aom/av1/decoder/decodemv.c',
|
||||
'../../third_party/aom/av1/decoder/decoder.c',
|
||||
'../../third_party/aom/av1/decoder/decodetxb.c',
|
||||
'../../third_party/aom/av1/decoder/detokenize.c',
|
||||
'../../third_party/aom/av1/decoder/dthread.c',
|
||||
'../../third_party/aom/av1/decoder/obu.c',
|
||||
],
|
||||
'IA32_SSE2_SOURCES': [
|
||||
'../../third_party/aom/aom_dsp/x86/aom_convolve_copy_sse2.asm',
|
||||
'../../third_party/aom/aom_dsp/x86/aom_high_subpixel_8t_sse2.asm',
|
||||
'../../third_party/aom/aom_dsp/x86/aom_high_subpixel_bilinear_sse2.asm',
|
||||
@ -274,55 +328,10 @@ files = {
|
||||
'../../third_party/aom/aom_dsp/x86/intrapred_ssse3.c',
|
||||
'../../third_party/aom/aom_dsp/x86/inv_wht_sse2.asm',
|
||||
'../../third_party/aom/aom_dsp/x86/loopfilter_sse2.c',
|
||||
'../../third_party/aom/aom_mem/aom_mem.c',
|
||||
'../../third_party/aom/aom_ports/emms.asm',
|
||||
'../../third_party/aom/aom_ports/x86_abi_support.asm',
|
||||
'../../third_party/aom/aom_scale/aom_scale_rtcd.c',
|
||||
'../../third_party/aom/aom_scale/generic/aom_scale.c',
|
||||
'../../third_party/aom/aom_scale/generic/gen_scalers.c',
|
||||
'../../third_party/aom/aom_scale/generic/yv12config.c',
|
||||
'../../third_party/aom/aom_scale/generic/yv12extend.c',
|
||||
'../../third_party/aom/aom_util/aom_thread.c',
|
||||
'../../third_party/aom/aom_util/debug_util.c',
|
||||
'../../third_party/aom/av1/av1_dx_iface.c',
|
||||
'../../third_party/aom/av1/common/alloccommon.c',
|
||||
'../../third_party/aom/av1/common/av1_inv_txfm1d.c',
|
||||
'../../third_party/aom/av1/common/av1_inv_txfm2d.c',
|
||||
'../../third_party/aom/av1/common/av1_loopfilter.c',
|
||||
'../../third_party/aom/av1/common/av1_rtcd.c',
|
||||
'../../third_party/aom/av1/common/av1_txfm.c',
|
||||
'../../third_party/aom/av1/common/blockd.c',
|
||||
'../../third_party/aom/av1/common/cdef.c',
|
||||
'../../third_party/aom/av1/common/cdef_block.c',
|
||||
'../../third_party/aom/av1/common/cdef_block_avx2.c',
|
||||
'../../third_party/aom/av1/common/cdef_block_sse2.c',
|
||||
'../../third_party/aom/av1/common/cdef_block_sse4.c',
|
||||
'../../third_party/aom/av1/common/cdef_block_ssse3.c',
|
||||
'../../third_party/aom/av1/common/cfl.c',
|
||||
'../../third_party/aom/av1/common/convolve.c',
|
||||
'../../third_party/aom/av1/common/debugmodes.c',
|
||||
'../../third_party/aom/av1/common/entropy.c',
|
||||
'../../third_party/aom/av1/common/entropymode.c',
|
||||
'../../third_party/aom/av1/common/entropymv.c',
|
||||
'../../third_party/aom/av1/common/frame_buffers.c',
|
||||
'../../third_party/aom/av1/common/idct.c',
|
||||
'../../third_party/aom/av1/common/mvref_common.c',
|
||||
'../../third_party/aom/av1/common/obu_util.c',
|
||||
'../../third_party/aom/av1/common/odintrin.c',
|
||||
'../../third_party/aom/av1/common/pred_common.c',
|
||||
'../../third_party/aom/av1/common/quant_common.c',
|
||||
'../../third_party/aom/av1/common/reconinter.c',
|
||||
'../../third_party/aom/av1/common/reconintra.c',
|
||||
'../../third_party/aom/av1/common/resize.c',
|
||||
'../../third_party/aom/av1/common/restoration.c',
|
||||
'../../third_party/aom/av1/common/scale.c',
|
||||
'../../third_party/aom/av1/common/scan.c',
|
||||
'../../third_party/aom/av1/common/seg_common.c',
|
||||
'../../third_party/aom/av1/common/thread_common.c',
|
||||
'../../third_party/aom/av1/common/tile_common.c',
|
||||
'../../third_party/aom/av1/common/timing.c',
|
||||
'../../third_party/aom/av1/common/txb_common.c',
|
||||
'../../third_party/aom/av1/common/warped_motion.c',
|
||||
'../../third_party/aom/av1/common/x86/av1_convolve_horiz_rs_sse4.c',
|
||||
'../../third_party/aom/av1/common/x86/av1_convolve_scale_sse4.c',
|
||||
'../../third_party/aom/av1/common/x86/av1_highbd_convolve_sse4.c',
|
||||
@ -360,13 +369,6 @@ files = {
|
||||
'../../third_party/aom/av1/common/x86/warp_plane_sse4.c',
|
||||
'../../third_party/aom/av1/common/x86/wiener_convolve_avx2.c',
|
||||
'../../third_party/aom/av1/common/x86/wiener_convolve_sse2.c',
|
||||
'../../third_party/aom/av1/decoder/decodeframe.c',
|
||||
'../../third_party/aom/av1/decoder/decodemv.c',
|
||||
'../../third_party/aom/av1/decoder/decoder.c',
|
||||
'../../third_party/aom/av1/decoder/decodetxb.c',
|
||||
'../../third_party/aom/av1/decoder/detokenize.c',
|
||||
'../../third_party/aom/av1/decoder/dthread.c',
|
||||
'../../third_party/aom/av1/decoder/obu.c',
|
||||
],
|
||||
'X64_EXPORTS': [
|
||||
'../../third_party/aom/aom/aom.h',
|
||||
|
@ -91,7 +91,7 @@ opus_val32 celt_inner_prod_sse2(
|
||||
int N);
|
||||
#endif
|
||||
|
||||
#if defined(OPUS_X86_MAY_HAVE_SSE2) && !defined(FIXED_POINT)
|
||||
#if defined(OPUS_X86_MAY_HAVE_SSE) && !defined(FIXED_POINT)
|
||||
opus_val32 celt_inner_prod_sse(
|
||||
const opus_val16 *x,
|
||||
const opus_val16 *y,
|
||||
|
@ -84,13 +84,14 @@ else:
|
||||
if CONFIG['CPU_ARCH'] in ('x86', 'x86_64'):
|
||||
DEFINES['OPUS_HAVE_RTCD'] = True
|
||||
DEFINES['OPUS_X86_MAY_HAVE_SSE'] = True
|
||||
DEFINES['OPUS_X86_MAY_HAVE_SSE2'] = True
|
||||
DEFINES['OPUS_X86_MAY_HAVE_SSE4_1'] = True
|
||||
DEFINES['OPUS_X86_MAY_HAVE_AVX'] = True
|
||||
SOURCES += celt_sources_sse
|
||||
SOURCES += celt_sources_sse2
|
||||
SOURCES += celt_sources_sse4_1
|
||||
SOURCES += silk_sources_sse4_1
|
||||
if not CONFIG['THE_SSE1']:
|
||||
DEFINES['OPUS_X86_MAY_HAVE_SSE2'] = True
|
||||
DEFINES['OPUS_X86_MAY_HAVE_SSE4_1'] = True
|
||||
DEFINES['OPUS_X86_MAY_HAVE_AVX'] = True
|
||||
SOURCES += celt_sources_sse2
|
||||
SOURCES += celt_sources_sse4_1
|
||||
SOURCES += silk_sources_sse4_1
|
||||
if not CONFIG['MOZ_SAMPLE_TYPE_FLOAT32']:
|
||||
SOURCES += silk_sources_fixed_sse4_1
|
||||
for f in SOURCES:
|
||||
|
@ -41,7 +41,7 @@ if CONFIG['CPU_ARCH'] == 'arm' or CONFIG['CPU_ARCH'] == 'aarch64':
|
||||
'arm/filter_neon.S'
|
||||
]
|
||||
|
||||
if CONFIG['INTEL_ARCHITECTURE']:
|
||||
if CONFIG['INTEL_ARCHITECTURE'] and not CONFIG['THE_SSE1']:
|
||||
DEFINES['MOZ_PNG_USE_INTEL_SSE'] = True
|
||||
UNIFIED_SOURCES += [
|
||||
'intel/filter_sse2_intrinsics.c',
|
||||
|
@ -6,4 +6,3 @@ with Files("**"):
|
||||
BUG_COMPONENT = ("Core", "Audio/Video: Playback")
|
||||
|
||||
DIRS += ['src']
|
||||
|
||||
|
@ -155,7 +155,7 @@ namespace soundtouch
|
||||
// data type for sample accumulation: Use double to utilize full precision.
|
||||
typedef double LONG_SAMPLETYPE;
|
||||
|
||||
#ifdef SOUNDTOUCH_ALLOW_X86_OPTIMIZATIONS
|
||||
#if defined(SOUNDTOUCH_ALLOW_X86_OPTIMIZATIONS) && !defined(THE_SSE1)
|
||||
// Allow SSE optimizations
|
||||
#define SOUNDTOUCH_ALLOW_SSE 1
|
||||
#endif
|
||||
|
@ -25,7 +25,7 @@ UNIFIED_SOURCES += [
|
||||
]
|
||||
|
||||
if CONFIG['INTEL_ARCHITECTURE']:
|
||||
if CONFIG['MOZ_SAMPLE_TYPE_FLOAT32']:
|
||||
if CONFIG['MOZ_SAMPLE_TYPE_FLOAT32'] and not CONFIG['THE_SSE1']:
|
||||
SOURCES += ['sse_optimized.cpp']
|
||||
SOURCES['sse_optimized.cpp'].flags += CONFIG['SSE2_FLAGS']
|
||||
else:
|
||||
|
@ -29,7 +29,7 @@ else:
|
||||
DEFINES['FLOATING_POINT'] = True
|
||||
|
||||
# Only use SSE code when using floating point samples, and on x86
|
||||
if CONFIG['INTEL_ARCHITECTURE'] and not CONFIG['MOZ_SAMPLE_TYPE_S16']:
|
||||
if CONFIG['INTEL_ARCHITECTURE'] and not CONFIG['MOZ_SAMPLE_TYPE_S16'] and not CONFIG['THE_SSE1']:
|
||||
DEFINES['_USE_SSE'] = True
|
||||
DEFINES['_USE_SSE2'] = True
|
||||
SOURCES += [
|
||||
|
@ -28,8 +28,10 @@ extern "C" {
|
||||
#endif
|
||||
|
||||
void vp8_bilinear_predict16x16_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
|
||||
#ifndef THE_SSE1
|
||||
void vp8_bilinear_predict16x16_sse2(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
|
||||
void vp8_bilinear_predict16x16_ssse3(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
|
||||
#endif
|
||||
RTCD_EXTERN void (*vp8_bilinear_predict16x16)(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
|
||||
|
||||
void vp8_bilinear_predict4x4_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
|
||||
@ -41,8 +43,10 @@ void vp8_bilinear_predict8x4_mmx(unsigned char *src, int src_pitch, int xofst, i
|
||||
RTCD_EXTERN void (*vp8_bilinear_predict8x4)(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
|
||||
|
||||
void vp8_bilinear_predict8x8_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
|
||||
#ifndef THE_SSE1
|
||||
void vp8_bilinear_predict8x8_sse2(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
|
||||
void vp8_bilinear_predict8x8_ssse3(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
|
||||
#endif
|
||||
RTCD_EXTERN void (*vp8_bilinear_predict8x8)(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
|
||||
|
||||
void vp8_blend_b_c(unsigned char *y, unsigned char *u, unsigned char *v, int y1, int u1, int v1, int alpha, int stride);
|
||||
@ -55,16 +59,22 @@ void vp8_blend_mb_outer_c(unsigned char *y, unsigned char *u, unsigned char *v,
|
||||
#define vp8_blend_mb_outer vp8_blend_mb_outer_c
|
||||
|
||||
int vp8_block_error_c(short *coeff, short *dqcoeff);
|
||||
#ifndef THE_SSE1
|
||||
int vp8_block_error_sse2(short *coeff, short *dqcoeff);
|
||||
#endif
|
||||
RTCD_EXTERN int (*vp8_block_error)(short *coeff, short *dqcoeff);
|
||||
|
||||
void vp8_copy32xn_c(const unsigned char *src_ptr, int source_stride, unsigned char *dst_ptr, int dst_stride, int n);
|
||||
#ifndef THE_SSE1
|
||||
void vp8_copy32xn_sse2(const unsigned char *src_ptr, int source_stride, unsigned char *dst_ptr, int dst_stride, int n);
|
||||
void vp8_copy32xn_sse3(const unsigned char *src_ptr, int source_stride, unsigned char *dst_ptr, int dst_stride, int n);
|
||||
#endif
|
||||
RTCD_EXTERN void (*vp8_copy32xn)(const unsigned char *src_ptr, int source_stride, unsigned char *dst_ptr, int dst_stride, int n);
|
||||
|
||||
void vp8_copy_mem16x16_c(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch);
|
||||
#ifndef THE_SSE1
|
||||
void vp8_copy_mem16x16_sse2(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch);
|
||||
#endif
|
||||
RTCD_EXTERN void (*vp8_copy_mem16x16)(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch);
|
||||
|
||||
void vp8_copy_mem8x4_c(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch);
|
||||
@ -80,11 +90,15 @@ void vp8_dc_only_idct_add_mmx(short input, unsigned char *pred, int pred_stride,
|
||||
RTCD_EXTERN void (*vp8_dc_only_idct_add)(short input, unsigned char *pred, int pred_stride, unsigned char *dst, int dst_stride);
|
||||
|
||||
int vp8_denoiser_filter_c(unsigned char *mc_running_avg_y, int mc_avg_y_stride, unsigned char *running_avg_y, int avg_y_stride, unsigned char *sig, int sig_stride, unsigned int motion_magnitude, int increase_denoising);
|
||||
#ifndef THE_SSE1
|
||||
int vp8_denoiser_filter_sse2(unsigned char *mc_running_avg_y, int mc_avg_y_stride, unsigned char *running_avg_y, int avg_y_stride, unsigned char *sig, int sig_stride, unsigned int motion_magnitude, int increase_denoising);
|
||||
#endif
|
||||
RTCD_EXTERN int (*vp8_denoiser_filter)(unsigned char *mc_running_avg_y, int mc_avg_y_stride, unsigned char *running_avg_y, int avg_y_stride, unsigned char *sig, int sig_stride, unsigned int motion_magnitude, int increase_denoising);
|
||||
|
||||
int vp8_denoiser_filter_uv_c(unsigned char *mc_running_avg, int mc_avg_stride, unsigned char *running_avg, int avg_stride, unsigned char *sig, int sig_stride, unsigned int motion_magnitude, int increase_denoising);
|
||||
#ifndef THE_SSE1
|
||||
int vp8_denoiser_filter_uv_sse2(unsigned char *mc_running_avg, int mc_avg_stride, unsigned char *running_avg, int avg_stride, unsigned char *sig, int sig_stride, unsigned int motion_magnitude, int increase_denoising);
|
||||
#endif
|
||||
RTCD_EXTERN int (*vp8_denoiser_filter_uv)(unsigned char *mc_running_avg, int mc_avg_stride, unsigned char *running_avg, int avg_stride, unsigned char *sig, int sig_stride, unsigned int motion_magnitude, int increase_denoising);
|
||||
|
||||
void vp8_dequant_idct_add_c(short *input, short *dq, unsigned char *output, int stride);
|
||||
@ -92,11 +106,15 @@ void vp8_dequant_idct_add_mmx(short *input, short *dq, unsigned char *output, in
|
||||
RTCD_EXTERN void (*vp8_dequant_idct_add)(short *input, short *dq, unsigned char *output, int stride);
|
||||
|
||||
void vp8_dequant_idct_add_uv_block_c(short *q, short *dq, unsigned char *dst_u, unsigned char *dst_v, int stride, char *eobs);
|
||||
#ifndef THE_SSE1
|
||||
void vp8_dequant_idct_add_uv_block_sse2(short *q, short *dq, unsigned char *dst_u, unsigned char *dst_v, int stride, char *eobs);
|
||||
#endif
|
||||
RTCD_EXTERN void (*vp8_dequant_idct_add_uv_block)(short *q, short *dq, unsigned char *dst_u, unsigned char *dst_v, int stride, char *eobs);
|
||||
|
||||
void vp8_dequant_idct_add_y_block_c(short *q, short *dq, unsigned char *dst, int stride, char *eobs);
|
||||
#ifndef THE_SSE1
|
||||
void vp8_dequant_idct_add_y_block_sse2(short *q, short *dq, unsigned char *dst, int stride, char *eobs);
|
||||
#endif
|
||||
RTCD_EXTERN void (*vp8_dequant_idct_add_y_block)(short *q, short *dq, unsigned char *dst, int stride, char *eobs);
|
||||
|
||||
void vp8_dequantize_b_c(struct blockd*, short *dqc);
|
||||
@ -108,19 +126,25 @@ int vp8_diamond_search_sadx4(struct macroblock *x, struct block *b, struct block
|
||||
RTCD_EXTERN int (*vp8_diamond_search_sad)(struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, union int_mv *best_mv, int search_param, int sad_per_bit, int *num00, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv);
|
||||
|
||||
void vp8_fast_quantize_b_c(struct block *, struct blockd *);
|
||||
#ifndef THE_SSE1
|
||||
void vp8_fast_quantize_b_sse2(struct block *, struct blockd *);
|
||||
void vp8_fast_quantize_b_ssse3(struct block *, struct blockd *);
|
||||
#endif
|
||||
RTCD_EXTERN void (*vp8_fast_quantize_b)(struct block *, struct blockd *);
|
||||
|
||||
void vp8_filter_by_weight16x16_c(unsigned char *src, int src_stride, unsigned char *dst, int dst_stride, int src_weight);
|
||||
#ifndef THE_SSE1
|
||||
void vp8_filter_by_weight16x16_sse2(unsigned char *src, int src_stride, unsigned char *dst, int dst_stride, int src_weight);
|
||||
#endif
|
||||
RTCD_EXTERN void (*vp8_filter_by_weight16x16)(unsigned char *src, int src_stride, unsigned char *dst, int dst_stride, int src_weight);
|
||||
|
||||
void vp8_filter_by_weight4x4_c(unsigned char *src, int src_stride, unsigned char *dst, int dst_stride, int src_weight);
|
||||
#define vp8_filter_by_weight4x4 vp8_filter_by_weight4x4_c
|
||||
|
||||
void vp8_filter_by_weight8x8_c(unsigned char *src, int src_stride, unsigned char *dst, int dst_stride, int src_weight);
|
||||
#ifndef THE_SSE1
|
||||
void vp8_filter_by_weight8x8_sse2(unsigned char *src, int src_stride, unsigned char *dst, int dst_stride, int src_weight);
|
||||
#endif
|
||||
RTCD_EXTERN void (*vp8_filter_by_weight8x8)(unsigned char *src, int src_stride, unsigned char *dst, int dst_stride, int src_weight);
|
||||
|
||||
int vp8_full_search_sad_c(struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, int sad_per_bit, int distance, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv);
|
||||
@ -129,43 +153,63 @@ int vp8_full_search_sadx8(struct macroblock *x, struct block *b, struct blockd *
|
||||
RTCD_EXTERN int (*vp8_full_search_sad)(struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, int sad_per_bit, int distance, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv);
|
||||
|
||||
void vp8_loop_filter_bh_c(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
|
||||
#ifndef THE_SSE1
|
||||
void vp8_loop_filter_bh_sse2(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
|
||||
#endif
|
||||
RTCD_EXTERN void (*vp8_loop_filter_bh)(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
|
||||
|
||||
void vp8_loop_filter_bv_c(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
|
||||
#ifndef THE_SSE1
|
||||
void vp8_loop_filter_bv_sse2(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
|
||||
#endif
|
||||
RTCD_EXTERN void (*vp8_loop_filter_bv)(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
|
||||
|
||||
void vp8_loop_filter_mbh_c(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
|
||||
#ifndef THE_SSE1
|
||||
void vp8_loop_filter_mbh_sse2(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
|
||||
#endif
|
||||
RTCD_EXTERN void (*vp8_loop_filter_mbh)(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
|
||||
|
||||
void vp8_loop_filter_mbv_c(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
|
||||
#ifndef THE_SSE1
|
||||
void vp8_loop_filter_mbv_sse2(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
|
||||
#endif
|
||||
RTCD_EXTERN void (*vp8_loop_filter_mbv)(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
|
||||
|
||||
void vp8_loop_filter_bhs_c(unsigned char *y, int ystride, const unsigned char *blimit);
|
||||
#ifndef THE_SSE1
|
||||
void vp8_loop_filter_bhs_sse2(unsigned char *y, int ystride, const unsigned char *blimit);
|
||||
#endif
|
||||
RTCD_EXTERN void (*vp8_loop_filter_simple_bh)(unsigned char *y, int ystride, const unsigned char *blimit);
|
||||
|
||||
void vp8_loop_filter_bvs_c(unsigned char *y, int ystride, const unsigned char *blimit);
|
||||
#ifndef THE_SSE1
|
||||
void vp8_loop_filter_bvs_sse2(unsigned char *y, int ystride, const unsigned char *blimit);
|
||||
#endif
|
||||
RTCD_EXTERN void (*vp8_loop_filter_simple_bv)(unsigned char *y, int ystride, const unsigned char *blimit);
|
||||
|
||||
void vp8_loop_filter_simple_horizontal_edge_c(unsigned char *y, int ystride, const unsigned char *blimit);
|
||||
#ifndef THE_SSE1
|
||||
void vp8_loop_filter_simple_horizontal_edge_sse2(unsigned char *y, int ystride, const unsigned char *blimit);
|
||||
#endif
|
||||
RTCD_EXTERN void (*vp8_loop_filter_simple_mbh)(unsigned char *y, int ystride, const unsigned char *blimit);
|
||||
|
||||
void vp8_loop_filter_simple_vertical_edge_c(unsigned char *y, int ystride, const unsigned char *blimit);
|
||||
#ifndef THE_SSE1
|
||||
void vp8_loop_filter_simple_vertical_edge_sse2(unsigned char *y, int ystride, const unsigned char *blimit);
|
||||
#endif
|
||||
RTCD_EXTERN void (*vp8_loop_filter_simple_mbv)(unsigned char *y, int ystride, const unsigned char *blimit);
|
||||
|
||||
int vp8_mbblock_error_c(struct macroblock *mb, int dc);
|
||||
#ifndef THE_SSE1
|
||||
int vp8_mbblock_error_sse2(struct macroblock *mb, int dc);
|
||||
#endif
|
||||
RTCD_EXTERN int (*vp8_mbblock_error)(struct macroblock *mb, int dc);
|
||||
|
||||
int vp8_mbuverror_c(struct macroblock *mb);
|
||||
#ifndef THE_SSE1
|
||||
int vp8_mbuverror_sse2(struct macroblock *mb);
|
||||
#endif
|
||||
RTCD_EXTERN int (*vp8_mbuverror)(struct macroblock *mb);
|
||||
|
||||
int vp8_refining_search_sad_c(struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, int sad_per_bit, int distance, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv);
|
||||
@ -173,16 +217,22 @@ int vp8_refining_search_sadx4(struct macroblock *x, struct block *b, struct bloc
|
||||
RTCD_EXTERN int (*vp8_refining_search_sad)(struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, int sad_per_bit, int distance, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv);
|
||||
|
||||
void vp8_regular_quantize_b_c(struct block *, struct blockd *);
|
||||
#ifndef THE_SSE1
|
||||
void vp8_regular_quantize_b_sse2(struct block *, struct blockd *);
|
||||
void vp8_regular_quantize_b_sse4_1(struct block *, struct blockd *);
|
||||
#endif
|
||||
RTCD_EXTERN void (*vp8_regular_quantize_b)(struct block *, struct blockd *);
|
||||
|
||||
void vp8_short_fdct4x4_c(short *input, short *output, int pitch);
|
||||
#ifndef THE_SSE1
|
||||
void vp8_short_fdct4x4_sse2(short *input, short *output, int pitch);
|
||||
#endif
|
||||
RTCD_EXTERN void (*vp8_short_fdct4x4)(short *input, short *output, int pitch);
|
||||
|
||||
void vp8_short_fdct8x4_c(short *input, short *output, int pitch);
|
||||
#ifndef THE_SSE1
|
||||
void vp8_short_fdct8x4_sse2(short *input, short *output, int pitch);
|
||||
#endif
|
||||
RTCD_EXTERN void (*vp8_short_fdct8x4)(short *input, short *output, int pitch);
|
||||
|
||||
void vp8_short_idct4x4llm_c(short *input, unsigned char *pred, int pitch, unsigned char *dst, int dst_stride);
|
||||
@ -190,38 +240,52 @@ void vp8_short_idct4x4llm_mmx(short *input, unsigned char *pred, int pitch, unsi
|
||||
RTCD_EXTERN void (*vp8_short_idct4x4llm)(short *input, unsigned char *pred, int pitch, unsigned char *dst, int dst_stride);
|
||||
|
||||
void vp8_short_inv_walsh4x4_c(short *input, short *output);
|
||||
#ifndef THE_SSE1
|
||||
void vp8_short_inv_walsh4x4_sse2(short *input, short *output);
|
||||
#endif
|
||||
RTCD_EXTERN void (*vp8_short_inv_walsh4x4)(short *input, short *output);
|
||||
|
||||
void vp8_short_inv_walsh4x4_1_c(short *input, short *output);
|
||||
#define vp8_short_inv_walsh4x4_1 vp8_short_inv_walsh4x4_1_c
|
||||
|
||||
void vp8_short_walsh4x4_c(short *input, short *output, int pitch);
|
||||
#ifndef THE_SSE1
|
||||
void vp8_short_walsh4x4_sse2(short *input, short *output, int pitch);
|
||||
#endif
|
||||
RTCD_EXTERN void (*vp8_short_walsh4x4)(short *input, short *output, int pitch);
|
||||
|
||||
void vp8_sixtap_predict16x16_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
|
||||
#ifndef THE_SSE1
|
||||
void vp8_sixtap_predict16x16_sse2(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
|
||||
void vp8_sixtap_predict16x16_ssse3(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
|
||||
#endif
|
||||
RTCD_EXTERN void (*vp8_sixtap_predict16x16)(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
|
||||
|
||||
void vp8_sixtap_predict4x4_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
|
||||
void vp8_sixtap_predict4x4_mmx(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
|
||||
#ifndef THE_SSE1
|
||||
void vp8_sixtap_predict4x4_ssse3(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
|
||||
#endif
|
||||
RTCD_EXTERN void (*vp8_sixtap_predict4x4)(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
|
||||
|
||||
void vp8_sixtap_predict8x4_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
|
||||
#ifndef THE_SSE1
|
||||
void vp8_sixtap_predict8x4_sse2(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
|
||||
void vp8_sixtap_predict8x4_ssse3(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
|
||||
#endif
|
||||
RTCD_EXTERN void (*vp8_sixtap_predict8x4)(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
|
||||
|
||||
void vp8_sixtap_predict8x8_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
|
||||
#ifndef THE_SSE1
|
||||
void vp8_sixtap_predict8x8_sse2(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
|
||||
void vp8_sixtap_predict8x8_ssse3(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
|
||||
#endif
|
||||
RTCD_EXTERN void (*vp8_sixtap_predict8x8)(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
|
||||
|
||||
void vp8_temporal_filter_apply_c(unsigned char *frame1, unsigned int stride, unsigned char *frame2, unsigned int block_size, int strength, int filter_weight, unsigned int *accumulator, unsigned short *count);
|
||||
#ifndef THE_SSE1
|
||||
void vp8_temporal_filter_apply_sse2(unsigned char *frame1, unsigned int stride, unsigned char *frame2, unsigned int block_size, int strength, int filter_weight, unsigned int *accumulator, unsigned short *count);
|
||||
#endif
|
||||
RTCD_EXTERN void (*vp8_temporal_filter_apply)(unsigned char *frame1, unsigned int stride, unsigned char *frame2, unsigned int block_size, int strength, int filter_weight, unsigned int *accumulator, unsigned short *count);
|
||||
|
||||
void vp8_rtcd(void);
|
||||
@ -235,22 +299,14 @@ static void setup_rtcd_internal(void)
|
||||
(void)flags;
|
||||
|
||||
vp8_bilinear_predict16x16 = vp8_bilinear_predict16x16_c;
|
||||
if (flags & HAS_SSE2) vp8_bilinear_predict16x16 = vp8_bilinear_predict16x16_sse2;
|
||||
if (flags & HAS_SSSE3) vp8_bilinear_predict16x16 = vp8_bilinear_predict16x16_ssse3;
|
||||
vp8_bilinear_predict4x4 = vp8_bilinear_predict4x4_c;
|
||||
if (flags & HAS_MMX) vp8_bilinear_predict4x4 = vp8_bilinear_predict4x4_mmx;
|
||||
vp8_bilinear_predict8x4 = vp8_bilinear_predict8x4_c;
|
||||
if (flags & HAS_MMX) vp8_bilinear_predict8x4 = vp8_bilinear_predict8x4_mmx;
|
||||
vp8_bilinear_predict8x8 = vp8_bilinear_predict8x8_c;
|
||||
if (flags & HAS_SSE2) vp8_bilinear_predict8x8 = vp8_bilinear_predict8x8_sse2;
|
||||
if (flags & HAS_SSSE3) vp8_bilinear_predict8x8 = vp8_bilinear_predict8x8_ssse3;
|
||||
vp8_block_error = vp8_block_error_c;
|
||||
if (flags & HAS_SSE2) vp8_block_error = vp8_block_error_sse2;
|
||||
vp8_copy32xn = vp8_copy32xn_c;
|
||||
if (flags & HAS_SSE2) vp8_copy32xn = vp8_copy32xn_sse2;
|
||||
if (flags & HAS_SSE3) vp8_copy32xn = vp8_copy32xn_sse3;
|
||||
vp8_copy_mem16x16 = vp8_copy_mem16x16_c;
|
||||
if (flags & HAS_SSE2) vp8_copy_mem16x16 = vp8_copy_mem16x16_sse2;
|
||||
vp8_copy_mem8x4 = vp8_copy_mem8x4_c;
|
||||
if (flags & HAS_MMX) vp8_copy_mem8x4 = vp8_copy_mem8x4_mmx;
|
||||
vp8_copy_mem8x8 = vp8_copy_mem8x8_c;
|
||||
@ -258,78 +314,88 @@ static void setup_rtcd_internal(void)
|
||||
vp8_dc_only_idct_add = vp8_dc_only_idct_add_c;
|
||||
if (flags & HAS_MMX) vp8_dc_only_idct_add = vp8_dc_only_idct_add_mmx;
|
||||
vp8_denoiser_filter = vp8_denoiser_filter_c;
|
||||
if (flags & HAS_SSE2) vp8_denoiser_filter = vp8_denoiser_filter_sse2;
|
||||
vp8_denoiser_filter_uv = vp8_denoiser_filter_uv_c;
|
||||
if (flags & HAS_SSE2) vp8_denoiser_filter_uv = vp8_denoiser_filter_uv_sse2;
|
||||
vp8_dequant_idct_add = vp8_dequant_idct_add_c;
|
||||
if (flags & HAS_MMX) vp8_dequant_idct_add = vp8_dequant_idct_add_mmx;
|
||||
vp8_dequant_idct_add_uv_block = vp8_dequant_idct_add_uv_block_c;
|
||||
if (flags & HAS_SSE2) vp8_dequant_idct_add_uv_block = vp8_dequant_idct_add_uv_block_sse2;
|
||||
vp8_dequant_idct_add_y_block = vp8_dequant_idct_add_y_block_c;
|
||||
if (flags & HAS_SSE2) vp8_dequant_idct_add_y_block = vp8_dequant_idct_add_y_block_sse2;
|
||||
vp8_dequantize_b = vp8_dequantize_b_c;
|
||||
if (flags & HAS_MMX) vp8_dequantize_b = vp8_dequantize_b_mmx;
|
||||
vp8_diamond_search_sad = vp8_diamond_search_sad_c;
|
||||
if (flags & HAS_SSE2) vp8_diamond_search_sad = vp8_diamond_search_sadx4;
|
||||
vp8_fast_quantize_b = vp8_fast_quantize_b_c;
|
||||
if (flags & HAS_SSE2) vp8_fast_quantize_b = vp8_fast_quantize_b_sse2;
|
||||
if (flags & HAS_SSSE3) vp8_fast_quantize_b = vp8_fast_quantize_b_ssse3;
|
||||
vp8_filter_by_weight16x16 = vp8_filter_by_weight16x16_c;
|
||||
if (flags & HAS_SSE2) vp8_filter_by_weight16x16 = vp8_filter_by_weight16x16_sse2;
|
||||
vp8_filter_by_weight8x8 = vp8_filter_by_weight8x8_c;
|
||||
if (flags & HAS_SSE2) vp8_filter_by_weight8x8 = vp8_filter_by_weight8x8_sse2;
|
||||
vp8_full_search_sad = vp8_full_search_sad_c;
|
||||
if (flags & HAS_SSE3) vp8_full_search_sad = vp8_full_search_sadx3;
|
||||
if (flags & HAS_SSE4_1) vp8_full_search_sad = vp8_full_search_sadx8;
|
||||
vp8_loop_filter_bh = vp8_loop_filter_bh_c;
|
||||
if (flags & HAS_SSE2) vp8_loop_filter_bh = vp8_loop_filter_bh_sse2;
|
||||
vp8_loop_filter_bv = vp8_loop_filter_bv_c;
|
||||
if (flags & HAS_SSE2) vp8_loop_filter_bv = vp8_loop_filter_bv_sse2;
|
||||
vp8_loop_filter_mbh = vp8_loop_filter_mbh_c;
|
||||
if (flags & HAS_SSE2) vp8_loop_filter_mbh = vp8_loop_filter_mbh_sse2;
|
||||
vp8_loop_filter_mbv = vp8_loop_filter_mbv_c;
|
||||
if (flags & HAS_SSE2) vp8_loop_filter_mbv = vp8_loop_filter_mbv_sse2;
|
||||
vp8_loop_filter_simple_bh = vp8_loop_filter_bhs_c;
|
||||
if (flags & HAS_SSE2) vp8_loop_filter_simple_bh = vp8_loop_filter_bhs_sse2;
|
||||
vp8_loop_filter_simple_bv = vp8_loop_filter_bvs_c;
|
||||
if (flags & HAS_SSE2) vp8_loop_filter_simple_bv = vp8_loop_filter_bvs_sse2;
|
||||
vp8_loop_filter_simple_mbh = vp8_loop_filter_simple_horizontal_edge_c;
|
||||
if (flags & HAS_SSE2) vp8_loop_filter_simple_mbh = vp8_loop_filter_simple_horizontal_edge_sse2;
|
||||
vp8_loop_filter_simple_mbv = vp8_loop_filter_simple_vertical_edge_c;
|
||||
if (flags & HAS_SSE2) vp8_loop_filter_simple_mbv = vp8_loop_filter_simple_vertical_edge_sse2;
|
||||
vp8_mbblock_error = vp8_mbblock_error_c;
|
||||
if (flags & HAS_SSE2) vp8_mbblock_error = vp8_mbblock_error_sse2;
|
||||
vp8_mbuverror = vp8_mbuverror_c;
|
||||
if (flags & HAS_SSE2) vp8_mbuverror = vp8_mbuverror_sse2;
|
||||
vp8_refining_search_sad = vp8_refining_search_sad_c;
|
||||
if (flags & HAS_SSE2) vp8_refining_search_sad = vp8_refining_search_sadx4;
|
||||
vp8_regular_quantize_b = vp8_regular_quantize_b_c;
|
||||
if (flags & HAS_SSE2) vp8_regular_quantize_b = vp8_regular_quantize_b_sse2;
|
||||
if (flags & HAS_SSE4_1) vp8_regular_quantize_b = vp8_regular_quantize_b_sse4_1;
|
||||
vp8_short_fdct4x4 = vp8_short_fdct4x4_c;
|
||||
if (flags & HAS_SSE2) vp8_short_fdct4x4 = vp8_short_fdct4x4_sse2;
|
||||
vp8_short_fdct8x4 = vp8_short_fdct8x4_c;
|
||||
if (flags & HAS_SSE2) vp8_short_fdct8x4 = vp8_short_fdct8x4_sse2;
|
||||
vp8_short_idct4x4llm = vp8_short_idct4x4llm_c;
|
||||
if (flags & HAS_MMX) vp8_short_idct4x4llm = vp8_short_idct4x4llm_mmx;
|
||||
vp8_short_inv_walsh4x4 = vp8_short_inv_walsh4x4_c;
|
||||
if (flags & HAS_SSE2) vp8_short_inv_walsh4x4 = vp8_short_inv_walsh4x4_sse2;
|
||||
vp8_short_walsh4x4 = vp8_short_walsh4x4_c;
|
||||
if (flags & HAS_SSE2) vp8_short_walsh4x4 = vp8_short_walsh4x4_sse2;
|
||||
vp8_sixtap_predict16x16 = vp8_sixtap_predict16x16_c;
|
||||
if (flags & HAS_SSE2) vp8_sixtap_predict16x16 = vp8_sixtap_predict16x16_sse2;
|
||||
if (flags & HAS_SSSE3) vp8_sixtap_predict16x16 = vp8_sixtap_predict16x16_ssse3;
|
||||
vp8_sixtap_predict4x4 = vp8_sixtap_predict4x4_c;
|
||||
if (flags & HAS_MMX) vp8_sixtap_predict4x4 = vp8_sixtap_predict4x4_mmx;
|
||||
if (flags & HAS_SSSE3) vp8_sixtap_predict4x4 = vp8_sixtap_predict4x4_ssse3;
|
||||
vp8_sixtap_predict8x4 = vp8_sixtap_predict8x4_c;
|
||||
vp8_sixtap_predict8x8 = vp8_sixtap_predict8x8_c;
|
||||
vp8_temporal_filter_apply = vp8_temporal_filter_apply_c;
|
||||
#ifndef THE_SSE1
|
||||
if (flags & HAS_SSE2) vp8_bilinear_predict16x16 = vp8_bilinear_predict16x16_sse2;
|
||||
if (flags & HAS_SSSE3) vp8_bilinear_predict16x16 = vp8_bilinear_predict16x16_ssse3;
|
||||
if (flags & HAS_SSE2) vp8_bilinear_predict8x8 = vp8_bilinear_predict8x8_sse2;
|
||||
if (flags & HAS_SSSE3) vp8_bilinear_predict8x8 = vp8_bilinear_predict8x8_ssse3;
|
||||
if (flags & HAS_SSE2) vp8_block_error = vp8_block_error_sse2;
|
||||
if (flags & HAS_SSE2) vp8_copy32xn = vp8_copy32xn_sse2;
|
||||
if (flags & HAS_SSE3) vp8_copy32xn = vp8_copy32xn_sse3;
|
||||
if (flags & HAS_SSE2) vp8_copy_mem16x16 = vp8_copy_mem16x16_sse2;
|
||||
if (flags & HAS_SSE2) vp8_denoiser_filter = vp8_denoiser_filter_sse2;
|
||||
if (flags & HAS_SSE2) vp8_denoiser_filter_uv = vp8_denoiser_filter_uv_sse2;
|
||||
if (flags & HAS_SSE2) vp8_dequant_idct_add_uv_block = vp8_dequant_idct_add_uv_block_sse2;
|
||||
if (flags & HAS_SSE2) vp8_dequant_idct_add_y_block = vp8_dequant_idct_add_y_block_sse2;
|
||||
if (flags & HAS_SSE2) vp8_diamond_search_sad = vp8_diamond_search_sadx4;
|
||||
if (flags & HAS_SSE2) vp8_fast_quantize_b = vp8_fast_quantize_b_sse2;
|
||||
if (flags & HAS_SSSE3) vp8_fast_quantize_b = vp8_fast_quantize_b_ssse3;
|
||||
if (flags & HAS_SSE2) vp8_filter_by_weight16x16 = vp8_filter_by_weight16x16_sse2;
|
||||
if (flags & HAS_SSE2) vp8_filter_by_weight8x8 = vp8_filter_by_weight8x8_sse2;
|
||||
if (flags & HAS_SSE3) vp8_full_search_sad = vp8_full_search_sadx3;
|
||||
if (flags & HAS_SSE4_1) vp8_full_search_sad = vp8_full_search_sadx8;
|
||||
if (flags & HAS_SSE2) vp8_loop_filter_bh = vp8_loop_filter_bh_sse2;
|
||||
if (flags & HAS_SSE2) vp8_loop_filter_bv = vp8_loop_filter_bv_sse2;
|
||||
if (flags & HAS_SSE2) vp8_loop_filter_mbh = vp8_loop_filter_mbh_sse2;
|
||||
if (flags & HAS_SSE2) vp8_loop_filter_mbv = vp8_loop_filter_mbv_sse2;
|
||||
if (flags & HAS_SSE2) vp8_loop_filter_simple_bh = vp8_loop_filter_bhs_sse2;
|
||||
if (flags & HAS_SSE2) vp8_loop_filter_simple_bv = vp8_loop_filter_bvs_sse2;
|
||||
if (flags & HAS_SSE2) vp8_loop_filter_simple_mbh = vp8_loop_filter_simple_horizontal_edge_sse2;
|
||||
if (flags & HAS_SSE2) vp8_loop_filter_simple_mbv = vp8_loop_filter_simple_vertical_edge_sse2;
|
||||
if (flags & HAS_SSE2) vp8_mbblock_error = vp8_mbblock_error_sse2;
|
||||
if (flags & HAS_SSE2) vp8_mbuverror = vp8_mbuverror_sse2;
|
||||
if (flags & HAS_SSE2) vp8_refining_search_sad = vp8_refining_search_sadx4;
|
||||
if (flags & HAS_SSE2) vp8_regular_quantize_b = vp8_regular_quantize_b_sse2;
|
||||
if (flags & HAS_SSE4_1) vp8_regular_quantize_b = vp8_regular_quantize_b_sse4_1;
|
||||
if (flags & HAS_SSE2) vp8_short_fdct4x4 = vp8_short_fdct4x4_sse2;
|
||||
if (flags & HAS_SSE2) vp8_short_fdct8x4 = vp8_short_fdct8x4_sse2;
|
||||
if (flags & HAS_SSE2) vp8_short_inv_walsh4x4 = vp8_short_inv_walsh4x4_sse2;
|
||||
if (flags & HAS_SSE2) vp8_short_walsh4x4 = vp8_short_walsh4x4_sse2;
|
||||
if (flags & HAS_SSE2) vp8_sixtap_predict16x16 = vp8_sixtap_predict16x16_sse2;
|
||||
if (flags & HAS_SSSE3) vp8_sixtap_predict16x16 = vp8_sixtap_predict16x16_ssse3;
|
||||
if (flags & HAS_SSSE3) vp8_sixtap_predict4x4 = vp8_sixtap_predict4x4_ssse3;
|
||||
if (flags & HAS_SSE2) vp8_sixtap_predict8x4 = vp8_sixtap_predict8x4_sse2;
|
||||
if (flags & HAS_SSSE3) vp8_sixtap_predict8x4 = vp8_sixtap_predict8x4_ssse3;
|
||||
vp8_sixtap_predict8x8 = vp8_sixtap_predict8x8_c;
|
||||
if (flags & HAS_SSE2) vp8_sixtap_predict8x8 = vp8_sixtap_predict8x8_sse2;
|
||||
if (flags & HAS_SSSE3) vp8_sixtap_predict8x8 = vp8_sixtap_predict8x8_ssse3;
|
||||
vp8_temporal_filter_apply = vp8_temporal_filter_apply_c;
|
||||
if (flags & HAS_SSE2) vp8_temporal_filter_apply = vp8_temporal_filter_apply_sse2;
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
|
@ -32,73 +32,105 @@ extern "C" {
|
||||
#endif
|
||||
|
||||
int64_t vp9_block_error_c(const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz);
|
||||
#ifndef THE_SSE1
|
||||
int64_t vp9_block_error_sse2(const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz);
|
||||
int64_t vp9_block_error_avx2(const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz);
|
||||
#endif
|
||||
RTCD_EXTERN int64_t (*vp9_block_error)(const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz);
|
||||
|
||||
int64_t vp9_block_error_fp_c(const tran_low_t *coeff, const tran_low_t *dqcoeff, int block_size);
|
||||
#ifndef THE_SSE1
|
||||
int64_t vp9_block_error_fp_sse2(const tran_low_t *coeff, const tran_low_t *dqcoeff, int block_size);
|
||||
int64_t vp9_block_error_fp_avx2(const tran_low_t *coeff, const tran_low_t *dqcoeff, int block_size);
|
||||
#endif
|
||||
RTCD_EXTERN int64_t (*vp9_block_error_fp)(const tran_low_t *coeff, const tran_low_t *dqcoeff, int block_size);
|
||||
|
||||
int vp9_diamond_search_sad_c(const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv);
|
||||
#ifndef THE_SSE1
|
||||
int vp9_diamond_search_sad_avx(const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv);
|
||||
#endif
|
||||
RTCD_EXTERN int (*vp9_diamond_search_sad)(const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv);
|
||||
|
||||
void vp9_fdct8x8_quant_c(const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *round_ptr, const int16_t *quant_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan);
|
||||
#ifndef THE_SSE1
|
||||
void vp9_fdct8x8_quant_sse2(const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *round_ptr, const int16_t *quant_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan);
|
||||
void vp9_fdct8x8_quant_ssse3(const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *round_ptr, const int16_t *quant_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan);
|
||||
#endif
|
||||
RTCD_EXTERN void (*vp9_fdct8x8_quant)(const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *round_ptr, const int16_t *quant_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan);
|
||||
|
||||
void vp9_fht16x16_c(const int16_t *input, tran_low_t *output, int stride, int tx_type);
|
||||
#ifndef THE_SSE1
|
||||
void vp9_fht16x16_sse2(const int16_t *input, tran_low_t *output, int stride, int tx_type);
|
||||
#endif
|
||||
RTCD_EXTERN void (*vp9_fht16x16)(const int16_t *input, tran_low_t *output, int stride, int tx_type);
|
||||
|
||||
void vp9_fht4x4_c(const int16_t *input, tran_low_t *output, int stride, int tx_type);
|
||||
#ifndef THE_SSE1
|
||||
void vp9_fht4x4_sse2(const int16_t *input, tran_low_t *output, int stride, int tx_type);
|
||||
#endif
|
||||
RTCD_EXTERN void (*vp9_fht4x4)(const int16_t *input, tran_low_t *output, int stride, int tx_type);
|
||||
|
||||
void vp9_fht8x8_c(const int16_t *input, tran_low_t *output, int stride, int tx_type);
|
||||
#ifndef THE_SSE1
|
||||
void vp9_fht8x8_sse2(const int16_t *input, tran_low_t *output, int stride, int tx_type);
|
||||
#endif
|
||||
RTCD_EXTERN void (*vp9_fht8x8)(const int16_t *input, tran_low_t *output, int stride, int tx_type);
|
||||
|
||||
void vp9_filter_by_weight16x16_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int src_weight);
|
||||
#ifndef THE_SSE1
|
||||
void vp9_filter_by_weight16x16_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int src_weight);
|
||||
#endif
|
||||
RTCD_EXTERN void (*vp9_filter_by_weight16x16)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int src_weight);
|
||||
|
||||
void vp9_filter_by_weight8x8_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int src_weight);
|
||||
#ifndef THE_SSE1
|
||||
void vp9_filter_by_weight8x8_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int src_weight);
|
||||
#endif
|
||||
RTCD_EXTERN void (*vp9_filter_by_weight8x8)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int src_weight);
|
||||
|
||||
void vp9_fwht4x4_c(const int16_t *input, tran_low_t *output, int stride);
|
||||
#ifndef THE_SSE1
|
||||
void vp9_fwht4x4_sse2(const int16_t *input, tran_low_t *output, int stride);
|
||||
#endif
|
||||
RTCD_EXTERN void (*vp9_fwht4x4)(const int16_t *input, tran_low_t *output, int stride);
|
||||
|
||||
void vp9_iht16x16_256_add_c(const tran_low_t *input, uint8_t *output, int pitch, int tx_type);
|
||||
#ifndef THE_SSE1
|
||||
void vp9_iht16x16_256_add_sse2(const tran_low_t *input, uint8_t *output, int pitch, int tx_type);
|
||||
#endif
|
||||
RTCD_EXTERN void (*vp9_iht16x16_256_add)(const tran_low_t *input, uint8_t *output, int pitch, int tx_type);
|
||||
|
||||
void vp9_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride, int tx_type);
|
||||
#ifndef THE_SSE1
|
||||
void vp9_iht4x4_16_add_sse2(const tran_low_t *input, uint8_t *dest, int stride, int tx_type);
|
||||
#endif
|
||||
RTCD_EXTERN void (*vp9_iht4x4_16_add)(const tran_low_t *input, uint8_t *dest, int stride, int tx_type);
|
||||
|
||||
void vp9_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int stride, int tx_type);
|
||||
#ifndef THE_SSE1
|
||||
void vp9_iht8x8_64_add_sse2(const tran_low_t *input, uint8_t *dest, int stride, int tx_type);
|
||||
#endif
|
||||
RTCD_EXTERN void (*vp9_iht8x8_64_add)(const tran_low_t *input, uint8_t *dest, int stride, int tx_type);
|
||||
|
||||
void vp9_quantize_fp_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *round_ptr, const int16_t *quant_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan);
|
||||
#ifndef THE_SSE1
|
||||
void vp9_quantize_fp_sse2(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *round_ptr, const int16_t *quant_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan);
|
||||
#endif
|
||||
RTCD_EXTERN void (*vp9_quantize_fp)(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *round_ptr, const int16_t *quant_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan);
|
||||
|
||||
void vp9_quantize_fp_32x32_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *round_ptr, const int16_t *quant_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan);
|
||||
#define vp9_quantize_fp_32x32 vp9_quantize_fp_32x32_c
|
||||
|
||||
void vp9_scale_and_extend_frame_c(const struct yv12_buffer_config *src, struct yv12_buffer_config *dst, INTERP_FILTER filter_type, int phase_scaler);
|
||||
#ifndef THE_SSE1
|
||||
void vp9_scale_and_extend_frame_ssse3(const struct yv12_buffer_config *src, struct yv12_buffer_config *dst, INTERP_FILTER filter_type, int phase_scaler);
|
||||
#endif
|
||||
RTCD_EXTERN void (*vp9_scale_and_extend_frame)(const struct yv12_buffer_config *src, struct yv12_buffer_config *dst, INTERP_FILTER filter_type, int phase_scaler);
|
||||
|
||||
void vp9_temporal_filter_apply_c(const uint8_t *frame1, unsigned int stride, const uint8_t *frame2, unsigned int block_width, unsigned int block_height, int strength, int filter_weight, uint32_t *accumulator, uint16_t *count);
|
||||
#ifndef THE_SSE1
|
||||
void vp9_temporal_filter_apply_sse4_1(const uint8_t *frame1, unsigned int stride, const uint8_t *frame2, unsigned int block_width, unsigned int block_height, int strength, int filter_weight, uint32_t *accumulator, uint16_t *count);
|
||||
#endif
|
||||
RTCD_EXTERN void (*vp9_temporal_filter_apply)(const uint8_t *frame1, unsigned int stride, const uint8_t *frame2, unsigned int block_width, unsigned int block_height, int strength, int filter_weight, uint32_t *accumulator, uint16_t *count);
|
||||
|
||||
void vp9_rtcd(void);
|
||||
@ -107,45 +139,47 @@ void vp9_rtcd(void);
|
||||
#include "vpx_ports/x86.h"
|
||||
static void setup_rtcd_internal(void)
|
||||
{
|
||||
vp9_block_error = vp9_block_error_c;
|
||||
vp9_block_error_fp = vp9_block_error_fp_c;
|
||||
vp9_diamond_search_sad = vp9_diamond_search_sad_c;
|
||||
vp9_fdct8x8_quant = vp9_fdct8x8_quant_c;
|
||||
vp9_fht16x16 = vp9_fht16x16_c;
|
||||
vp9_fht4x4 = vp9_fht4x4_c;
|
||||
vp9_fht8x8 = vp9_fht8x8_c;
|
||||
vp9_filter_by_weight16x16 = vp9_filter_by_weight16x16_c;
|
||||
vp9_filter_by_weight8x8 = vp9_filter_by_weight8x8_c;
|
||||
vp9_fwht4x4 = vp9_fwht4x4_c;
|
||||
vp9_iht16x16_256_add = vp9_iht16x16_256_add_c;
|
||||
vp9_iht4x4_16_add = vp9_iht4x4_16_add_c;
|
||||
vp9_iht8x8_64_add = vp9_iht8x8_64_add_c;
|
||||
vp9_quantize_fp = vp9_quantize_fp_c;
|
||||
vp9_scale_and_extend_frame = vp9_scale_and_extend_frame_c;
|
||||
vp9_temporal_filter_apply = vp9_temporal_filter_apply_c;
|
||||
#ifndef THE_SSE1
|
||||
int flags = x86_simd_caps();
|
||||
|
||||
(void)flags;
|
||||
|
||||
vp9_block_error = vp9_block_error_c;
|
||||
if (flags & HAS_SSE2) vp9_block_error = vp9_block_error_sse2;
|
||||
if (flags & HAS_AVX2) vp9_block_error = vp9_block_error_avx2;
|
||||
vp9_block_error_fp = vp9_block_error_fp_c;
|
||||
if (flags & HAS_SSE2) vp9_block_error_fp = vp9_block_error_fp_sse2;
|
||||
if (flags & HAS_AVX2) vp9_block_error_fp = vp9_block_error_fp_avx2;
|
||||
vp9_diamond_search_sad = vp9_diamond_search_sad_c;
|
||||
if (flags & HAS_AVX) vp9_diamond_search_sad = vp9_diamond_search_sad_avx;
|
||||
vp9_fdct8x8_quant = vp9_fdct8x8_quant_c;
|
||||
if (flags & HAS_SSE2) vp9_fdct8x8_quant = vp9_fdct8x8_quant_sse2;
|
||||
if (flags & HAS_SSSE3) vp9_fdct8x8_quant = vp9_fdct8x8_quant_ssse3;
|
||||
vp9_fht16x16 = vp9_fht16x16_c;
|
||||
if (flags & HAS_SSE2) vp9_fht16x16 = vp9_fht16x16_sse2;
|
||||
vp9_fht4x4 = vp9_fht4x4_c;
|
||||
if (flags & HAS_SSE2) vp9_fht4x4 = vp9_fht4x4_sse2;
|
||||
vp9_fht8x8 = vp9_fht8x8_c;
|
||||
if (flags & HAS_SSE2) vp9_fht8x8 = vp9_fht8x8_sse2;
|
||||
vp9_filter_by_weight16x16 = vp9_filter_by_weight16x16_c;
|
||||
if (flags & HAS_SSE2) vp9_filter_by_weight16x16 = vp9_filter_by_weight16x16_sse2;
|
||||
vp9_filter_by_weight8x8 = vp9_filter_by_weight8x8_c;
|
||||
if (flags & HAS_SSE2) vp9_filter_by_weight8x8 = vp9_filter_by_weight8x8_sse2;
|
||||
vp9_fwht4x4 = vp9_fwht4x4_c;
|
||||
if (flags & HAS_SSE2) vp9_fwht4x4 = vp9_fwht4x4_sse2;
|
||||
vp9_iht16x16_256_add = vp9_iht16x16_256_add_c;
|
||||
if (flags & HAS_SSE2) vp9_iht16x16_256_add = vp9_iht16x16_256_add_sse2;
|
||||
vp9_iht4x4_16_add = vp9_iht4x4_16_add_c;
|
||||
if (flags & HAS_SSE2) vp9_iht4x4_16_add = vp9_iht4x4_16_add_sse2;
|
||||
vp9_iht8x8_64_add = vp9_iht8x8_64_add_c;
|
||||
if (flags & HAS_SSE2) vp9_iht8x8_64_add = vp9_iht8x8_64_add_sse2;
|
||||
vp9_quantize_fp = vp9_quantize_fp_c;
|
||||
if (flags & HAS_SSE2) vp9_quantize_fp = vp9_quantize_fp_sse2;
|
||||
vp9_scale_and_extend_frame = vp9_scale_and_extend_frame_c;
|
||||
if (flags & HAS_SSSE3) vp9_scale_and_extend_frame = vp9_scale_and_extend_frame_ssse3;
|
||||
vp9_temporal_filter_apply = vp9_temporal_filter_apply_c;
|
||||
if (flags & HAS_SSE4_1) vp9_temporal_filter_apply = vp9_temporal_filter_apply_sse4_1;
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
|
@ -23,12 +23,21 @@
|
||||
#define HAVE_MIPS64 0
|
||||
#define HAVE_MMX 1
|
||||
#define HAVE_SSE 1
|
||||
#ifndef THE_SSE1
|
||||
#define HAVE_SSE2 1
|
||||
#define HAVE_SSE3 1
|
||||
#define HAVE_SSSE3 1
|
||||
#define HAVE_SSE4_1 1
|
||||
#define HAVE_AVX 1
|
||||
#define HAVE_AVX2 1
|
||||
#else
|
||||
#define HAVE_SSE2 0
|
||||
#define HAVE_SSE3 0
|
||||
#define HAVE_SSSE3 0
|
||||
#define HAVE_SSE4_1 0
|
||||
#define HAVE_AVX 0
|
||||
#define HAVE_AVX2 0
|
||||
#endif
|
||||
#define HAVE_AVX512 0
|
||||
#define HAVE_VSX 0
|
||||
#define HAVE_MMI 0
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -27,6 +27,8 @@ extern void vp8_filter_block1dc_v6_mmx(
|
||||
unsigned int pixels_per_line, unsigned int pixel_step,
|
||||
unsigned int output_height, unsigned int output_width,
|
||||
const short *vp8_filter);
|
||||
|
||||
#if HAVE_SSE2
|
||||
extern void vp8_filter_block1d8_h6_sse2(unsigned char *src_ptr,
|
||||
unsigned short *output_ptr,
|
||||
unsigned int src_pixels_per_line,
|
||||
@ -74,6 +76,7 @@ extern void vp8_filter_block1d8_v6_only_sse2(unsigned char *src_ptr,
|
||||
int dst_ptich,
|
||||
unsigned int output_height,
|
||||
const short *vp8_filter);
|
||||
#endif
|
||||
|
||||
#if HAVE_MMX
|
||||
void vp8_sixtap_predict4x4_mmx(unsigned char *src_ptr, int src_pixels_per_line,
|
||||
|
@ -30,6 +30,8 @@ if CONFIG['CPU_ARCH'] == 'x86_64':
|
||||
elif CONFIG['CPU_ARCH'] == 'x86':
|
||||
EXPORTS.vpx += files['IA32_EXPORTS']
|
||||
SOURCES += files['IA32_SOURCES']
|
||||
if not CONFIG['THE_SSE1']:
|
||||
SOURCES += files['IA32_SSE2_SOURCES']
|
||||
if CONFIG['OS_TARGET'] == 'WINNT':
|
||||
if CONFIG['CC_TYPE'] == 'gcc':
|
||||
ASFLAGS += [ '-I%s/media/libvpx/config/win/mingw32/' % TOPSRCDIR ]
|
||||
|
@ -313,23 +313,13 @@ files = {
|
||||
'libvpx/vp8/common/treecoder.c',
|
||||
'libvpx/vp8/common/vp8_loopfilter.c',
|
||||
'libvpx/vp8/common/vp8_skin_detection.c',
|
||||
'libvpx/vp8/common/x86/copy_sse2.asm',
|
||||
'libvpx/vp8/common/x86/copy_sse3.asm',
|
||||
'libvpx/vp8/common/x86/dequantize_mmx.asm',
|
||||
'libvpx/vp8/common/x86/filter_x86.c',
|
||||
'libvpx/vp8/common/x86/idct_blk_mmx.c',
|
||||
'libvpx/vp8/common/x86/idct_blk_sse2.c',
|
||||
'libvpx/vp8/common/x86/idctllm_mmx.asm',
|
||||
'libvpx/vp8/common/x86/idctllm_sse2.asm',
|
||||
'libvpx/vp8/common/x86/iwalsh_sse2.asm',
|
||||
'libvpx/vp8/common/x86/loopfilter_sse2.asm',
|
||||
'libvpx/vp8/common/x86/loopfilter_x86.c',
|
||||
'libvpx/vp8/common/x86/mfqe_sse2.asm',
|
||||
'libvpx/vp8/common/x86/recon_mmx.asm',
|
||||
'libvpx/vp8/common/x86/recon_sse2.asm',
|
||||
'libvpx/vp8/common/x86/subpixel_mmx.asm',
|
||||
'libvpx/vp8/common/x86/subpixel_sse2.asm',
|
||||
'libvpx/vp8/common/x86/subpixel_ssse3.asm',
|
||||
'libvpx/vp8/common/x86/vp8_asm_stubs.c',
|
||||
'libvpx/vp8/decoder/dboolhuff.c',
|
||||
'libvpx/vp8/decoder/decodeframe.c',
|
||||
@ -361,15 +351,7 @@ files = {
|
||||
'libvpx/vp8/encoder/tokenize.c',
|
||||
'libvpx/vp8/encoder/treewriter.c',
|
||||
'libvpx/vp8/encoder/vp8_quantize.c',
|
||||
'libvpx/vp8/encoder/x86/dct_sse2.asm',
|
||||
'libvpx/vp8/encoder/x86/denoising_sse2.c',
|
||||
'libvpx/vp8/encoder/x86/encodeopt.asm',
|
||||
'libvpx/vp8/encoder/x86/fwalsh_sse2.asm',
|
||||
'libvpx/vp8/encoder/x86/quantize_sse4.c',
|
||||
'libvpx/vp8/encoder/x86/temporal_filter_apply_sse2.asm',
|
||||
'libvpx/vp8/encoder/x86/vp8_enc_stubs_sse2.c',
|
||||
'libvpx/vp8/encoder/x86/vp8_quantize_sse2.c',
|
||||
'libvpx/vp8/encoder/x86/vp8_quantize_ssse3.c',
|
||||
'libvpx/vp8/vp8_cx_iface.c',
|
||||
'libvpx/vp8/vp8_dx_iface.c',
|
||||
'libvpx/vp9/common/vp9_alloccommon.c',
|
||||
@ -395,8 +377,6 @@ files = {
|
||||
'libvpx/vp9/common/vp9_seg_common.c',
|
||||
'libvpx/vp9/common/vp9_thread_common.c',
|
||||
'libvpx/vp9/common/vp9_tile_common.c',
|
||||
'libvpx/vp9/common/x86/vp9_idct_intrin_sse2.c',
|
||||
'libvpx/vp9/common/x86/vp9_mfqe_sse2.asm',
|
||||
'libvpx/vp9/decoder/vp9_decodeframe.c',
|
||||
'libvpx/vp9/decoder/vp9_decodemv.c',
|
||||
'libvpx/vp9/decoder/vp9_decoder.c',
|
||||
@ -439,15 +419,6 @@ files = {
|
||||
'libvpx/vp9/encoder/vp9_temporal_filter.c',
|
||||
'libvpx/vp9/encoder/vp9_tokenize.c',
|
||||
'libvpx/vp9/encoder/vp9_treewriter.c',
|
||||
'libvpx/vp9/encoder/x86/temporal_filter_sse4.c',
|
||||
'libvpx/vp9/encoder/x86/vp9_dct_intrin_sse2.c',
|
||||
'libvpx/vp9/encoder/x86/vp9_dct_sse2.asm',
|
||||
'libvpx/vp9/encoder/x86/vp9_dct_ssse3.c',
|
||||
'libvpx/vp9/encoder/x86/vp9_diamond_search_sad_avx.c',
|
||||
'libvpx/vp9/encoder/x86/vp9_error_avx2.c',
|
||||
'libvpx/vp9/encoder/x86/vp9_error_sse2.asm',
|
||||
'libvpx/vp9/encoder/x86/vp9_frame_scale_ssse3.c',
|
||||
'libvpx/vp9/encoder/x86/vp9_quantize_sse2.c',
|
||||
'libvpx/vp9/vp9_cx_iface.c',
|
||||
'libvpx/vp9/vp9_dx_iface.c',
|
||||
'libvpx/vpx/src/vpx_codec.c',
|
||||
@ -475,6 +446,47 @@ files = {
|
||||
'libvpx/vpx_dsp/variance.c',
|
||||
'libvpx/vpx_dsp/vpx_convolve.c',
|
||||
'libvpx/vpx_dsp/vpx_dsp_rtcd.c',
|
||||
'libvpx/vpx_dsp/x86/vpx_asm_stubs.c',
|
||||
'libvpx/vpx_mem/vpx_mem.c',
|
||||
'libvpx/vpx_ports/emms.asm',
|
||||
'libvpx/vpx_scale/generic/gen_scalers.c',
|
||||
'libvpx/vpx_scale/generic/vpx_scale.c',
|
||||
'libvpx/vpx_scale/generic/yv12config.c',
|
||||
'libvpx/vpx_scale/generic/yv12extend.c',
|
||||
'libvpx/vpx_scale/vpx_scale_rtcd.c',
|
||||
'libvpx/vpx_util/vpx_thread.c',
|
||||
'libvpx/vpx_util/vpx_write_yuv_frame.c',
|
||||
],
|
||||
'IA32_SSE2_SOURCES': [
|
||||
'libvpx/vp8/common/x86/copy_sse2.asm',
|
||||
'libvpx/vp8/common/x86/copy_sse3.asm',
|
||||
'libvpx/vp8/common/x86/idct_blk_sse2.c',
|
||||
'libvpx/vp8/common/x86/idctllm_sse2.asm',
|
||||
'libvpx/vp8/common/x86/iwalsh_sse2.asm',
|
||||
'libvpx/vp8/common/x86/loopfilter_sse2.asm',
|
||||
'libvpx/vp8/common/x86/mfqe_sse2.asm',
|
||||
'libvpx/vp8/common/x86/recon_sse2.asm',
|
||||
'libvpx/vp8/common/x86/subpixel_sse2.asm',
|
||||
'libvpx/vp8/common/x86/subpixel_ssse3.asm',
|
||||
'libvpx/vp8/encoder/x86/dct_sse2.asm',
|
||||
'libvpx/vp8/encoder/x86/denoising_sse2.c',
|
||||
'libvpx/vp8/encoder/x86/fwalsh_sse2.asm',
|
||||
'libvpx/vp8/encoder/x86/quantize_sse4.c',
|
||||
'libvpx/vp8/encoder/x86/temporal_filter_apply_sse2.asm',
|
||||
'libvpx/vp8/encoder/x86/vp8_enc_stubs_sse2.c',
|
||||
'libvpx/vp8/encoder/x86/vp8_quantize_sse2.c',
|
||||
'libvpx/vp8/encoder/x86/vp8_quantize_ssse3.c',
|
||||
'libvpx/vp9/common/x86/vp9_idct_intrin_sse2.c',
|
||||
'libvpx/vp9/common/x86/vp9_mfqe_sse2.asm',
|
||||
'libvpx/vp9/encoder/x86/temporal_filter_sse4.c',
|
||||
'libvpx/vp9/encoder/x86/vp9_dct_intrin_sse2.c',
|
||||
'libvpx/vp9/encoder/x86/vp9_dct_sse2.asm',
|
||||
'libvpx/vp9/encoder/x86/vp9_dct_ssse3.c',
|
||||
'libvpx/vp9/encoder/x86/vp9_diamond_search_sad_avx.c',
|
||||
'libvpx/vp9/encoder/x86/vp9_error_avx2.c',
|
||||
'libvpx/vp9/encoder/x86/vp9_error_sse2.asm',
|
||||
'libvpx/vp9/encoder/x86/vp9_frame_scale_ssse3.c',
|
||||
'libvpx/vp9/encoder/x86/vp9_quantize_sse2.c',
|
||||
'libvpx/vpx_dsp/x86/add_noise_sse2.asm',
|
||||
'libvpx/vpx_dsp/x86/avg_intrin_avx2.c',
|
||||
'libvpx/vpx_dsp/x86/avg_intrin_sse2.c',
|
||||
@ -504,7 +516,6 @@ files = {
|
||||
'libvpx/vpx_dsp/x86/sum_squares_sse2.c',
|
||||
'libvpx/vpx_dsp/x86/variance_avx2.c',
|
||||
'libvpx/vpx_dsp/x86/variance_sse2.c',
|
||||
'libvpx/vpx_dsp/x86/vpx_asm_stubs.c',
|
||||
'libvpx/vpx_dsp/x86/vpx_convolve_copy_sse2.asm',
|
||||
'libvpx/vpx_dsp/x86/vpx_subpixel_8t_intrin_avx2.c',
|
||||
'libvpx/vpx_dsp/x86/vpx_subpixel_8t_intrin_ssse3.c',
|
||||
@ -512,15 +523,6 @@ files = {
|
||||
'libvpx/vpx_dsp/x86/vpx_subpixel_8t_ssse3.asm',
|
||||
'libvpx/vpx_dsp/x86/vpx_subpixel_bilinear_sse2.asm',
|
||||
'libvpx/vpx_dsp/x86/vpx_subpixel_bilinear_ssse3.asm',
|
||||
'libvpx/vpx_mem/vpx_mem.c',
|
||||
'libvpx/vpx_ports/emms.asm',
|
||||
'libvpx/vpx_scale/generic/gen_scalers.c',
|
||||
'libvpx/vpx_scale/generic/vpx_scale.c',
|
||||
'libvpx/vpx_scale/generic/yv12config.c',
|
||||
'libvpx/vpx_scale/generic/yv12extend.c',
|
||||
'libvpx/vpx_scale/vpx_scale_rtcd.c',
|
||||
'libvpx/vpx_util/vpx_thread.c',
|
||||
'libvpx/vpx_util/vpx_write_yuv_frame.c',
|
||||
],
|
||||
'ARM_EXPORTS': [
|
||||
'libvpx/vpx/vp8.h',
|
||||
|
@ -426,8 +426,12 @@ void (*WebPAlphaReplace)(uint32_t* src, int length, uint32_t color);
|
||||
// Init function
|
||||
|
||||
extern void WebPInitAlphaProcessingMIPSdspR2(void);
|
||||
#if defined(WEBP_HAVE_SSE2)
|
||||
extern void WebPInitAlphaProcessingSSE2(void);
|
||||
#if defined(WEBP_HAVE_SSE41)
|
||||
extern void WebPInitAlphaProcessingSSE41(void);
|
||||
#endif
|
||||
#endif
|
||||
extern void WebPInitAlphaProcessingNEON(void);
|
||||
|
||||
WEBP_DSP_INIT_FUNC(WebPInitAlphaProcessing) {
|
||||
|
@ -376,7 +376,9 @@ VP8SetResidualCoeffsFunc VP8SetResidualCoeffs;
|
||||
|
||||
extern void VP8EncDspCostInitMIPS32(void);
|
||||
extern void VP8EncDspCostInitMIPSdspR2(void);
|
||||
#if defined(WEBP_HAVE_SSE2)
|
||||
extern void VP8EncDspCostInitSSE2(void);
|
||||
#endif
|
||||
extern void VP8EncDspCostInitNEON(void);
|
||||
|
||||
WEBP_DSP_INIT_FUNC(VP8EncDspCostInit) {
|
||||
|
@ -68,43 +68,6 @@ extern "C" {
|
||||
# define __has_builtin(x) 0
|
||||
#endif
|
||||
|
||||
#if !defined(HAVE_CONFIG_H)
|
||||
#if defined(_MSC_VER) && _MSC_VER > 1310 && \
|
||||
(defined(_M_X64) || defined(_M_IX86))
|
||||
#define WEBP_MSC_SSE2 // Visual C++ SSE2 targets
|
||||
#endif
|
||||
|
||||
#if defined(_MSC_VER) && _MSC_VER >= 1500 && \
|
||||
(defined(_M_X64) || defined(_M_IX86))
|
||||
#define WEBP_MSC_SSE41 // Visual C++ SSE4.1 targets
|
||||
#endif
|
||||
#endif
|
||||
|
||||
// WEBP_HAVE_* are used to indicate the presence of the instruction set in dsp
|
||||
// files without intrinsics, allowing the corresponding Init() to be called.
|
||||
// Files containing intrinsics will need to be built targeting the instruction
|
||||
// set so should succeed on one of the earlier tests.
|
||||
#if (defined(__SSE2__) || defined(WEBP_MSC_SSE2)) && \
|
||||
(!defined(HAVE_CONFIG_H) || defined(WEBP_HAVE_SSE2))
|
||||
#define WEBP_USE_SSE2
|
||||
#endif
|
||||
|
||||
#if defined(WEBP_USE_SSE2) && !defined(WEBP_HAVE_SSE2)
|
||||
#define WEBP_HAVE_SSE2
|
||||
#endif
|
||||
|
||||
#if (defined(__SSE4_1__) || defined(WEBP_MSC_SSE41)) && \
|
||||
(!defined(HAVE_CONFIG_H) || defined(WEBP_HAVE_SSE41))
|
||||
#define WEBP_USE_SSE41
|
||||
#endif
|
||||
|
||||
#if defined(WEBP_USE_SSE41) && !defined(WEBP_HAVE_SSE41)
|
||||
#define WEBP_HAVE_SSE41
|
||||
#endif
|
||||
|
||||
#undef WEBP_MSC_SSE41
|
||||
#undef WEBP_MSC_SSE2
|
||||
|
||||
// The intrinsics currently cause compiler errors with arm-nacl-gcc and the
|
||||
// inline assembly would need to be modified for use with Native Client.
|
||||
#if ((defined(__ARM_NEON__) || defined(__aarch64__)) && \
|
||||
|
@ -55,7 +55,7 @@ elif CONFIG['CPU_ARCH'] == 'aarch64':
|
||||
'yuv_neon.c',
|
||||
]
|
||||
DEFINES['WEBP_HAVE_NEON'] = 1;
|
||||
elif CONFIG['INTEL_ARCHITECTURE']:
|
||||
elif CONFIG['INTEL_ARCHITECTURE'] and not CONFIG['THE_SSE1']:
|
||||
SOURCES += [
|
||||
'alpha_processing_sse2.c',
|
||||
'alpha_processing_sse41.c',
|
||||
|
@ -253,7 +253,9 @@ struct AecCore {
|
||||
AecCore* WebRtcAec_CreateAec(int instance_count); // Returns NULL on error.
|
||||
void WebRtcAec_FreeAec(AecCore* aec);
|
||||
int WebRtcAec_InitAec(AecCore* aec, int sampFreq);
|
||||
#if defined(WEBRTC_ARCH_X86_FAMILY)
|
||||
void WebRtcAec_InitAec_SSE2(void);
|
||||
#endif
|
||||
#if defined(MIPS_FPU_LE)
|
||||
void WebRtcAec_InitAec_mips(void);
|
||||
#endif
|
||||
|
@ -481,7 +481,7 @@ if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "WINNT":
|
||||
"/media/webrtc/trunk/webrtc/modules/video_processing/video_processing_neon_gn"
|
||||
]
|
||||
|
||||
if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "WINNT":
|
||||
if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "WINNT" and not CONFIG['THE_SSE1']:
|
||||
|
||||
DIRS += [
|
||||
"/media/webrtc/trunk/webrtc/common_audio/common_audio_sse2_gn",
|
||||
|
@ -28,7 +28,9 @@
|
||||
#define WEBRTC_ARCH_64_BITS
|
||||
#define WEBRTC_ARCH_LITTLE_ENDIAN
|
||||
#elif defined(_M_IX86) || defined(__i386__)
|
||||
#define WEBRTC_ARCH_X86_FAMILY
|
||||
# ifndef THE_SSE1
|
||||
# define WEBRTC_ARCH_X86_FAMILY
|
||||
# endif
|
||||
#define WEBRTC_ARCH_X86
|
||||
#define WEBRTC_ARCH_32_BITS
|
||||
#define WEBRTC_ARCH_LITTLE_ENDIAN
|
||||
|
@ -250,80 +250,80 @@ inline bool supports_sse() { return sse_private::sse_enabled; }
|
||||
inline bool supports_sse() { return false; }
|
||||
#endif
|
||||
|
||||
#if defined(MOZILLA_PRESUME_SSE2)
|
||||
#if defined(MOZILLA_PRESUME_SSE2) && !defined(THE_SSE1)
|
||||
# define MOZILLA_MAY_SUPPORT_SSE2 1
|
||||
inline bool supports_sse2() { return true; }
|
||||
#elif defined(MOZILLA_SSE_HAVE_CPUID_DETECTION)
|
||||
#elif defined(MOZILLA_SSE_HAVE_CPUID_DETECTION) && !defined(THE_SSE1)
|
||||
# define MOZILLA_MAY_SUPPORT_SSE2 1
|
||||
inline bool supports_sse2() { return sse_private::sse2_enabled; }
|
||||
#else
|
||||
inline bool supports_sse2() { return false; }
|
||||
#endif
|
||||
|
||||
#if defined(MOZILLA_PRESUME_SSE3)
|
||||
#if defined(MOZILLA_PRESUME_SSE3) && !defined(THE_SSE1)
|
||||
# define MOZILLA_MAY_SUPPORT_SSE3 1
|
||||
inline bool supports_sse3() { return true; }
|
||||
#elif defined(MOZILLA_SSE_HAVE_CPUID_DETECTION)
|
||||
#elif defined(MOZILLA_SSE_HAVE_CPUID_DETECTION) && !defined(THE_SSE1)
|
||||
# define MOZILLA_MAY_SUPPORT_SSE3 1
|
||||
inline bool supports_sse3() { return sse_private::sse3_enabled; }
|
||||
#else
|
||||
inline bool supports_sse3() { return false; }
|
||||
#endif
|
||||
|
||||
#if defined(MOZILLA_PRESUME_SSSE3)
|
||||
#if defined(MOZILLA_PRESUME_SSSE3) && !defined(THE_SSE1)
|
||||
# define MOZILLA_MAY_SUPPORT_SSSE3 1
|
||||
inline bool supports_ssse3() { return true; }
|
||||
#elif defined(MOZILLA_SSE_HAVE_CPUID_DETECTION)
|
||||
#elif defined(MOZILLA_SSE_HAVE_CPUID_DETECTION) && !defined(THE_SSE1)
|
||||
# define MOZILLA_MAY_SUPPORT_SSSE3 1
|
||||
inline bool supports_ssse3() { return sse_private::ssse3_enabled; }
|
||||
#else
|
||||
inline bool supports_ssse3() { return false; }
|
||||
#endif
|
||||
|
||||
#if defined(MOZILLA_PRESUME_SSE4A)
|
||||
#if defined(MOZILLA_PRESUME_SSE4A) && !defined(THE_SSE1)
|
||||
# define MOZILLA_MAY_SUPPORT_SSE4A 1
|
||||
inline bool supports_sse4a() { return true; }
|
||||
#elif defined(MOZILLA_SSE_HAVE_CPUID_DETECTION)
|
||||
#elif defined(MOZILLA_SSE_HAVE_CPUID_DETECTION) && !defined(THE_SSE1)
|
||||
# define MOZILLA_MAY_SUPPORT_SSE4A 1
|
||||
inline bool supports_sse4a() { return sse_private::sse4a_enabled; }
|
||||
#else
|
||||
inline bool supports_sse4a() { return false; }
|
||||
#endif
|
||||
|
||||
#if defined(MOZILLA_PRESUME_SSE4_1)
|
||||
#if defined(MOZILLA_PRESUME_SSE4_1) && !defined(THE_SSE1)
|
||||
# define MOZILLA_MAY_SUPPORT_SSE4_1 1
|
||||
inline bool supports_sse4_1() { return true; }
|
||||
#elif defined(MOZILLA_SSE_HAVE_CPUID_DETECTION)
|
||||
#elif defined(MOZILLA_SSE_HAVE_CPUID_DETECTION) && !defined(THE_SSE1)
|
||||
# define MOZILLA_MAY_SUPPORT_SSE4_1 1
|
||||
inline bool supports_sse4_1() { return sse_private::sse4_1_enabled; }
|
||||
#else
|
||||
inline bool supports_sse4_1() { return false; }
|
||||
#endif
|
||||
|
||||
#if defined(MOZILLA_PRESUME_SSE4_2)
|
||||
#if defined(MOZILLA_PRESUME_SSE4_2) && !defined(THE_SSE1)
|
||||
# define MOZILLA_MAY_SUPPORT_SSE4_2 1
|
||||
inline bool supports_sse4_2() { return true; }
|
||||
#elif defined(MOZILLA_SSE_HAVE_CPUID_DETECTION)
|
||||
#elif defined(MOZILLA_SSE_HAVE_CPUID_DETECTION) && !defined(THE_SSE1)
|
||||
# define MOZILLA_MAY_SUPPORT_SSE4_2 1
|
||||
inline bool supports_sse4_2() { return sse_private::sse4_2_enabled; }
|
||||
#else
|
||||
inline bool supports_sse4_2() { return false; }
|
||||
#endif
|
||||
|
||||
#if defined(MOZILLA_PRESUME_AVX)
|
||||
#if defined(MOZILLA_PRESUME_AVX) && !defined(THE_SSE1)
|
||||
# define MOZILLA_MAY_SUPPORT_AVX 1
|
||||
inline bool supports_avx() { return true; }
|
||||
#elif defined(MOZILLA_SSE_HAVE_CPUID_DETECTION)
|
||||
#elif defined(MOZILLA_SSE_HAVE_CPUID_DETECTION) && !defined(THE_SSE1)
|
||||
# define MOZILLA_MAY_SUPPORT_AVX 1
|
||||
inline bool supports_avx() { return sse_private::avx_enabled; }
|
||||
#else
|
||||
inline bool supports_avx() { return false; }
|
||||
#endif
|
||||
|
||||
#if defined(MOZILLA_PRESUME_AVX2)
|
||||
#if defined(MOZILLA_PRESUME_AVX2) && !defined(THE_SSE1)
|
||||
# define MOZILLA_MAY_SUPPORT_AVX2 1
|
||||
inline bool supports_avx2() { return true; }
|
||||
#elif defined(MOZILLA_SSE_HAVE_CPUID_DETECTION)
|
||||
#elif defined(MOZILLA_SSE_HAVE_CPUID_DETECTION) && !defined(THE_SSE1)
|
||||
# define MOZILLA_MAY_SUPPORT_AVX2 1
|
||||
inline bool supports_avx2() { return sse_private::avx2_enabled; }
|
||||
#else
|
||||
|
@ -707,19 +707,23 @@ case "$target" in
|
||||
CPPFLAGS="$CPPFLAGS -utf-8"
|
||||
fi
|
||||
if test "$CPU_ARCH" = "x86"; then
|
||||
SSE_FLAGS="-arch:SSE"
|
||||
if test "$THE_SSE1" = 1; then
|
||||
SSE2_FLAGS="-arch:SSE"
|
||||
else
|
||||
SSE2_FLAGS="-arch:SSE2"
|
||||
fi
|
||||
dnl VS2012+ defaults to -arch:SSE2. We want to target nothing
|
||||
dnl more recent, so set that explicitly here unless another
|
||||
dnl target arch has already been set.
|
||||
changequote(,)
|
||||
if test -z `echo $CFLAGS | grep -i [-/]arch:`; then
|
||||
CFLAGS="$CFLAGS -arch:SSE2"
|
||||
CFLAGS="$CFLAGS $SSE2_FLAGS"
|
||||
fi
|
||||
if test -z `echo $CXXFLAGS | grep -i [-/]arch:`; then
|
||||
CXXFLAGS="$CXXFLAGS -arch:SSE2"
|
||||
CXXFLAGS="$CXXFLAGS $SSE2_FLAGS"
|
||||
fi
|
||||
changequote([,])
|
||||
SSE_FLAGS="-arch:SSE"
|
||||
SSE2_FLAGS="-arch:SSE2"
|
||||
dnl MSVC allows the use of intrinsics without any flags
|
||||
dnl and doesn't have a separate arch for SSSE3
|
||||
SSSE3_FLAGS="-arch:SSE2"
|
||||
@ -925,6 +929,10 @@ if test -z "$MOZ_OPTIMIZE_FLAGS"; then
|
||||
MOZ_OPTIMIZE_FLAGS="-O"
|
||||
fi
|
||||
|
||||
if test -n "$THE_SSE1"; then
|
||||
AC_DEFINE(THE_SSE1)
|
||||
fi
|
||||
AC_SUBST(THE_SSE1)
|
||||
AC_SUBST_LIST(MMX_FLAGS)
|
||||
AC_SUBST_LIST(SSE_FLAGS)
|
||||
AC_SUBST_LIST(SSE2_FLAGS)
|
||||
|
Loading…
Reference in New Issue
Block a user