The switch for sse1

This commit is contained in:
Fedor 2024-01-07 10:31:20 +02:00
parent 7b56d348b6
commit af8ad28b05
41 changed files with 2044 additions and 894 deletions

View File

@ -459,7 +459,7 @@ SOURCES += [
# Are we targeting x86-32 or x86-64? If so, we want to include SSE2 code for
# nsTextFragment.cpp
if CONFIG['INTEL_ARCHITECTURE']:
if CONFIG['INTEL_ARCHITECTURE'] and not CONFIG['THE_SSE1']:
SOURCES += ['nsTextFragmentSSE2.cpp']
SOURCES['nsTextFragmentSSE2.cpp'].flags += CONFIG['SSE2_FLAGS']

View File

@ -23,7 +23,7 @@ UNIFIED_SOURCES += [
]
# Are we targeting x86 or x64? If so, build SSE2 files.
if CONFIG['INTEL_ARCHITECTURE']:
if CONFIG['INTEL_ARCHITECTURE'] and not CONFIG['THE_SSE1']:
DEFINES['USE_SSE2'] = True
include('/ipc/chromium/chromium-config.mozbuild')

View File

@ -130,7 +130,7 @@ if CONFIG['CPU_ARCH'] == 'aarch64' or CONFIG['BUILD_ARM_NEON']:
]
# Are we targeting x86 or x64? If so, build SSE2 files.
if CONFIG['INTEL_ARCHITECTURE']:
if CONFIG['INTEL_ARCHITECTURE'] and not CONFIG['THE_SSE1']:
SOURCES += ['AudioNodeEngineSSE2.cpp']
DEFINES['USE_SSE2'] = True
SOURCES['AudioNodeEngineSSE2.cpp'].flags += CONFIG['SSE2_FLAGS']

View File

@ -136,7 +136,7 @@ if CONFIG['MOZ_ENABLE_SKIA']:
]
# Are we targeting x86 or x64? If so, build SSE2 files.
if CONFIG['INTEL_ARCHITECTURE']:
if CONFIG['INTEL_ARCHITECTURE'] and not CONFIG['THE_SSE1']:
SOURCES += [
'BlurSSE2.cpp',
'FilterProcessingSSE2.cpp',

View File

@ -79,10 +79,10 @@
# undef far
#endif
#if defined(_MSC_VER) && !defined(_M_ARM) && !defined(_M_ARM64)
#if defined(_MSC_VER) && !defined(_M_ARM) && !defined(_M_ARM64) && !defined(THE_SSE1)
# include <intrin.h>
# define ANGLE_USE_SSE
#elif defined(__GNUC__) && (defined(__x86_64__) || defined(__i386__))
#elif defined(__GNUC__) && (defined(__x86_64__) || defined(__i386__)) && !defined(THE_SSE1)
# include <x86intrin.h>
# define ANGLE_USE_SSE
#endif

View File

@ -1,6 +1,6 @@
AllowCompilerWarnings()
if CONFIG['INTEL_ARCHITECTURE']:
if CONFIG['INTEL_ARCHITECTURE'] and not CONFIG['THE_SSE1']:
CXXFLAGS += CONFIG['SSE2_FLAGS']
DEFINES['__NDK_FPABI__'] = ''
DEFINES['ANGLE_SKIP_DXGI_1_2_CHECK'] = True

View File

@ -86,8 +86,10 @@ use_sse2 = False
use_vmx = False
use_arm_simd_gcc = False
use_arm_neon_gcc = False
if CONFIG['INTEL_ARCHITECTURE']:
if CONFIG['INTEL_ARCHITECTURE'] and not CONFIG['THE_SSE1']:
use_sse2 = True
if CONFIG['INTEL_ARCHITECTURE']:
if CONFIG['CPU_ARCH'] == 'x86':
if CONFIG['CC_TYPE'] == 'clang-cl':
use_mmx = True

View File

@ -490,7 +490,7 @@ static bool AttemptVideoScale(TextureSourceBasic* aSource,
const gfx::Rect& aRect,
const gfx::Rect& aClipRect, DrawTarget* aDest,
const DrawTarget* aBuffer) {
#ifdef MOZILLA_SSE_HAVE_CPUID_DETECTION
#if defined(MOZILLA_SSE_HAVE_CPUID_DETECTION) && !defined(THE_SSE1)
if (!mozilla::supports_ssse3()) return false;
if (aNewTransform
.IsTranslation()) // unscaled painting should take the regular path
@ -563,7 +563,7 @@ static bool AttemptVideoConvertAndScale(
WrappingTextureSourceYCbCrBasic* wrappingSource =
aSource->AsWrappingTextureSourceYCbCrBasic();
if (!wrappingSource) return false;
#ifdef MOZILLA_SSE_HAVE_CPUID_DETECTION
#if defined(MOZILLA_SSE_HAVE_CPUID_DETECTION) && !defined(THE_SSE1)
if (!mozilla::supports_ssse3()) // libyuv requests SSSE3 for fast YUV
// conversion.
return false;

View File

@ -23,8 +23,10 @@ if CONFIG['CC_TYPE'] in ('clang', 'gcc'):
use_sse1 = False
use_sse2 = False
use_altivec = False
if CONFIG['INTEL_ARCHITECTURE']:
if CONFIG['INTEL_ARCHITECTURE'] and not CONFIG['THE_SSE1']:
use_sse2 = True
if CONFIG['INTEL_ARCHITECTURE']:
if CONFIG['CC_TYPE'] == 'clang-cl':
if CONFIG['OS_ARCH'] != 'WINNT' or CONFIG['CPU_ARCH'] != 'x86_64':
use_sse1 = True

View File

@ -442,6 +442,7 @@ static void qcms_transform_data_graya_out_precache(qcms_transform *transform, un
}
}
#if (defined(__POWERPC__) || defined(__powerpc__) && !defined(__NO_FPRS__))
static void qcms_transform_data_rgb_out_lut_precache(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length)
{
unsigned int i;
@ -511,6 +512,7 @@ static void qcms_transform_data_rgba_out_lut_precache(qcms_transform *transform,
dest += RGBA_OUTPUT_COMPONENTS;
}
}
#endif
// Not used
/*
@ -1011,87 +1013,6 @@ void qcms_transform_release(qcms_transform *t)
transform_free(t);
}
#ifdef X86
// Determine if we can build with SSE2 (this was partly copied from jmorecfg.h in
// mozilla/jpeg)
// -------------------------------------------------------------------------
#if defined(_M_IX86) && defined(_MSC_VER)
#define HAS_CPUID
/*
 * cpuid() - execute the CPUID instruction (MSVC inline-assembly variant).
 *
 * fxn is loaded into EAX before CPUID runs. The EAX, EBX, ECX and EDX values
 * left by the instruction are stored through a, b, c and d respectively.
 *
 * EBX is exchanged with ESI before CPUID and exchanged back afterwards, so
 * EBX holds its original value when the asm block ends. EBX is protected
 * because it is sometimes the PIC register; the original author was not sure
 * whether that ever happens on Windows, but cpuid is not on the critical path,
 * so the register is preserved to be safe and to stay consistent with the
 * non-Windows version.
 *
 * NOTE(review): ESI is overwritten inside the asm block (it ends up holding
 * CPUID's EBX output); presumably the compiler saves/restores it around
 * __asm blocks - confirm against the MSVC inline assembler documentation.
 */
static void cpuid(uint32_t fxn, uint32_t *a, uint32_t *b, uint32_t *c, uint32_t *d) {
/* Results are staged in locals and copied to the out-pointers afterwards. */
uint32_t a_, b_, c_, d_;
/* CPUID's EBX result is copied into b_ before the second xchg puts the
   original EBX value back. */
__asm {
xchg ebx, esi
mov eax, fxn
cpuid
mov a_, eax
mov b_, ebx
mov c_, ecx
mov d_, edx
xchg ebx, esi
}
*a = a_;
*b = b_;
*c = c_;
*d = d_;
}
#elif (defined(__GNUC__) || defined(__SUNPRO_C)) && (defined(__i386__) || defined(__i386))
#define HAS_CPUID
/*
 * cpuid() - execute the CPUID instruction (GCC / Sun Pro extended-asm variant).
 *
 * fxn is passed in EAX (the only input operand). The EAX, ECX and EDX values
 * left by the instruction are stored through a, c and d; CPUID's EBX value is
 * stored through b.
 *
 * EBX cannot be named as an output because it is the PIC register on some
 * platforms. Instead, EBX is exchanged with ESI before CPUID and exchanged
 * back afterwards: EBX ends up with its original value and CPUID's EBX result
 * is read out of ESI via the "=S" output constraint.
 *
 * NOTE(review): ECX is not initialised before CPUID, so a leaf that takes a
 * sub-leaf index in ECX would see whatever value the register happened to
 * hold - confirm that callers only request leaves without sub-leaves.
 */
static void cpuid(uint32_t fxn, uint32_t *a, uint32_t *b, uint32_t *c, uint32_t *d) {
/* Results are staged in locals and copied to the out-pointers afterwards. */
uint32_t a_, b_, c_, d_;
__asm__ __volatile__ ("xchgl %%ebx, %%esi; cpuid; xchgl %%ebx, %%esi;"
: "=a" (a_), "=S" (b_), "=c" (c_), "=d" (d_) : "a" (fxn));
*a = a_;
*b = b_;
*c = c_;
*d = d_;
}
#endif
// -------------------------Runtime SSEx Detection-----------------------------
/* MMX is always supported per
* Gecko v1.9.1 minimum CPU requirements */
#define SSE1_EDX_MASK (1UL << 25)
#define SSE2_EDX_MASK (1UL << 26)
#define SSE3_ECX_MASK (1UL << 0)
/*
 * Report the highest SSE generation this function can detect.
 *
 * Returns:
 *   2        on 64-bit x86 builds (compile-time constant, no CPUID query);
 *   0..3     on builds where HAS_CPUID is defined: 3 if the SSE3 bit is set
 *            in ECX of CPUID leaf 1, else 2 / 1 if the SSE2 / SSE1 bit is set
 *            in EDX, else 0. The answer is computed on the first call and
 *            cached in a function-local static for later calls;
 *   0        on every other build.
 */
static int sse_version_available(void)
{
#if defined(__x86_64__) || defined(__x86_64) || defined(_M_AMD64)
	/* 64-bit x86 CPUs always have SSE2, and this is known at build time.
	 * Returning a constant tells the compiler that the non-SSE2 branches
	 * are never taken (i.e. it is OK to optimize away the SSE1 and
	 * non-SIMD code). */
	return 2;
#elif defined(HAS_CPUID)
	static int cached_level = -1;	/* -1 means "not probed yet" */
	uint32_t eax, ebx, ecx, edx;

	if (cached_level != -1) {
		return cached_level;
	}

	/* Start from "no SSE" and raise the level according to the feature
	 * bits reported by CPUID leaf 1. */
	cached_level = 0;
	cpuid(0x00000001, &eax, &ebx, &ecx, &edx);
	if (ecx & SSE3_ECX_MASK) {
		cached_level = 3;
	} else if (edx & SSE2_EDX_MASK) {
		cached_level = 2;
	} else if (edx & SSE1_EDX_MASK) {
		cached_level = 1;
	}
	return cached_level;
#else
	return 0;
#endif
}
#endif
static const struct matrix bradford_matrix = {{ { 0.8951f, 0.2664f,-0.1614f},
{-0.7502f, 1.7135f, 0.0367f},
{ 0.0389f,-0.0685f, 1.0296f}},
@ -1291,24 +1212,21 @@ qcms_transform* qcms_transform_create(
return NULL;
}
if (precache) {
#ifdef X86
if (sse_version_available() >= 2) {
if (in_type == QCMS_DATA_RGB_8)
transform->transform_fn = qcms_transform_data_rgb_out_lut_sse2;
else
transform->transform_fn = qcms_transform_data_rgba_out_lut_sse2;
#if defined(X86)
#if !defined(THE_SSE1)
if (in_type == QCMS_DATA_RGB_8)
transform->transform_fn = qcms_transform_data_rgb_out_lut_sse2;
else
transform->transform_fn = qcms_transform_data_rgba_out_lut_sse2;
#if !(defined(_MSC_VER) && defined(_M_AMD64))
/* The Microsoft compiler for x64 doesn't support MMX.
* The SSE1 code path uses MMX, so it is disabled on x64. */
} else
if (sse_version_available() >= 1) {
if (in_type == QCMS_DATA_RGB_8)
transform->transform_fn = qcms_transform_data_rgb_out_lut_sse1;
else
transform->transform_fn = qcms_transform_data_rgba_out_lut_sse1;
#elif !(defined(_MSC_VER) && defined(_M_AMD64)) || defined(THE_SSE1)
/* The Microsoft compiler for x64 doesn't support MMX.
* The SSE1 code path uses MMX, so it is disabled on x64. */
if (in_type == QCMS_DATA_RGB_8)
transform->transform_fn = qcms_transform_data_rgb_out_lut_sse1;
else
transform->transform_fn = qcms_transform_data_rgba_out_lut_sse1;
#endif
} else
#endif
#if (defined(__POWERPC__) || defined(__powerpc__) && !defined(__NO_FPRS__))
if (have_altivec()) {
@ -1316,14 +1234,14 @@ qcms_transform* qcms_transform_create(
transform->transform_fn = qcms_transform_data_rgb_out_lut_altivec;
else
transform->transform_fn = qcms_transform_data_rgba_out_lut_altivec;
} else
} else {
if (in_type == QCMS_DATA_RGB_8)
transform->transform_fn = qcms_transform_data_rgb_out_lut_precache;
else
transform->transform_fn = qcms_transform_data_rgba_out_lut_precache;
}
#endif
{
if (in_type == QCMS_DATA_RGB_8)
transform->transform_fn = qcms_transform_data_rgb_out_lut_precache;
else
transform->transform_fn = qcms_transform_data_rgba_out_lut_precache;
}
} else {
if (in_type == QCMS_DATA_RGB_8)
transform->transform_fn = qcms_transform_data_rgb_out_lut;

View File

@ -160,7 +160,7 @@ elif CONFIG['MOZ_WIDGET_TOOLKIT'] == 'windows':
]
# Are we targeting x86 or x64? If so, build gfxAlphaRecoverySSE2.cpp.
if CONFIG['INTEL_ARCHITECTURE']:
if CONFIG['INTEL_ARCHITECTURE'] and not CONFIG['THE_SSE1']:
SOURCES += ['gfxAlphaRecoverySSE2.cpp']
# The file uses SSE2 intrinsics, so it needs special compile flags on some
# compilers.

View File

@ -15,12 +15,13 @@ UNIFIED_SOURCES += [
'yuv_row_table.cpp',
]
if CONFIG['INTEL_ARCHITECTURE']:
if CONFIG['INTEL_ARCHITECTURE'] and not CONFIG['THE_SSE1']:
# These files use MMX and SSE2 intrinsics, so they need special compile flags
# on some compilers.
SOURCES += ['yuv_convert_sse2.cpp']
SOURCES['yuv_convert_sse2.cpp'].flags += CONFIG['SSE2_FLAGS']
if CONFIG['INTEL_ARCHITECTURE']:
# MSVC doesn't support MMX when targeting AMD64.
if CONFIG['CC_TYPE'] == 'clang-cl':
if CONFIG['CPU_ARCH'] == 'x86':
@ -33,7 +34,7 @@ if CONFIG['INTEL_ARCHITECTURE']:
if CONFIG['CC_TYPE'] == 'clang-cl':
if CONFIG['CPU_ARCH'] == 'x86_64' or \
(CONFIG['CPU_ARCH'] == 'x86' and CONFIG['CC_TYPE'] == 'clang-cl'):
(CONFIG['CPU_ARCH'] == 'x86' and CONFIG['CC_TYPE'] == 'clang-cl' and not CONFIG['THE_SSE1']):
SOURCES += [
'yuv_row_win64.cpp',
]

View File

@ -579,11 +579,16 @@ case "$target" in
dnl more recent, so set that explicitly here unless another
dnl target arch has already been set.
changequote(,)
if test "$THE_SSE1" = 1; then
SSE2_FLAGS="-arch:SSE"
else
SSE2_FLAGS="-arch:SSE2"
fi
if test -z `echo $CFLAGS | grep -i [-/]arch:` ; then
CFLAGS="$CFLAGS -arch:SSE2"
CFLAGS="$CFLAGS $SSE2_FLAGS"
fi
if test -z `echo $CXXFLAGS | grep -i [-/]arch:` ; then
CXXFLAGS="$CXXFLAGS -arch:SSE2"
CXXFLAGS="$CXXFLAGS $SSE2_FLAGS"
fi
changequote([,])
fi

View File

@ -69,11 +69,19 @@
#define HAVE_MSA 0
#define HAVE_NEON 0
#define HAVE_SSE 1
#ifndef THE_SSE1
#define HAVE_SSE2 1
#define HAVE_SSE3 1
#define HAVE_SSE4_1 1
#define HAVE_SSE4_2 1
#define HAVE_SSSE3 1
#else
#define HAVE_SSE2 0
#define HAVE_SSE3 0
#define HAVE_SSE4_1 0
#define HAVE_SSE4_2 0
#define HAVE_SSSE3 0
#endif
#define HAVE_VSX 0
#define HAVE_WXWIDGETS 0
#define INCLUDE_INSTALL_DIR INSTALLDIR/include

File diff suppressed because it is too large Load Diff

View File

@ -52,51 +52,71 @@ extern "C" {
#endif
void apply_selfguided_restoration_c(const uint8_t *dat, int width, int height, int stride, int eps, const int *xqd, uint8_t *dst, int dst_stride, int32_t *tmpbuf, int bit_depth, int highbd);
#ifndef THE_SSE1
void apply_selfguided_restoration_sse4_1(const uint8_t *dat, int width, int height, int stride, int eps, const int *xqd, uint8_t *dst, int dst_stride, int32_t *tmpbuf, int bit_depth, int highbd);
void apply_selfguided_restoration_avx2(const uint8_t *dat, int width, int height, int stride, int eps, const int *xqd, uint8_t *dst, int dst_stride, int32_t *tmpbuf, int bit_depth, int highbd);
#endif
RTCD_EXTERN void (*apply_selfguided_restoration)(const uint8_t *dat, int width, int height, int stride, int eps, const int *xqd, uint8_t *dst, int dst_stride, int32_t *tmpbuf, int bit_depth, int highbd);
void av1_build_compound_diffwtd_mask_c(uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const uint8_t *src0, int src0_stride, const uint8_t *src1, int src1_stride, int h, int w);
#ifndef THE_SSE1
void av1_build_compound_diffwtd_mask_sse4_1(uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const uint8_t *src0, int src0_stride, const uint8_t *src1, int src1_stride, int h, int w);
void av1_build_compound_diffwtd_mask_avx2(uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const uint8_t *src0, int src0_stride, const uint8_t *src1, int src1_stride, int h, int w);
#endif
RTCD_EXTERN void (*av1_build_compound_diffwtd_mask)(uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const uint8_t *src0, int src0_stride, const uint8_t *src1, int src1_stride, int h, int w);
void av1_build_compound_diffwtd_mask_d16_c(uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const CONV_BUF_TYPE *src0, int src0_stride, const CONV_BUF_TYPE *src1, int src1_stride, int h, int w, ConvolveParams *conv_params, int bd);
#ifndef THE_SSE1
void av1_build_compound_diffwtd_mask_d16_sse4_1(uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const CONV_BUF_TYPE *src0, int src0_stride, const CONV_BUF_TYPE *src1, int src1_stride, int h, int w, ConvolveParams *conv_params, int bd);
void av1_build_compound_diffwtd_mask_d16_avx2(uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const CONV_BUF_TYPE *src0, int src0_stride, const CONV_BUF_TYPE *src1, int src1_stride, int h, int w, ConvolveParams *conv_params, int bd);
#endif
RTCD_EXTERN void (*av1_build_compound_diffwtd_mask_d16)(uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const CONV_BUF_TYPE *src0, int src0_stride, const CONV_BUF_TYPE *src1, int src1_stride, int h, int w, ConvolveParams *conv_params, int bd);
void av1_build_compound_diffwtd_mask_highbd_c(uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const uint8_t *src0, int src0_stride, const uint8_t *src1, int src1_stride, int h, int w, int bd);
#ifndef THE_SSE1
void av1_build_compound_diffwtd_mask_highbd_ssse3(uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const uint8_t *src0, int src0_stride, const uint8_t *src1, int src1_stride, int h, int w, int bd);
void av1_build_compound_diffwtd_mask_highbd_avx2(uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const uint8_t *src0, int src0_stride, const uint8_t *src1, int src1_stride, int h, int w, int bd);
#endif
RTCD_EXTERN void (*av1_build_compound_diffwtd_mask_highbd)(uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const uint8_t *src0, int src0_stride, const uint8_t *src1, int src1_stride, int h, int w, int bd);
void av1_convolve_2d_copy_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
#ifndef THE_SSE1
void av1_convolve_2d_copy_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_2d_copy_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
#endif
RTCD_EXTERN void (*av1_convolve_2d_copy_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_2d_scale_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params);
#ifndef THE_SSE1
void av1_convolve_2d_scale_sse4_1(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params);
#endif
RTCD_EXTERN void (*av1_convolve_2d_scale)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params);
void av1_convolve_2d_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
#ifndef THE_SSE1
void av1_convolve_2d_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_2d_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
#endif
RTCD_EXTERN void (*av1_convolve_2d_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_horiz_rs_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const int16_t *x_filters, int x0_qn, int x_step_qn);
#ifndef THE_SSE1
void av1_convolve_horiz_rs_sse4_1(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const int16_t *x_filters, int x0_qn, int x_step_qn);
#endif
RTCD_EXTERN void (*av1_convolve_horiz_rs)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const int16_t *x_filters, int x0_qn, int x_step_qn);
void av1_convolve_x_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
#ifndef THE_SSE1
void av1_convolve_x_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_x_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
#endif
RTCD_EXTERN void (*av1_convolve_x_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_y_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
#ifndef THE_SSE1
void av1_convolve_y_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_y_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
#endif
RTCD_EXTERN void (*av1_convolve_y_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_dr_prediction_z1_c(uint8_t *dst, ptrdiff_t stride, int bw, int bh, const uint8_t *above, const uint8_t *left, int upsample_above, int dx, int dy);
@ -109,15 +129,21 @@ void av1_dr_prediction_z3_c(uint8_t *dst, ptrdiff_t stride, int bw, int bh, cons
#define av1_dr_prediction_z3 av1_dr_prediction_z3_c
void av1_filter_intra_edge_c(uint8_t *p, int sz, int strength);
#ifndef THE_SSE1
void av1_filter_intra_edge_sse4_1(uint8_t *p, int sz, int strength);
#endif
RTCD_EXTERN void (*av1_filter_intra_edge)(uint8_t *p, int sz, int strength);
void av1_filter_intra_edge_high_c(uint16_t *p, int sz, int strength);
#ifndef THE_SSE1
void av1_filter_intra_edge_high_sse4_1(uint16_t *p, int sz, int strength);
#endif
RTCD_EXTERN void (*av1_filter_intra_edge_high)(uint16_t *p, int sz, int strength);
void av1_filter_intra_predictor_c(uint8_t *dst, ptrdiff_t stride, TX_SIZE tx_size, const uint8_t *above, const uint8_t *left, int mode);
#ifndef THE_SSE1
void av1_filter_intra_predictor_sse4_1(uint8_t *dst, ptrdiff_t stride, TX_SIZE tx_size, const uint8_t *above, const uint8_t *left, int mode);
#endif
RTCD_EXTERN void (*av1_filter_intra_predictor)(uint8_t *dst, ptrdiff_t stride, TX_SIZE tx_size, const uint8_t *above, const uint8_t *left, int mode);
void av1_highbd_convolve8_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps);
@ -130,17 +156,23 @@ void av1_highbd_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8
#define av1_highbd_convolve8_vert av1_highbd_convolve8_vert_c
void av1_highbd_convolve_2d_copy_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
#ifndef THE_SSE1
void av1_highbd_convolve_2d_copy_sr_sse2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_2d_copy_sr_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
#endif
RTCD_EXTERN void (*av1_highbd_convolve_2d_copy_sr)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_2d_scale_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd);
#ifndef THE_SSE1
void av1_highbd_convolve_2d_scale_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd);
#endif
RTCD_EXTERN void (*av1_highbd_convolve_2d_scale)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_2d_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
#ifndef THE_SSE1
void av1_highbd_convolve_2d_sr_ssse3(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_2d_sr_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
#endif
RTCD_EXTERN void (*av1_highbd_convolve_2d_sr)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps);
@ -150,17 +182,23 @@ void av1_highbd_convolve_copy_c(const uint8_t *src, ptrdiff_t src_stride, uint8_
#define av1_highbd_convolve_copy av1_highbd_convolve_copy_c
void av1_highbd_convolve_horiz_rs_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const int16_t *x_filters, int x0_qn, int x_step_qn, int bd);
#ifndef THE_SSE1
void av1_highbd_convolve_horiz_rs_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const int16_t *x_filters, int x0_qn, int x_step_qn, int bd);
#endif
RTCD_EXTERN void (*av1_highbd_convolve_horiz_rs)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const int16_t *x_filters, int x0_qn, int x_step_qn, int bd);
void av1_highbd_convolve_x_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
#ifndef THE_SSE1
void av1_highbd_convolve_x_sr_ssse3(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_x_sr_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
#endif
RTCD_EXTERN void (*av1_highbd_convolve_x_sr)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_y_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
#ifndef THE_SSE1
void av1_highbd_convolve_y_sr_ssse3(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_y_sr_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
#endif
RTCD_EXTERN void (*av1_highbd_convolve_y_sr)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_dr_prediction_z1_c(uint16_t *dst, ptrdiff_t stride, int bw, int bh, const uint16_t *above, const uint16_t *left, int upsample_above, int dx, int dy, int bd);
@ -173,33 +211,47 @@ void av1_highbd_dr_prediction_z3_c(uint16_t *dst, ptrdiff_t stride, int bw, int
#define av1_highbd_dr_prediction_z3 av1_highbd_dr_prediction_z3_c
void av1_highbd_inv_txfm_add_c(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
#ifndef THE_SSE1
void av1_highbd_inv_txfm_add_sse4_1(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
void av1_highbd_inv_txfm_add_avx2(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
#endif
RTCD_EXTERN void (*av1_highbd_inv_txfm_add)(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
void av1_highbd_inv_txfm_add_16x16_c(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
#ifndef THE_SSE1
void av1_highbd_inv_txfm_add_16x16_sse4_1(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
#endif
RTCD_EXTERN void (*av1_highbd_inv_txfm_add_16x16)(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
void av1_highbd_inv_txfm_add_16x8_c(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
#ifndef THE_SSE1
void av1_highbd_inv_txfm_add_16x8_sse4_1(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
#endif
RTCD_EXTERN void (*av1_highbd_inv_txfm_add_16x8)(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
void av1_highbd_inv_txfm_add_32x32_c(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
#ifndef THE_SSE1
void av1_highbd_inv_txfm_add_32x32_sse4_1(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
void av1_highbd_inv_txfm_add_32x32_avx2(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
#endif
RTCD_EXTERN void (*av1_highbd_inv_txfm_add_32x32)(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
void av1_highbd_inv_txfm_add_4x4_c(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
#ifndef THE_SSE1
void av1_highbd_inv_txfm_add_4x4_sse4_1(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
#endif
RTCD_EXTERN void (*av1_highbd_inv_txfm_add_4x4)(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
void av1_highbd_inv_txfm_add_8x16_c(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
#ifndef THE_SSE1
void av1_highbd_inv_txfm_add_8x16_sse4_1(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
#endif
RTCD_EXTERN void (*av1_highbd_inv_txfm_add_8x16)(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
void av1_highbd_inv_txfm_add_8x8_c(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
#ifndef THE_SSE1
void av1_highbd_inv_txfm_add_8x8_sse4_1(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
#endif
RTCD_EXTERN void (*av1_highbd_inv_txfm_add_8x8)(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
void av1_highbd_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, int bd);
@ -209,32 +261,44 @@ void av1_highbd_iwht4x4_1_add_c(const tran_low_t *input, uint8_t *dest, int dest
/* No function pointer for this entry: the un-suffixed name is a macro alias for the *_c function. */
#define av1_highbd_iwht4x4_1_add av1_highbd_iwht4x4_1_add_c
/*
 * av1_highbd_jnt_convolve_{2d,2d_copy,x,y}
 *
 * All four entries share one signature. For each, the *_c prototype is always
 * declared, the _sse4_1 and _avx2 prototypes are declared only when THE_SSE1
 * is not defined, and an RTCD_EXTERN function pointer of the same type follows.
 * setup_rtcd_internal() assigns the *_c function to each pointer and, when
 * THE_SSE1 is not defined, replaces it according to the x86_simd_caps() flags.
 */
/* 2d */
void av1_highbd_jnt_convolve_2d_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
#ifndef THE_SSE1
void av1_highbd_jnt_convolve_2d_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_2d_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
#endif
RTCD_EXTERN void (*av1_highbd_jnt_convolve_2d)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
/* 2d_copy */
void av1_highbd_jnt_convolve_2d_copy_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
#ifndef THE_SSE1
void av1_highbd_jnt_convolve_2d_copy_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_2d_copy_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
#endif
RTCD_EXTERN void (*av1_highbd_jnt_convolve_2d_copy)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
/* x */
void av1_highbd_jnt_convolve_x_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
#ifndef THE_SSE1
void av1_highbd_jnt_convolve_x_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_x_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
#endif
RTCD_EXTERN void (*av1_highbd_jnt_convolve_x)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
/* y */
void av1_highbd_jnt_convolve_y_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
#ifndef THE_SSE1
void av1_highbd_jnt_convolve_y_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_y_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
#endif
RTCD_EXTERN void (*av1_highbd_jnt_convolve_y)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
/*
 * av1_highbd_warp_affine and av1_highbd_wiener_convolve_add_src
 *
 * For each entry the *_c prototype is always declared, the SIMD-suffixed
 * prototypes are declared only when THE_SSE1 is not defined, and an
 * RTCD_EXTERN function pointer with the same signature follows.
 * setup_rtcd_internal() assigns the *_c function to each pointer and, when
 * THE_SSE1 is not defined, replaces it according to the x86_simd_caps() flags.
 */
/* av1_highbd_warp_affine: _sse4_1 variant only. */
void av1_highbd_warp_affine_c(const int32_t *mat, const uint16_t *ref, int width, int height, int stride, uint16_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, int bd, ConvolveParams *conv_params, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta);
#ifndef THE_SSE1
void av1_highbd_warp_affine_sse4_1(const int32_t *mat, const uint16_t *ref, int width, int height, int stride, uint16_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, int bd, ConvolveParams *conv_params, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta);
#endif
RTCD_EXTERN void (*av1_highbd_warp_affine)(const int32_t *mat, const uint16_t *ref, int width, int height, int stride, uint16_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, int bd, ConvolveParams *conv_params, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta);
/* av1_highbd_wiener_convolve_add_src: _ssse3 and _avx2 variants. */
void av1_highbd_wiener_convolve_add_src_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, const ConvolveParams *conv_params, int bps);
#ifndef THE_SSE1
void av1_highbd_wiener_convolve_add_src_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, const ConvolveParams *conv_params, int bps);
void av1_highbd_wiener_convolve_add_src_avx2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, const ConvolveParams *conv_params, int bps);
#endif
RTCD_EXTERN void (*av1_highbd_wiener_convolve_add_src)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, const ConvolveParams *conv_params, int bps);
void av1_inv_txfm2d_add_16x16_c(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd);
@ -268,7 +332,9 @@ void av1_inv_txfm2d_add_4x16_c(const int32_t *input, uint16_t *output, int strid
/* 4x16 has no function pointer: the un-suffixed name is a macro alias for the *_c function. */
#define av1_inv_txfm2d_add_4x16 av1_inv_txfm2d_add_4x16_c
/*
 * 4x4 is dispatched through an RTCD_EXTERN function pointer. The _sse4_1
 * prototype is declared only when THE_SSE1 is not defined;
 * setup_rtcd_internal() assigns the *_c function first and, when THE_SSE1 is
 * not defined, switches to _sse4_1 if the HAS_SSE4_1 flag is set.
 */
void av1_inv_txfm2d_add_4x4_c(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd);
#ifndef THE_SSE1
void av1_inv_txfm2d_add_4x4_sse4_1(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd);
#endif
RTCD_EXTERN void (*av1_inv_txfm2d_add_4x4)(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd);
void av1_inv_txfm2d_add_4x8_c(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd);
@ -293,135 +359,183 @@ void av1_inv_txfm2d_add_8x4_c(const int32_t *input, uint16_t *output, int stride
/* 8x4 has no function pointer: the un-suffixed name is a macro alias for the *_c function. */
#define av1_inv_txfm2d_add_8x4 av1_inv_txfm2d_add_8x4_c
/*
 * av1_inv_txfm2d_add_8x8 and av1_inv_txfm_add are dispatched through
 * RTCD_EXTERN function pointers. Their SIMD-suffixed prototypes are declared
 * only when THE_SSE1 is not defined; setup_rtcd_internal() assigns the *_c
 * function first and, when THE_SSE1 is not defined, replaces it according to
 * the x86_simd_caps() flags.
 */
/* av1_inv_txfm2d_add_8x8: _sse4_1 variant only. */
void av1_inv_txfm2d_add_8x8_c(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd);
#ifndef THE_SSE1
void av1_inv_txfm2d_add_8x8_sse4_1(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd);
#endif
RTCD_EXTERN void (*av1_inv_txfm2d_add_8x8)(const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd);
/* av1_inv_txfm_add: _ssse3 and _avx2 variants. */
void av1_inv_txfm_add_c(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
#ifndef THE_SSE1
void av1_inv_txfm_add_ssse3(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
void av1_inv_txfm_add_avx2(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
#endif
RTCD_EXTERN void (*av1_inv_txfm_add)(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
/*
 * av1_jnt_convolve_{2d,2d_copy,x,y}
 *
 * All four entries share one signature. For each, the *_c prototype is always
 * declared, the SIMD-suffixed prototypes are declared only when THE_SSE1 is
 * not defined, and an RTCD_EXTERN function pointer of the same type follows.
 * setup_rtcd_internal() assigns the *_c function to each pointer and, when
 * THE_SSE1 is not defined, replaces it according to the x86_simd_caps() flags.
 */
/* 2d: _ssse3 and _avx2 variants. */
void av1_jnt_convolve_2d_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
#ifndef THE_SSE1
void av1_jnt_convolve_2d_ssse3(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_2d_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
#endif
RTCD_EXTERN void (*av1_jnt_convolve_2d)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
/* 2d_copy: _sse2 and _avx2 variants. */
void av1_jnt_convolve_2d_copy_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
#ifndef THE_SSE1
void av1_jnt_convolve_2d_copy_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_2d_copy_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
#endif
RTCD_EXTERN void (*av1_jnt_convolve_2d_copy)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
/* x: _sse2 and _avx2 variants. */
void av1_jnt_convolve_x_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
#ifndef THE_SSE1
void av1_jnt_convolve_x_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_x_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
#endif
RTCD_EXTERN void (*av1_jnt_convolve_x)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
/* y: _sse2 and _avx2 variants. */
void av1_jnt_convolve_y_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
#ifndef THE_SSE1
void av1_jnt_convolve_y_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_y_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
#endif
RTCD_EXTERN void (*av1_jnt_convolve_y)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
/*
 * av1_selfguided_restoration
 *
 * The *_c prototype is always declared; the _sse4_1 and _avx2 prototypes are
 * declared only when THE_SSE1 is not defined. The RTCD_EXTERN function
 * pointer has the same signature and, unlike most entries here, returns int.
 * setup_rtcd_internal() assigns the *_c function first and, when THE_SSE1 is
 * not defined, replaces it according to the HAS_SSE4_1 / HAS_AVX2 flags.
 */
int av1_selfguided_restoration_c(const uint8_t *dgd8, int width, int height,
int dgd_stride, int32_t *flt0, int32_t *flt1, int flt_stride,
int sgr_params_idx, int bit_depth, int highbd);
#ifndef THE_SSE1
int av1_selfguided_restoration_sse4_1(const uint8_t *dgd8, int width, int height,
int dgd_stride, int32_t *flt0, int32_t *flt1, int flt_stride,
int sgr_params_idx, int bit_depth, int highbd);
int av1_selfguided_restoration_avx2(const uint8_t *dgd8, int width, int height,
int dgd_stride, int32_t *flt0, int32_t *flt1, int flt_stride,
int sgr_params_idx, int bit_depth, int highbd);
#endif
RTCD_EXTERN int (*av1_selfguided_restoration)(const uint8_t *dgd8, int width, int height,
int dgd_stride, int32_t *flt0, int32_t *flt1, int flt_stride,
int sgr_params_idx, int bit_depth, int highbd);
/*
 * av1_upsample_intra_edge and av1_upsample_intra_edge_high
 *
 * Each has an always-declared *_c prototype, an _sse4_1 prototype declared
 * only when THE_SSE1 is not defined, and an RTCD_EXTERN function pointer.
 * setup_rtcd_internal() assigns the *_c function first and, when THE_SSE1 is
 * not defined, switches to _sse4_1 if the HAS_SSE4_1 flag is set.
 */
/* uint8_t buffer variant. */
void av1_upsample_intra_edge_c(uint8_t *p, int sz);
#ifndef THE_SSE1
void av1_upsample_intra_edge_sse4_1(uint8_t *p, int sz);
#endif
RTCD_EXTERN void (*av1_upsample_intra_edge)(uint8_t *p, int sz);
/* uint16_t buffer variant; takes an additional bd argument. */
void av1_upsample_intra_edge_high_c(uint16_t *p, int sz, int bd);
#ifndef THE_SSE1
void av1_upsample_intra_edge_high_sse4_1(uint16_t *p, int sz, int bd);
#endif
RTCD_EXTERN void (*av1_upsample_intra_edge_high)(uint16_t *p, int sz, int bd);
/*
 * av1_warp_affine and av1_wiener_convolve_add_src
 *
 * For each entry the *_c prototype is always declared, the SIMD-suffixed
 * prototypes are declared only when THE_SSE1 is not defined, and an
 * RTCD_EXTERN function pointer with the same signature follows.
 * setup_rtcd_internal() initialises each pointer to the *_c function.
 * NOTE(review): the flag-based switch to the SIMD variants is presumably done
 * later in setup_rtcd_internal(), as for the other entries - confirm.
 */
/* av1_warp_affine: _sse4_1 variant only. */
void av1_warp_affine_c(const int32_t *mat, const uint8_t *ref, int width, int height, int stride, uint8_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, ConvolveParams *conv_params, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta);
#ifndef THE_SSE1
void av1_warp_affine_sse4_1(const int32_t *mat, const uint8_t *ref, int width, int height, int stride, uint8_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, ConvolveParams *conv_params, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta);
#endif
RTCD_EXTERN void (*av1_warp_affine)(const int32_t *mat, const uint8_t *ref, int width, int height, int stride, uint8_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, ConvolveParams *conv_params, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta);
/* av1_wiener_convolve_add_src: _sse2 and _avx2 variants. */
void av1_wiener_convolve_add_src_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, const ConvolveParams *conv_params);
#ifndef THE_SSE1
void av1_wiener_convolve_add_src_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, const ConvolveParams *conv_params);
void av1_wiener_convolve_add_src_avx2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, const ConvolveParams *conv_params);
#endif
RTCD_EXTERN void (*av1_wiener_convolve_add_src)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, const ConvolveParams *conv_params);
/*
 * cdef_filter_block and cdef_find_dir
 *
 * Each has an always-declared *_c prototype, four SIMD-suffixed prototypes
 * (_sse2, _ssse3, _sse4_1, _avx2) declared only when THE_SSE1 is not defined,
 * and an RTCD_EXTERN function pointer with the same signature.
 * setup_rtcd_internal() initialises each pointer to the *_c function.
 * NOTE(review): the flag-based switch to the SIMD variants is presumably done
 * later in setup_rtcd_internal(), as for the other entries - confirm.
 */
void cdef_filter_block_c(uint8_t *dst8, uint16_t *dst16, int dstride, const uint16_t *in, int pri_strength, int sec_strength, int dir, int pri_damping, int sec_damping, int bsize, int max, int coeff_shift);
#ifndef THE_SSE1
void cdef_filter_block_sse2(uint8_t *dst8, uint16_t *dst16, int dstride, const uint16_t *in, int pri_strength, int sec_strength, int dir, int pri_damping, int sec_damping, int bsize, int max, int coeff_shift);
void cdef_filter_block_ssse3(uint8_t *dst8, uint16_t *dst16, int dstride, const uint16_t *in, int pri_strength, int sec_strength, int dir, int pri_damping, int sec_damping, int bsize, int max, int coeff_shift);
void cdef_filter_block_sse4_1(uint8_t *dst8, uint16_t *dst16, int dstride, const uint16_t *in, int pri_strength, int sec_strength, int dir, int pri_damping, int sec_damping, int bsize, int max, int coeff_shift);
void cdef_filter_block_avx2(uint8_t *dst8, uint16_t *dst16, int dstride, const uint16_t *in, int pri_strength, int sec_strength, int dir, int pri_damping, int sec_damping, int bsize, int max, int coeff_shift);
#endif
RTCD_EXTERN void (*cdef_filter_block)(uint8_t *dst8, uint16_t *dst16, int dstride, const uint16_t *in, int pri_strength, int sec_strength, int dir, int pri_damping, int sec_damping, int bsize, int max, int coeff_shift);
/* cdef_find_dir returns int and writes through the var pointer parameter's type int32_t *. */
int cdef_find_dir_c(const uint16_t *img, int stride, int32_t *var, int coeff_shift);
#ifndef THE_SSE1
int cdef_find_dir_sse2(const uint16_t *img, int stride, int32_t *var, int coeff_shift);
int cdef_find_dir_ssse3(const uint16_t *img, int stride, int32_t *var, int coeff_shift);
int cdef_find_dir_sse4_1(const uint16_t *img, int stride, int32_t *var, int coeff_shift);
int cdef_find_dir_avx2(const uint16_t *img, int stride, int32_t *var, int coeff_shift);
#endif
RTCD_EXTERN int (*cdef_find_dir)(const uint16_t *img, int stride, int32_t *var, int coeff_shift);
/*
 * cfl_get_luma_subsampling_{420,422,444}_{hbd,lbd}
 *
 * Each of these takes a TX_SIZE and returns a function pointer type
 * (cfl_subsample_hbd_fn for the *_hbd entries, cfl_subsample_lbd_fn for the
 * *_lbd entries). For every entry the *_c prototype is always declared, the
 * _ssse3 and _avx2 prototypes are declared only when THE_SSE1 is not defined,
 * and an RTCD_EXTERN function pointer with the same signature follows.
 * setup_rtcd_internal() initialises each pointer to the *_c function.
 * NOTE(review): the flag-based switch to the SIMD variants is presumably done
 * later in setup_rtcd_internal(), as for the other entries - confirm.
 */
/* 420, hbd */
cfl_subsample_hbd_fn cfl_get_luma_subsampling_420_hbd_c(TX_SIZE tx_size);
#ifndef THE_SSE1
cfl_subsample_hbd_fn cfl_get_luma_subsampling_420_hbd_ssse3(TX_SIZE tx_size);
cfl_subsample_hbd_fn cfl_get_luma_subsampling_420_hbd_avx2(TX_SIZE tx_size);
#endif
RTCD_EXTERN cfl_subsample_hbd_fn (*cfl_get_luma_subsampling_420_hbd)(TX_SIZE tx_size);
/* 420, lbd */
cfl_subsample_lbd_fn cfl_get_luma_subsampling_420_lbd_c(TX_SIZE tx_size);
#ifndef THE_SSE1
cfl_subsample_lbd_fn cfl_get_luma_subsampling_420_lbd_ssse3(TX_SIZE tx_size);
cfl_subsample_lbd_fn cfl_get_luma_subsampling_420_lbd_avx2(TX_SIZE tx_size);
#endif
RTCD_EXTERN cfl_subsample_lbd_fn (*cfl_get_luma_subsampling_420_lbd)(TX_SIZE tx_size);
/* 422, hbd */
cfl_subsample_hbd_fn cfl_get_luma_subsampling_422_hbd_c(TX_SIZE tx_size);
#ifndef THE_SSE1
cfl_subsample_hbd_fn cfl_get_luma_subsampling_422_hbd_ssse3(TX_SIZE tx_size);
cfl_subsample_hbd_fn cfl_get_luma_subsampling_422_hbd_avx2(TX_SIZE tx_size);
#endif
RTCD_EXTERN cfl_subsample_hbd_fn (*cfl_get_luma_subsampling_422_hbd)(TX_SIZE tx_size);
/* 422, lbd */
cfl_subsample_lbd_fn cfl_get_luma_subsampling_422_lbd_c(TX_SIZE tx_size);
#ifndef THE_SSE1
cfl_subsample_lbd_fn cfl_get_luma_subsampling_422_lbd_ssse3(TX_SIZE tx_size);
cfl_subsample_lbd_fn cfl_get_luma_subsampling_422_lbd_avx2(TX_SIZE tx_size);
#endif
RTCD_EXTERN cfl_subsample_lbd_fn (*cfl_get_luma_subsampling_422_lbd)(TX_SIZE tx_size);
/* 444, hbd */
cfl_subsample_hbd_fn cfl_get_luma_subsampling_444_hbd_c(TX_SIZE tx_size);
#ifndef THE_SSE1
cfl_subsample_hbd_fn cfl_get_luma_subsampling_444_hbd_ssse3(TX_SIZE tx_size);
cfl_subsample_hbd_fn cfl_get_luma_subsampling_444_hbd_avx2(TX_SIZE tx_size);
#endif
RTCD_EXTERN cfl_subsample_hbd_fn (*cfl_get_luma_subsampling_444_hbd)(TX_SIZE tx_size);
/* 444, lbd */
cfl_subsample_lbd_fn cfl_get_luma_subsampling_444_lbd_c(TX_SIZE tx_size);
#ifndef THE_SSE1
cfl_subsample_lbd_fn cfl_get_luma_subsampling_444_lbd_ssse3(TX_SIZE tx_size);
cfl_subsample_lbd_fn cfl_get_luma_subsampling_444_lbd_avx2(TX_SIZE tx_size);
#endif
RTCD_EXTERN cfl_subsample_lbd_fn (*cfl_get_luma_subsampling_444_lbd)(TX_SIZE tx_size);
/*
 * copy_rect8_16bit_to_16bit and copy_rect8_8bit_to_16bit
 *
 * Both write to a uint16_t destination; they differ in the source element
 * type (const uint16_t * versus const uint8_t *). Each has an always-declared
 * *_c prototype, four SIMD-suffixed prototypes (_sse2, _ssse3, _sse4_1,
 * _avx2) declared only when THE_SSE1 is not defined, and an RTCD_EXTERN
 * function pointer with the same signature.
 * setup_rtcd_internal() initialises each pointer to the *_c function.
 * NOTE(review): the flag-based switch to the SIMD variants is presumably done
 * later in setup_rtcd_internal(), as for the other entries - confirm.
 */
void copy_rect8_16bit_to_16bit_c(uint16_t *dst, int dstride, const uint16_t *src, int sstride, int v, int h);
#ifndef THE_SSE1
void copy_rect8_16bit_to_16bit_sse2(uint16_t *dst, int dstride, const uint16_t *src, int sstride, int v, int h);
void copy_rect8_16bit_to_16bit_ssse3(uint16_t *dst, int dstride, const uint16_t *src, int sstride, int v, int h);
void copy_rect8_16bit_to_16bit_sse4_1(uint16_t *dst, int dstride, const uint16_t *src, int sstride, int v, int h);
void copy_rect8_16bit_to_16bit_avx2(uint16_t *dst, int dstride, const uint16_t *src, int sstride, int v, int h);
#endif
RTCD_EXTERN void (*copy_rect8_16bit_to_16bit)(uint16_t *dst, int dstride, const uint16_t *src, int sstride, int v, int h);
void copy_rect8_8bit_to_16bit_c(uint16_t *dst, int dstride, const uint8_t *src, int sstride, int v, int h);
#ifndef THE_SSE1
void copy_rect8_8bit_to_16bit_sse2(uint16_t *dst, int dstride, const uint8_t *src, int sstride, int v, int h);
void copy_rect8_8bit_to_16bit_ssse3(uint16_t *dst, int dstride, const uint8_t *src, int sstride, int v, int h);
void copy_rect8_8bit_to_16bit_sse4_1(uint16_t *dst, int dstride, const uint8_t *src, int sstride, int v, int h);
void copy_rect8_8bit_to_16bit_avx2(uint16_t *dst, int dstride, const uint8_t *src, int sstride, int v, int h);
#endif
RTCD_EXTERN void (*copy_rect8_8bit_to_16bit)(uint16_t *dst, int dstride, const uint8_t *src, int sstride, int v, int h);
/*
 * get_predict_hbd_fn, get_predict_lbd_fn and get_subtract_average_fn
 *
 * Each takes a TX_SIZE and returns a function pointer type
 * (cfl_predict_hbd_fn, cfl_predict_lbd_fn, cfl_subtract_average_fn). For
 * every entry the *_c prototype is always declared, the SIMD-suffixed
 * prototypes are declared only when THE_SSE1 is not defined, and an
 * RTCD_EXTERN function pointer with the same signature follows.
 * setup_rtcd_internal() initialises each pointer to the *_c function.
 * NOTE(review): the flag-based switch to the SIMD variants is presumably done
 * later in setup_rtcd_internal(), as for the other entries - confirm.
 */
/* get_predict_hbd_fn: _ssse3 and _avx2 variants. */
cfl_predict_hbd_fn get_predict_hbd_fn_c(TX_SIZE tx_size);
#ifndef THE_SSE1
cfl_predict_hbd_fn get_predict_hbd_fn_ssse3(TX_SIZE tx_size);
cfl_predict_hbd_fn get_predict_hbd_fn_avx2(TX_SIZE tx_size);
#endif
RTCD_EXTERN cfl_predict_hbd_fn (*get_predict_hbd_fn)(TX_SIZE tx_size);
/* get_predict_lbd_fn: _ssse3 and _avx2 variants. */
cfl_predict_lbd_fn get_predict_lbd_fn_c(TX_SIZE tx_size);
#ifndef THE_SSE1
cfl_predict_lbd_fn get_predict_lbd_fn_ssse3(TX_SIZE tx_size);
cfl_predict_lbd_fn get_predict_lbd_fn_avx2(TX_SIZE tx_size);
#endif
RTCD_EXTERN cfl_predict_lbd_fn (*get_predict_lbd_fn)(TX_SIZE tx_size);
/* get_subtract_average_fn: _sse2 and _avx2 variants. */
cfl_subtract_average_fn get_subtract_average_fn_c(TX_SIZE tx_size);
#ifndef THE_SSE1
cfl_subtract_average_fn get_subtract_average_fn_sse2(TX_SIZE tx_size);
cfl_subtract_average_fn get_subtract_average_fn_avx2(TX_SIZE tx_size);
#endif
RTCD_EXTERN cfl_subtract_average_fn (*get_subtract_average_fn)(TX_SIZE tx_size);
void av1_rtcd(void);
@ -430,171 +544,173 @@ void av1_rtcd(void);
#include "aom_ports/x86.h"
static void setup_rtcd_internal(void)
{
apply_selfguided_restoration = apply_selfguided_restoration_c;
av1_build_compound_diffwtd_mask = av1_build_compound_diffwtd_mask_c;
av1_build_compound_diffwtd_mask_d16 = av1_build_compound_diffwtd_mask_d16_c;
av1_build_compound_diffwtd_mask_highbd = av1_build_compound_diffwtd_mask_highbd_c;
av1_convolve_2d_copy_sr = av1_convolve_2d_copy_sr_c;
av1_convolve_2d_scale = av1_convolve_2d_scale_c;
av1_convolve_2d_sr = av1_convolve_2d_sr_c;
av1_convolve_horiz_rs = av1_convolve_horiz_rs_c;
av1_convolve_x_sr = av1_convolve_x_sr_c;
av1_convolve_y_sr = av1_convolve_y_sr_c;
av1_filter_intra_edge = av1_filter_intra_edge_c;
av1_filter_intra_edge_high = av1_filter_intra_edge_high_c;
av1_filter_intra_predictor = av1_filter_intra_predictor_c;
av1_highbd_convolve_2d_copy_sr = av1_highbd_convolve_2d_copy_sr_c;
av1_highbd_convolve_2d_scale = av1_highbd_convolve_2d_scale_c;
av1_highbd_convolve_2d_sr = av1_highbd_convolve_2d_sr_c;
av1_highbd_convolve_horiz_rs = av1_highbd_convolve_horiz_rs_c;
av1_highbd_convolve_x_sr = av1_highbd_convolve_x_sr_c;
av1_highbd_convolve_y_sr = av1_highbd_convolve_y_sr_c;
av1_highbd_inv_txfm_add = av1_highbd_inv_txfm_add_c;
av1_highbd_inv_txfm_add_16x16 = av1_highbd_inv_txfm_add_16x16_c;
av1_highbd_inv_txfm_add_16x8 = av1_highbd_inv_txfm_add_16x8_c;
av1_highbd_inv_txfm_add_32x32 = av1_highbd_inv_txfm_add_32x32_c;
av1_highbd_inv_txfm_add_4x4 = av1_highbd_inv_txfm_add_4x4_c;
av1_highbd_inv_txfm_add_8x16 = av1_highbd_inv_txfm_add_8x16_c;
av1_highbd_inv_txfm_add_8x8 = av1_highbd_inv_txfm_add_8x8_c;
av1_highbd_jnt_convolve_2d = av1_highbd_jnt_convolve_2d_c;
av1_highbd_jnt_convolve_2d_copy = av1_highbd_jnt_convolve_2d_copy_c;
av1_highbd_jnt_convolve_x = av1_highbd_jnt_convolve_x_c;
av1_highbd_jnt_convolve_y = av1_highbd_jnt_convolve_y_c;
av1_highbd_warp_affine = av1_highbd_warp_affine_c;
av1_highbd_wiener_convolve_add_src = av1_highbd_wiener_convolve_add_src_c;
av1_inv_txfm2d_add_4x4 = av1_inv_txfm2d_add_4x4_c;
av1_inv_txfm2d_add_8x8 = av1_inv_txfm2d_add_8x8_c;
av1_inv_txfm_add = av1_inv_txfm_add_c;
av1_jnt_convolve_2d = av1_jnt_convolve_2d_c;
av1_jnt_convolve_2d_copy = av1_jnt_convolve_2d_copy_c;
av1_jnt_convolve_x = av1_jnt_convolve_x_c;
av1_jnt_convolve_y = av1_jnt_convolve_y_c;
av1_selfguided_restoration = av1_selfguided_restoration_c;
av1_upsample_intra_edge_high = av1_upsample_intra_edge_high_c;
av1_upsample_intra_edge = av1_upsample_intra_edge_c;
av1_warp_affine = av1_warp_affine_c;
av1_wiener_convolve_add_src = av1_wiener_convolve_add_src_c;
cdef_filter_block = cdef_filter_block_c;
cdef_find_dir = cdef_find_dir_c;
cfl_get_luma_subsampling_420_hbd = cfl_get_luma_subsampling_420_hbd_c;
cfl_get_luma_subsampling_420_lbd = cfl_get_luma_subsampling_420_lbd_c;
cfl_get_luma_subsampling_422_hbd = cfl_get_luma_subsampling_422_hbd_c;
cfl_get_luma_subsampling_422_lbd = cfl_get_luma_subsampling_422_lbd_c;
cfl_get_luma_subsampling_444_hbd = cfl_get_luma_subsampling_444_hbd_c;
cfl_get_luma_subsampling_444_lbd = cfl_get_luma_subsampling_444_lbd_c;
copy_rect8_16bit_to_16bit = copy_rect8_16bit_to_16bit_c;
copy_rect8_8bit_to_16bit = copy_rect8_8bit_to_16bit_c;
get_predict_hbd_fn = get_predict_hbd_fn_c;
get_predict_lbd_fn = get_predict_lbd_fn_c;
get_subtract_average_fn = get_subtract_average_fn_c;
#ifndef THE_SSE1
int flags = x86_simd_caps();
(void)flags;
apply_selfguided_restoration = apply_selfguided_restoration_c;
if (flags & HAS_SSE4_1) apply_selfguided_restoration = apply_selfguided_restoration_sse4_1;
if (flags & HAS_AVX2) apply_selfguided_restoration = apply_selfguided_restoration_avx2;
av1_build_compound_diffwtd_mask = av1_build_compound_diffwtd_mask_c;
if (flags & HAS_SSE4_1) av1_build_compound_diffwtd_mask = av1_build_compound_diffwtd_mask_sse4_1;
if (flags & HAS_AVX2) av1_build_compound_diffwtd_mask = av1_build_compound_diffwtd_mask_avx2;
av1_build_compound_diffwtd_mask_d16 = av1_build_compound_diffwtd_mask_d16_c;
if (flags & HAS_SSE4_1) av1_build_compound_diffwtd_mask_d16 = av1_build_compound_diffwtd_mask_d16_sse4_1;
if (flags & HAS_AVX2) av1_build_compound_diffwtd_mask_d16 = av1_build_compound_diffwtd_mask_d16_avx2;
av1_build_compound_diffwtd_mask_highbd = av1_build_compound_diffwtd_mask_highbd_c;
if (flags & HAS_SSSE3) av1_build_compound_diffwtd_mask_highbd = av1_build_compound_diffwtd_mask_highbd_ssse3;
if (flags & HAS_AVX2) av1_build_compound_diffwtd_mask_highbd = av1_build_compound_diffwtd_mask_highbd_avx2;
av1_convolve_2d_copy_sr = av1_convolve_2d_copy_sr_c;
if (flags & HAS_SSE2) av1_convolve_2d_copy_sr = av1_convolve_2d_copy_sr_sse2;
if (flags & HAS_AVX2) av1_convolve_2d_copy_sr = av1_convolve_2d_copy_sr_avx2;
av1_convolve_2d_scale = av1_convolve_2d_scale_c;
if (flags & HAS_SSE4_1) av1_convolve_2d_scale = av1_convolve_2d_scale_sse4_1;
av1_convolve_2d_sr = av1_convolve_2d_sr_c;
if (flags & HAS_SSE2) av1_convolve_2d_sr = av1_convolve_2d_sr_sse2;
if (flags & HAS_AVX2) av1_convolve_2d_sr = av1_convolve_2d_sr_avx2;
av1_convolve_horiz_rs = av1_convolve_horiz_rs_c;
if (flags & HAS_SSE4_1) av1_convolve_horiz_rs = av1_convolve_horiz_rs_sse4_1;
av1_convolve_x_sr = av1_convolve_x_sr_c;
if (flags & HAS_SSE2) av1_convolve_x_sr = av1_convolve_x_sr_sse2;
if (flags & HAS_AVX2) av1_convolve_x_sr = av1_convolve_x_sr_avx2;
av1_convolve_y_sr = av1_convolve_y_sr_c;
if (flags & HAS_SSE2) av1_convolve_y_sr = av1_convolve_y_sr_sse2;
if (flags & HAS_AVX2) av1_convolve_y_sr = av1_convolve_y_sr_avx2;
av1_filter_intra_edge = av1_filter_intra_edge_c;
if (flags & HAS_SSE4_1) av1_filter_intra_edge = av1_filter_intra_edge_sse4_1;
av1_filter_intra_edge_high = av1_filter_intra_edge_high_c;
if (flags & HAS_SSE4_1) av1_filter_intra_edge_high = av1_filter_intra_edge_high_sse4_1;
av1_filter_intra_predictor = av1_filter_intra_predictor_c;
if (flags & HAS_SSE4_1) av1_filter_intra_predictor = av1_filter_intra_predictor_sse4_1;
av1_highbd_convolve_2d_copy_sr = av1_highbd_convolve_2d_copy_sr_c;
if (flags & HAS_SSE2) av1_highbd_convolve_2d_copy_sr = av1_highbd_convolve_2d_copy_sr_sse2;
if (flags & HAS_AVX2) av1_highbd_convolve_2d_copy_sr = av1_highbd_convolve_2d_copy_sr_avx2;
av1_highbd_convolve_2d_scale = av1_highbd_convolve_2d_scale_c;
if (flags & HAS_SSE4_1) av1_highbd_convolve_2d_scale = av1_highbd_convolve_2d_scale_sse4_1;
av1_highbd_convolve_2d_sr = av1_highbd_convolve_2d_sr_c;
if (flags & HAS_SSSE3) av1_highbd_convolve_2d_sr = av1_highbd_convolve_2d_sr_ssse3;
if (flags & HAS_AVX2) av1_highbd_convolve_2d_sr = av1_highbd_convolve_2d_sr_avx2;
av1_highbd_convolve_horiz_rs = av1_highbd_convolve_horiz_rs_c;
if (flags & HAS_SSE4_1) av1_highbd_convolve_horiz_rs = av1_highbd_convolve_horiz_rs_sse4_1;
av1_highbd_convolve_x_sr = av1_highbd_convolve_x_sr_c;
if (flags & HAS_SSSE3) av1_highbd_convolve_x_sr = av1_highbd_convolve_x_sr_ssse3;
if (flags & HAS_AVX2) av1_highbd_convolve_x_sr = av1_highbd_convolve_x_sr_avx2;
av1_highbd_convolve_y_sr = av1_highbd_convolve_y_sr_c;
if (flags & HAS_SSSE3) av1_highbd_convolve_y_sr = av1_highbd_convolve_y_sr_ssse3;
if (flags & HAS_AVX2) av1_highbd_convolve_y_sr = av1_highbd_convolve_y_sr_avx2;
av1_highbd_inv_txfm_add = av1_highbd_inv_txfm_add_c;
if (flags & HAS_SSE4_1) av1_highbd_inv_txfm_add = av1_highbd_inv_txfm_add_sse4_1;
if (flags & HAS_AVX2) av1_highbd_inv_txfm_add = av1_highbd_inv_txfm_add_avx2;
av1_highbd_inv_txfm_add_16x16 = av1_highbd_inv_txfm_add_16x16_c;
if (flags & HAS_SSE4_1) av1_highbd_inv_txfm_add_16x16 = av1_highbd_inv_txfm_add_16x16_sse4_1;
av1_highbd_inv_txfm_add_16x8 = av1_highbd_inv_txfm_add_16x8_c;
if (flags & HAS_SSE4_1) av1_highbd_inv_txfm_add_16x8 = av1_highbd_inv_txfm_add_16x8_sse4_1;
av1_highbd_inv_txfm_add_32x32 = av1_highbd_inv_txfm_add_32x32_c;
if (flags & HAS_SSE4_1) av1_highbd_inv_txfm_add_32x32 = av1_highbd_inv_txfm_add_32x32_sse4_1;
if (flags & HAS_AVX2) av1_highbd_inv_txfm_add_32x32 = av1_highbd_inv_txfm_add_32x32_avx2;
av1_highbd_inv_txfm_add_4x4 = av1_highbd_inv_txfm_add_4x4_c;
if (flags & HAS_SSE4_1) av1_highbd_inv_txfm_add_4x4 = av1_highbd_inv_txfm_add_4x4_sse4_1;
av1_highbd_inv_txfm_add_8x16 = av1_highbd_inv_txfm_add_8x16_c;
if (flags & HAS_SSE4_1) av1_highbd_inv_txfm_add_8x16 = av1_highbd_inv_txfm_add_8x16_sse4_1;
av1_highbd_inv_txfm_add_8x8 = av1_highbd_inv_txfm_add_8x8_c;
if (flags & HAS_SSE4_1) av1_highbd_inv_txfm_add_8x8 = av1_highbd_inv_txfm_add_8x8_sse4_1;
av1_highbd_jnt_convolve_2d = av1_highbd_jnt_convolve_2d_c;
if (flags & HAS_SSE4_1) av1_highbd_jnt_convolve_2d = av1_highbd_jnt_convolve_2d_sse4_1;
if (flags & HAS_AVX2) av1_highbd_jnt_convolve_2d = av1_highbd_jnt_convolve_2d_avx2;
av1_highbd_jnt_convolve_2d_copy = av1_highbd_jnt_convolve_2d_copy_c;
if (flags & HAS_SSE4_1) av1_highbd_jnt_convolve_2d_copy = av1_highbd_jnt_convolve_2d_copy_sse4_1;
if (flags & HAS_AVX2) av1_highbd_jnt_convolve_2d_copy = av1_highbd_jnt_convolve_2d_copy_avx2;
av1_highbd_jnt_convolve_x = av1_highbd_jnt_convolve_x_c;
if (flags & HAS_SSE4_1) av1_highbd_jnt_convolve_x = av1_highbd_jnt_convolve_x_sse4_1;
if (flags & HAS_AVX2) av1_highbd_jnt_convolve_x = av1_highbd_jnt_convolve_x_avx2;
av1_highbd_jnt_convolve_y = av1_highbd_jnt_convolve_y_c;
if (flags & HAS_SSE4_1) av1_highbd_jnt_convolve_y = av1_highbd_jnt_convolve_y_sse4_1;
if (flags & HAS_AVX2) av1_highbd_jnt_convolve_y = av1_highbd_jnt_convolve_y_avx2;
av1_highbd_warp_affine = av1_highbd_warp_affine_c;
if (flags & HAS_SSE4_1) av1_highbd_warp_affine = av1_highbd_warp_affine_sse4_1;
av1_highbd_wiener_convolve_add_src = av1_highbd_wiener_convolve_add_src_c;
if (flags & HAS_SSSE3) av1_highbd_wiener_convolve_add_src = av1_highbd_wiener_convolve_add_src_ssse3;
if (flags & HAS_AVX2) av1_highbd_wiener_convolve_add_src = av1_highbd_wiener_convolve_add_src_avx2;
av1_inv_txfm2d_add_4x4 = av1_inv_txfm2d_add_4x4_c;
if (flags & HAS_SSE4_1) av1_inv_txfm2d_add_4x4 = av1_inv_txfm2d_add_4x4_sse4_1;
av1_inv_txfm2d_add_8x8 = av1_inv_txfm2d_add_8x8_c;
if (flags & HAS_SSE4_1) av1_inv_txfm2d_add_8x8 = av1_inv_txfm2d_add_8x8_sse4_1;
av1_inv_txfm_add = av1_inv_txfm_add_c;
if (flags & HAS_SSSE3) av1_inv_txfm_add = av1_inv_txfm_add_ssse3;
if (flags & HAS_AVX2) av1_inv_txfm_add = av1_inv_txfm_add_avx2;
av1_jnt_convolve_2d = av1_jnt_convolve_2d_c;
if (flags & HAS_SSSE3) av1_jnt_convolve_2d = av1_jnt_convolve_2d_ssse3;
if (flags & HAS_AVX2) av1_jnt_convolve_2d = av1_jnt_convolve_2d_avx2;
av1_jnt_convolve_2d_copy = av1_jnt_convolve_2d_copy_c;
if (flags & HAS_SSE2) av1_jnt_convolve_2d_copy = av1_jnt_convolve_2d_copy_sse2;
if (flags & HAS_AVX2) av1_jnt_convolve_2d_copy = av1_jnt_convolve_2d_copy_avx2;
av1_jnt_convolve_x = av1_jnt_convolve_x_c;
if (flags & HAS_SSE2) av1_jnt_convolve_x = av1_jnt_convolve_x_sse2;
if (flags & HAS_AVX2) av1_jnt_convolve_x = av1_jnt_convolve_x_avx2;
av1_jnt_convolve_y = av1_jnt_convolve_y_c;
if (flags & HAS_SSE2) av1_jnt_convolve_y = av1_jnt_convolve_y_sse2;
if (flags & HAS_AVX2) av1_jnt_convolve_y = av1_jnt_convolve_y_avx2;
av1_selfguided_restoration = av1_selfguided_restoration_c;
if (flags & HAS_SSE4_1) av1_selfguided_restoration = av1_selfguided_restoration_sse4_1;
if (flags & HAS_AVX2) av1_selfguided_restoration = av1_selfguided_restoration_avx2;
av1_upsample_intra_edge = av1_upsample_intra_edge_c;
if (flags & HAS_SSE4_1) av1_upsample_intra_edge = av1_upsample_intra_edge_sse4_1;
av1_upsample_intra_edge_high = av1_upsample_intra_edge_high_c;
if (flags & HAS_SSE4_1) av1_upsample_intra_edge_high = av1_upsample_intra_edge_high_sse4_1;
av1_warp_affine = av1_warp_affine_c;
if (flags & HAS_SSE4_1) av1_warp_affine = av1_warp_affine_sse4_1;
av1_wiener_convolve_add_src = av1_wiener_convolve_add_src_c;
if (flags & HAS_SSE2) av1_wiener_convolve_add_src = av1_wiener_convolve_add_src_sse2;
if (flags & HAS_AVX2) av1_wiener_convolve_add_src = av1_wiener_convolve_add_src_avx2;
cdef_filter_block = cdef_filter_block_c;
if (flags & HAS_SSE2) cdef_filter_block = cdef_filter_block_sse2;
if (flags & HAS_SSSE3) cdef_filter_block = cdef_filter_block_ssse3;
if (flags & HAS_SSE4_1) cdef_filter_block = cdef_filter_block_sse4_1;
if (flags & HAS_AVX2) cdef_filter_block = cdef_filter_block_avx2;
cdef_find_dir = cdef_find_dir_c;
if (flags & HAS_SSE2) cdef_find_dir = cdef_find_dir_sse2;
if (flags & HAS_SSSE3) cdef_find_dir = cdef_find_dir_ssse3;
if (flags & HAS_SSE4_1) cdef_find_dir = cdef_find_dir_sse4_1;
if (flags & HAS_AVX2) cdef_find_dir = cdef_find_dir_avx2;
cfl_get_luma_subsampling_420_hbd = cfl_get_luma_subsampling_420_hbd_c;
if (flags & HAS_SSSE3) cfl_get_luma_subsampling_420_hbd = cfl_get_luma_subsampling_420_hbd_ssse3;
if (flags & HAS_AVX2) cfl_get_luma_subsampling_420_hbd = cfl_get_luma_subsampling_420_hbd_avx2;
cfl_get_luma_subsampling_420_lbd = cfl_get_luma_subsampling_420_lbd_c;
if (flags & HAS_SSSE3) cfl_get_luma_subsampling_420_lbd = cfl_get_luma_subsampling_420_lbd_ssse3;
if (flags & HAS_AVX2) cfl_get_luma_subsampling_420_lbd = cfl_get_luma_subsampling_420_lbd_avx2;
cfl_get_luma_subsampling_422_hbd = cfl_get_luma_subsampling_422_hbd_c;
if (flags & HAS_SSSE3) cfl_get_luma_subsampling_422_hbd = cfl_get_luma_subsampling_422_hbd_ssse3;
if (flags & HAS_AVX2) cfl_get_luma_subsampling_422_hbd = cfl_get_luma_subsampling_422_hbd_avx2;
cfl_get_luma_subsampling_422_lbd = cfl_get_luma_subsampling_422_lbd_c;
if (flags & HAS_SSSE3) cfl_get_luma_subsampling_422_lbd = cfl_get_luma_subsampling_422_lbd_ssse3;
if (flags & HAS_AVX2) cfl_get_luma_subsampling_422_lbd = cfl_get_luma_subsampling_422_lbd_avx2;
cfl_get_luma_subsampling_444_hbd = cfl_get_luma_subsampling_444_hbd_c;
if (flags & HAS_SSSE3) cfl_get_luma_subsampling_444_hbd = cfl_get_luma_subsampling_444_hbd_ssse3;
if (flags & HAS_AVX2) cfl_get_luma_subsampling_444_hbd = cfl_get_luma_subsampling_444_hbd_avx2;
cfl_get_luma_subsampling_444_lbd = cfl_get_luma_subsampling_444_lbd_c;
if (flags & HAS_SSSE3) cfl_get_luma_subsampling_444_lbd = cfl_get_luma_subsampling_444_lbd_ssse3;
if (flags & HAS_AVX2) cfl_get_luma_subsampling_444_lbd = cfl_get_luma_subsampling_444_lbd_avx2;
copy_rect8_16bit_to_16bit = copy_rect8_16bit_to_16bit_c;
if (flags & HAS_SSE2) copy_rect8_16bit_to_16bit = copy_rect8_16bit_to_16bit_sse2;
if (flags & HAS_SSSE3) copy_rect8_16bit_to_16bit = copy_rect8_16bit_to_16bit_ssse3;
if (flags & HAS_SSE4_1) copy_rect8_16bit_to_16bit = copy_rect8_16bit_to_16bit_sse4_1;
if (flags & HAS_AVX2) copy_rect8_16bit_to_16bit = copy_rect8_16bit_to_16bit_avx2;
copy_rect8_8bit_to_16bit = copy_rect8_8bit_to_16bit_c;
if (flags & HAS_SSE2) copy_rect8_8bit_to_16bit = copy_rect8_8bit_to_16bit_sse2;
if (flags & HAS_SSSE3) copy_rect8_8bit_to_16bit = copy_rect8_8bit_to_16bit_ssse3;
if (flags & HAS_SSE4_1) copy_rect8_8bit_to_16bit = copy_rect8_8bit_to_16bit_sse4_1;
if (flags & HAS_AVX2) copy_rect8_8bit_to_16bit = copy_rect8_8bit_to_16bit_avx2;
get_predict_hbd_fn = get_predict_hbd_fn_c;
if (flags & HAS_SSSE3) get_predict_hbd_fn = get_predict_hbd_fn_ssse3;
if (flags & HAS_AVX2) get_predict_hbd_fn = get_predict_hbd_fn_avx2;
get_predict_lbd_fn = get_predict_lbd_fn_c;
if (flags & HAS_SSSE3) get_predict_lbd_fn = get_predict_lbd_fn_ssse3;
if (flags & HAS_AVX2) get_predict_lbd_fn = get_predict_lbd_fn_avx2;
get_subtract_average_fn = get_subtract_average_fn_c;
if (flags & HAS_SSE2) get_subtract_average_fn = get_subtract_average_fn_sse2;
if (flags & HAS_AVX2) get_subtract_average_fn = get_subtract_average_fn_avx2;
#endif
}
#endif

View File

@ -35,6 +35,8 @@ if CONFIG['CPU_ARCH'] == 'x86_64':
elif CONFIG['CPU_ARCH'] == 'x86':
EXPORTS.aom += files['IA32_EXPORTS']
SOURCES += files['IA32_SOURCES']
if not CONFIG['THE_SSE1']:
SOURCES += files['IA32_SSE2_SOURCES']
USE_YASM = True
if CONFIG['OS_TARGET'] == 'WINNT':
if CONFIG['CC_TYPE'] == 'gcc':

View File

@ -247,6 +247,60 @@ files = {
'../../third_party/aom/aom_dsp/loopfilter.c',
'../../third_party/aom/aom_dsp/subtract.c',
'../../third_party/aom/aom_dsp/x86/aom_asm_stubs.c',
'../../third_party/aom/aom_mem/aom_mem.c',
'../../third_party/aom/aom_ports/emms.asm',
'../../third_party/aom/aom_ports/x86_abi_support.asm',
'../../third_party/aom/aom_scale/aom_scale_rtcd.c',
'../../third_party/aom/aom_scale/generic/aom_scale.c',
'../../third_party/aom/aom_scale/generic/gen_scalers.c',
'../../third_party/aom/aom_scale/generic/yv12config.c',
'../../third_party/aom/aom_scale/generic/yv12extend.c',
'../../third_party/aom/aom_util/aom_thread.c',
'../../third_party/aom/aom_util/debug_util.c',
'../../third_party/aom/av1/av1_dx_iface.c',
'../../third_party/aom/av1/common/alloccommon.c',
'../../third_party/aom/av1/common/av1_inv_txfm1d.c',
'../../third_party/aom/av1/common/av1_inv_txfm2d.c',
'../../third_party/aom/av1/common/av1_loopfilter.c',
'../../third_party/aom/av1/common/av1_rtcd.c',
'../../third_party/aom/av1/common/av1_txfm.c',
'../../third_party/aom/av1/common/blockd.c',
'../../third_party/aom/av1/common/cdef.c',
'../../third_party/aom/av1/common/cdef_block.c',
'../../third_party/aom/av1/common/cfl.c',
'../../third_party/aom/av1/common/convolve.c',
'../../third_party/aom/av1/common/debugmodes.c',
'../../third_party/aom/av1/common/entropy.c',
'../../third_party/aom/av1/common/entropymode.c',
'../../third_party/aom/av1/common/entropymv.c',
'../../third_party/aom/av1/common/frame_buffers.c',
'../../third_party/aom/av1/common/idct.c',
'../../third_party/aom/av1/common/mvref_common.c',
'../../third_party/aom/av1/common/obu_util.c',
'../../third_party/aom/av1/common/odintrin.c',
'../../third_party/aom/av1/common/pred_common.c',
'../../third_party/aom/av1/common/quant_common.c',
'../../third_party/aom/av1/common/reconinter.c',
'../../third_party/aom/av1/common/reconintra.c',
'../../third_party/aom/av1/common/resize.c',
'../../third_party/aom/av1/common/restoration.c',
'../../third_party/aom/av1/common/scale.c',
'../../third_party/aom/av1/common/scan.c',
'../../third_party/aom/av1/common/seg_common.c',
'../../third_party/aom/av1/common/thread_common.c',
'../../third_party/aom/av1/common/tile_common.c',
'../../third_party/aom/av1/common/timing.c',
'../../third_party/aom/av1/common/txb_common.c',
'../../third_party/aom/av1/common/warped_motion.c',
'../../third_party/aom/av1/decoder/decodeframe.c',
'../../third_party/aom/av1/decoder/decodemv.c',
'../../third_party/aom/av1/decoder/decoder.c',
'../../third_party/aom/av1/decoder/decodetxb.c',
'../../third_party/aom/av1/decoder/detokenize.c',
'../../third_party/aom/av1/decoder/dthread.c',
'../../third_party/aom/av1/decoder/obu.c',
],
'IA32_SSE2_SOURCES': [
'../../third_party/aom/aom_dsp/x86/aom_convolve_copy_sse2.asm',
'../../third_party/aom/aom_dsp/x86/aom_high_subpixel_8t_sse2.asm',
'../../third_party/aom/aom_dsp/x86/aom_high_subpixel_bilinear_sse2.asm',
@ -274,55 +328,10 @@ files = {
'../../third_party/aom/aom_dsp/x86/intrapred_ssse3.c',
'../../third_party/aom/aom_dsp/x86/inv_wht_sse2.asm',
'../../third_party/aom/aom_dsp/x86/loopfilter_sse2.c',
'../../third_party/aom/aom_mem/aom_mem.c',
'../../third_party/aom/aom_ports/emms.asm',
'../../third_party/aom/aom_ports/x86_abi_support.asm',
'../../third_party/aom/aom_scale/aom_scale_rtcd.c',
'../../third_party/aom/aom_scale/generic/aom_scale.c',
'../../third_party/aom/aom_scale/generic/gen_scalers.c',
'../../third_party/aom/aom_scale/generic/yv12config.c',
'../../third_party/aom/aom_scale/generic/yv12extend.c',
'../../third_party/aom/aom_util/aom_thread.c',
'../../third_party/aom/aom_util/debug_util.c',
'../../third_party/aom/av1/av1_dx_iface.c',
'../../third_party/aom/av1/common/alloccommon.c',
'../../third_party/aom/av1/common/av1_inv_txfm1d.c',
'../../third_party/aom/av1/common/av1_inv_txfm2d.c',
'../../third_party/aom/av1/common/av1_loopfilter.c',
'../../third_party/aom/av1/common/av1_rtcd.c',
'../../third_party/aom/av1/common/av1_txfm.c',
'../../third_party/aom/av1/common/blockd.c',
'../../third_party/aom/av1/common/cdef.c',
'../../third_party/aom/av1/common/cdef_block.c',
'../../third_party/aom/av1/common/cdef_block_avx2.c',
'../../third_party/aom/av1/common/cdef_block_sse2.c',
'../../third_party/aom/av1/common/cdef_block_sse4.c',
'../../third_party/aom/av1/common/cdef_block_ssse3.c',
'../../third_party/aom/av1/common/cfl.c',
'../../third_party/aom/av1/common/convolve.c',
'../../third_party/aom/av1/common/debugmodes.c',
'../../third_party/aom/av1/common/entropy.c',
'../../third_party/aom/av1/common/entropymode.c',
'../../third_party/aom/av1/common/entropymv.c',
'../../third_party/aom/av1/common/frame_buffers.c',
'../../third_party/aom/av1/common/idct.c',
'../../third_party/aom/av1/common/mvref_common.c',
'../../third_party/aom/av1/common/obu_util.c',
'../../third_party/aom/av1/common/odintrin.c',
'../../third_party/aom/av1/common/pred_common.c',
'../../third_party/aom/av1/common/quant_common.c',
'../../third_party/aom/av1/common/reconinter.c',
'../../third_party/aom/av1/common/reconintra.c',
'../../third_party/aom/av1/common/resize.c',
'../../third_party/aom/av1/common/restoration.c',
'../../third_party/aom/av1/common/scale.c',
'../../third_party/aom/av1/common/scan.c',
'../../third_party/aom/av1/common/seg_common.c',
'../../third_party/aom/av1/common/thread_common.c',
'../../third_party/aom/av1/common/tile_common.c',
'../../third_party/aom/av1/common/timing.c',
'../../third_party/aom/av1/common/txb_common.c',
'../../third_party/aom/av1/common/warped_motion.c',
'../../third_party/aom/av1/common/x86/av1_convolve_horiz_rs_sse4.c',
'../../third_party/aom/av1/common/x86/av1_convolve_scale_sse4.c',
'../../third_party/aom/av1/common/x86/av1_highbd_convolve_sse4.c',
@ -360,13 +369,6 @@ files = {
'../../third_party/aom/av1/common/x86/warp_plane_sse4.c',
'../../third_party/aom/av1/common/x86/wiener_convolve_avx2.c',
'../../third_party/aom/av1/common/x86/wiener_convolve_sse2.c',
'../../third_party/aom/av1/decoder/decodeframe.c',
'../../third_party/aom/av1/decoder/decodemv.c',
'../../third_party/aom/av1/decoder/decoder.c',
'../../third_party/aom/av1/decoder/decodetxb.c',
'../../third_party/aom/av1/decoder/detokenize.c',
'../../third_party/aom/av1/decoder/dthread.c',
'../../third_party/aom/av1/decoder/obu.c',
],
'X64_EXPORTS': [
'../../third_party/aom/aom/aom.h',

View File

@ -91,7 +91,7 @@ opus_val32 celt_inner_prod_sse2(
int N);
#endif
#if defined(OPUS_X86_MAY_HAVE_SSE2) && !defined(FIXED_POINT)
#if defined(OPUS_X86_MAY_HAVE_SSE) && !defined(FIXED_POINT)
opus_val32 celt_inner_prod_sse(
const opus_val16 *x,
const opus_val16 *y,

View File

@ -84,13 +84,14 @@ else:
if CONFIG['CPU_ARCH'] in ('x86', 'x86_64'):
DEFINES['OPUS_HAVE_RTCD'] = True
DEFINES['OPUS_X86_MAY_HAVE_SSE'] = True
DEFINES['OPUS_X86_MAY_HAVE_SSE2'] = True
DEFINES['OPUS_X86_MAY_HAVE_SSE4_1'] = True
DEFINES['OPUS_X86_MAY_HAVE_AVX'] = True
SOURCES += celt_sources_sse
SOURCES += celt_sources_sse2
SOURCES += celt_sources_sse4_1
SOURCES += silk_sources_sse4_1
if not CONFIG['THE_SSE1']:
DEFINES['OPUS_X86_MAY_HAVE_SSE2'] = True
DEFINES['OPUS_X86_MAY_HAVE_SSE4_1'] = True
DEFINES['OPUS_X86_MAY_HAVE_AVX'] = True
SOURCES += celt_sources_sse2
SOURCES += celt_sources_sse4_1
SOURCES += silk_sources_sse4_1
if not CONFIG['MOZ_SAMPLE_TYPE_FLOAT32']:
SOURCES += silk_sources_fixed_sse4_1
for f in SOURCES:

View File

@ -41,7 +41,7 @@ if CONFIG['CPU_ARCH'] == 'arm' or CONFIG['CPU_ARCH'] == 'aarch64':
'arm/filter_neon.S'
]
if CONFIG['INTEL_ARCHITECTURE']:
if CONFIG['INTEL_ARCHITECTURE'] and not CONFIG['THE_SSE1']:
DEFINES['MOZ_PNG_USE_INTEL_SSE'] = True
UNIFIED_SOURCES += [
'intel/filter_sse2_intrinsics.c',

View File

@ -6,4 +6,3 @@ with Files("**"):
BUG_COMPONENT = ("Core", "Audio/Video: Playback")
DIRS += ['src']

View File

@ -155,7 +155,7 @@ namespace soundtouch
// data type for sample accumulation: Use double to utilize full precision.
typedef double LONG_SAMPLETYPE;
#ifdef SOUNDTOUCH_ALLOW_X86_OPTIMIZATIONS
#if defined(SOUNDTOUCH_ALLOW_X86_OPTIMIZATIONS) && !defined(THE_SSE1)
// Allow SSE optimizations
#define SOUNDTOUCH_ALLOW_SSE 1
#endif

View File

@ -25,7 +25,7 @@ UNIFIED_SOURCES += [
]
if CONFIG['INTEL_ARCHITECTURE']:
if CONFIG['MOZ_SAMPLE_TYPE_FLOAT32']:
if CONFIG['MOZ_SAMPLE_TYPE_FLOAT32'] and not CONFIG['THE_SSE1']:
SOURCES += ['sse_optimized.cpp']
SOURCES['sse_optimized.cpp'].flags += CONFIG['SSE2_FLAGS']
else:

View File

@ -29,7 +29,7 @@ else:
DEFINES['FLOATING_POINT'] = True
# Only use SSE code when using floating point samples, and on x86
if CONFIG['INTEL_ARCHITECTURE'] and not CONFIG['MOZ_SAMPLE_TYPE_S16']:
if CONFIG['INTEL_ARCHITECTURE'] and not CONFIG['MOZ_SAMPLE_TYPE_S16'] and not CONFIG['THE_SSE1']:
DEFINES['_USE_SSE'] = True
DEFINES['_USE_SSE2'] = True
SOURCES += [

View File

@ -28,8 +28,10 @@ extern "C" {
#endif
void vp8_bilinear_predict16x16_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
#ifndef THE_SSE1
void vp8_bilinear_predict16x16_sse2(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
void vp8_bilinear_predict16x16_ssse3(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
#endif
RTCD_EXTERN void (*vp8_bilinear_predict16x16)(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
void vp8_bilinear_predict4x4_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
@ -41,8 +43,10 @@ void vp8_bilinear_predict8x4_mmx(unsigned char *src, int src_pitch, int xofst, i
RTCD_EXTERN void (*vp8_bilinear_predict8x4)(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
void vp8_bilinear_predict8x8_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
#ifndef THE_SSE1
void vp8_bilinear_predict8x8_sse2(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
void vp8_bilinear_predict8x8_ssse3(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
#endif
RTCD_EXTERN void (*vp8_bilinear_predict8x8)(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
void vp8_blend_b_c(unsigned char *y, unsigned char *u, unsigned char *v, int y1, int u1, int v1, int alpha, int stride);
@ -55,16 +59,22 @@ void vp8_blend_mb_outer_c(unsigned char *y, unsigned char *u, unsigned char *v,
#define vp8_blend_mb_outer vp8_blend_mb_outer_c
int vp8_block_error_c(short *coeff, short *dqcoeff);
#ifndef THE_SSE1
int vp8_block_error_sse2(short *coeff, short *dqcoeff);
#endif
RTCD_EXTERN int (*vp8_block_error)(short *coeff, short *dqcoeff);
void vp8_copy32xn_c(const unsigned char *src_ptr, int source_stride, unsigned char *dst_ptr, int dst_stride, int n);
#ifndef THE_SSE1
void vp8_copy32xn_sse2(const unsigned char *src_ptr, int source_stride, unsigned char *dst_ptr, int dst_stride, int n);
void vp8_copy32xn_sse3(const unsigned char *src_ptr, int source_stride, unsigned char *dst_ptr, int dst_stride, int n);
#endif
RTCD_EXTERN void (*vp8_copy32xn)(const unsigned char *src_ptr, int source_stride, unsigned char *dst_ptr, int dst_stride, int n);
void vp8_copy_mem16x16_c(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch);
#ifndef THE_SSE1
void vp8_copy_mem16x16_sse2(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch);
#endif
RTCD_EXTERN void (*vp8_copy_mem16x16)(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch);
void vp8_copy_mem8x4_c(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch);
@ -80,11 +90,15 @@ void vp8_dc_only_idct_add_mmx(short input, unsigned char *pred, int pred_stride,
RTCD_EXTERN void (*vp8_dc_only_idct_add)(short input, unsigned char *pred, int pred_stride, unsigned char *dst, int dst_stride);
int vp8_denoiser_filter_c(unsigned char *mc_running_avg_y, int mc_avg_y_stride, unsigned char *running_avg_y, int avg_y_stride, unsigned char *sig, int sig_stride, unsigned int motion_magnitude, int increase_denoising);
#ifndef THE_SSE1
int vp8_denoiser_filter_sse2(unsigned char *mc_running_avg_y, int mc_avg_y_stride, unsigned char *running_avg_y, int avg_y_stride, unsigned char *sig, int sig_stride, unsigned int motion_magnitude, int increase_denoising);
#endif
RTCD_EXTERN int (*vp8_denoiser_filter)(unsigned char *mc_running_avg_y, int mc_avg_y_stride, unsigned char *running_avg_y, int avg_y_stride, unsigned char *sig, int sig_stride, unsigned int motion_magnitude, int increase_denoising);
int vp8_denoiser_filter_uv_c(unsigned char *mc_running_avg, int mc_avg_stride, unsigned char *running_avg, int avg_stride, unsigned char *sig, int sig_stride, unsigned int motion_magnitude, int increase_denoising);
#ifndef THE_SSE1
int vp8_denoiser_filter_uv_sse2(unsigned char *mc_running_avg, int mc_avg_stride, unsigned char *running_avg, int avg_stride, unsigned char *sig, int sig_stride, unsigned int motion_magnitude, int increase_denoising);
#endif
RTCD_EXTERN int (*vp8_denoiser_filter_uv)(unsigned char *mc_running_avg, int mc_avg_stride, unsigned char *running_avg, int avg_stride, unsigned char *sig, int sig_stride, unsigned int motion_magnitude, int increase_denoising);
void vp8_dequant_idct_add_c(short *input, short *dq, unsigned char *output, int stride);
@ -92,11 +106,15 @@ void vp8_dequant_idct_add_mmx(short *input, short *dq, unsigned char *output, in
RTCD_EXTERN void (*vp8_dequant_idct_add)(short *input, short *dq, unsigned char *output, int stride);
void vp8_dequant_idct_add_uv_block_c(short *q, short *dq, unsigned char *dst_u, unsigned char *dst_v, int stride, char *eobs);
#ifndef THE_SSE1
void vp8_dequant_idct_add_uv_block_sse2(short *q, short *dq, unsigned char *dst_u, unsigned char *dst_v, int stride, char *eobs);
#endif
RTCD_EXTERN void (*vp8_dequant_idct_add_uv_block)(short *q, short *dq, unsigned char *dst_u, unsigned char *dst_v, int stride, char *eobs);
void vp8_dequant_idct_add_y_block_c(short *q, short *dq, unsigned char *dst, int stride, char *eobs);
#ifndef THE_SSE1
void vp8_dequant_idct_add_y_block_sse2(short *q, short *dq, unsigned char *dst, int stride, char *eobs);
#endif
RTCD_EXTERN void (*vp8_dequant_idct_add_y_block)(short *q, short *dq, unsigned char *dst, int stride, char *eobs);
void vp8_dequantize_b_c(struct blockd*, short *dqc);
@ -108,19 +126,25 @@ int vp8_diamond_search_sadx4(struct macroblock *x, struct block *b, struct block
RTCD_EXTERN int (*vp8_diamond_search_sad)(struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, union int_mv *best_mv, int search_param, int sad_per_bit, int *num00, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv);
void vp8_fast_quantize_b_c(struct block *, struct blockd *);
#ifndef THE_SSE1
void vp8_fast_quantize_b_sse2(struct block *, struct blockd *);
void vp8_fast_quantize_b_ssse3(struct block *, struct blockd *);
#endif
RTCD_EXTERN void (*vp8_fast_quantize_b)(struct block *, struct blockd *);
void vp8_filter_by_weight16x16_c(unsigned char *src, int src_stride, unsigned char *dst, int dst_stride, int src_weight);
#ifndef THE_SSE1
void vp8_filter_by_weight16x16_sse2(unsigned char *src, int src_stride, unsigned char *dst, int dst_stride, int src_weight);
#endif
RTCD_EXTERN void (*vp8_filter_by_weight16x16)(unsigned char *src, int src_stride, unsigned char *dst, int dst_stride, int src_weight);
void vp8_filter_by_weight4x4_c(unsigned char *src, int src_stride, unsigned char *dst, int dst_stride, int src_weight);
#define vp8_filter_by_weight4x4 vp8_filter_by_weight4x4_c
void vp8_filter_by_weight8x8_c(unsigned char *src, int src_stride, unsigned char *dst, int dst_stride, int src_weight);
#ifndef THE_SSE1
void vp8_filter_by_weight8x8_sse2(unsigned char *src, int src_stride, unsigned char *dst, int dst_stride, int src_weight);
#endif
RTCD_EXTERN void (*vp8_filter_by_weight8x8)(unsigned char *src, int src_stride, unsigned char *dst, int dst_stride, int src_weight);
int vp8_full_search_sad_c(struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, int sad_per_bit, int distance, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv);
@ -129,43 +153,63 @@ int vp8_full_search_sadx8(struct macroblock *x, struct block *b, struct blockd *
RTCD_EXTERN int (*vp8_full_search_sad)(struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, int sad_per_bit, int distance, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv);
void vp8_loop_filter_bh_c(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
#ifndef THE_SSE1
void vp8_loop_filter_bh_sse2(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
#endif
RTCD_EXTERN void (*vp8_loop_filter_bh)(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
void vp8_loop_filter_bv_c(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
#ifndef THE_SSE1
void vp8_loop_filter_bv_sse2(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
#endif
RTCD_EXTERN void (*vp8_loop_filter_bv)(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
void vp8_loop_filter_mbh_c(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
#ifndef THE_SSE1
void vp8_loop_filter_mbh_sse2(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
#endif
RTCD_EXTERN void (*vp8_loop_filter_mbh)(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
void vp8_loop_filter_mbv_c(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
#ifndef THE_SSE1
void vp8_loop_filter_mbv_sse2(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
#endif
RTCD_EXTERN void (*vp8_loop_filter_mbv)(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
void vp8_loop_filter_bhs_c(unsigned char *y, int ystride, const unsigned char *blimit);
#ifndef THE_SSE1
void vp8_loop_filter_bhs_sse2(unsigned char *y, int ystride, const unsigned char *blimit);
#endif
RTCD_EXTERN void (*vp8_loop_filter_simple_bh)(unsigned char *y, int ystride, const unsigned char *blimit);
void vp8_loop_filter_bvs_c(unsigned char *y, int ystride, const unsigned char *blimit);
#ifndef THE_SSE1
void vp8_loop_filter_bvs_sse2(unsigned char *y, int ystride, const unsigned char *blimit);
#endif
RTCD_EXTERN void (*vp8_loop_filter_simple_bv)(unsigned char *y, int ystride, const unsigned char *blimit);
void vp8_loop_filter_simple_horizontal_edge_c(unsigned char *y, int ystride, const unsigned char *blimit);
#ifndef THE_SSE1
void vp8_loop_filter_simple_horizontal_edge_sse2(unsigned char *y, int ystride, const unsigned char *blimit);
#endif
RTCD_EXTERN void (*vp8_loop_filter_simple_mbh)(unsigned char *y, int ystride, const unsigned char *blimit);
void vp8_loop_filter_simple_vertical_edge_c(unsigned char *y, int ystride, const unsigned char *blimit);
#ifndef THE_SSE1
void vp8_loop_filter_simple_vertical_edge_sse2(unsigned char *y, int ystride, const unsigned char *blimit);
#endif
RTCD_EXTERN void (*vp8_loop_filter_simple_mbv)(unsigned char *y, int ystride, const unsigned char *blimit);
int vp8_mbblock_error_c(struct macroblock *mb, int dc);
#ifndef THE_SSE1
int vp8_mbblock_error_sse2(struct macroblock *mb, int dc);
#endif
RTCD_EXTERN int (*vp8_mbblock_error)(struct macroblock *mb, int dc);
int vp8_mbuverror_c(struct macroblock *mb);
#ifndef THE_SSE1
int vp8_mbuverror_sse2(struct macroblock *mb);
#endif
RTCD_EXTERN int (*vp8_mbuverror)(struct macroblock *mb);
int vp8_refining_search_sad_c(struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, int sad_per_bit, int distance, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv);
@ -173,16 +217,22 @@ int vp8_refining_search_sadx4(struct macroblock *x, struct block *b, struct bloc
RTCD_EXTERN int (*vp8_refining_search_sad)(struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, int sad_per_bit, int distance, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv);
void vp8_regular_quantize_b_c(struct block *, struct blockd *);
#ifndef THE_SSE1
void vp8_regular_quantize_b_sse2(struct block *, struct blockd *);
void vp8_regular_quantize_b_sse4_1(struct block *, struct blockd *);
#endif
RTCD_EXTERN void (*vp8_regular_quantize_b)(struct block *, struct blockd *);
void vp8_short_fdct4x4_c(short *input, short *output, int pitch);
#ifndef THE_SSE1
void vp8_short_fdct4x4_sse2(short *input, short *output, int pitch);
#endif
RTCD_EXTERN void (*vp8_short_fdct4x4)(short *input, short *output, int pitch);
void vp8_short_fdct8x4_c(short *input, short *output, int pitch);
#ifndef THE_SSE1
void vp8_short_fdct8x4_sse2(short *input, short *output, int pitch);
#endif
RTCD_EXTERN void (*vp8_short_fdct8x4)(short *input, short *output, int pitch);
void vp8_short_idct4x4llm_c(short *input, unsigned char *pred, int pitch, unsigned char *dst, int dst_stride);
@ -190,38 +240,52 @@ void vp8_short_idct4x4llm_mmx(short *input, unsigned char *pred, int pitch, unsi
RTCD_EXTERN void (*vp8_short_idct4x4llm)(short *input, unsigned char *pred, int pitch, unsigned char *dst, int dst_stride);
void vp8_short_inv_walsh4x4_c(short *input, short *output);
#ifndef THE_SSE1
void vp8_short_inv_walsh4x4_sse2(short *input, short *output);
#endif
RTCD_EXTERN void (*vp8_short_inv_walsh4x4)(short *input, short *output);
void vp8_short_inv_walsh4x4_1_c(short *input, short *output);
#define vp8_short_inv_walsh4x4_1 vp8_short_inv_walsh4x4_1_c
void vp8_short_walsh4x4_c(short *input, short *output, int pitch);
#ifndef THE_SSE1
void vp8_short_walsh4x4_sse2(short *input, short *output, int pitch);
#endif
RTCD_EXTERN void (*vp8_short_walsh4x4)(short *input, short *output, int pitch);
void vp8_sixtap_predict16x16_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
#ifndef THE_SSE1
void vp8_sixtap_predict16x16_sse2(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
void vp8_sixtap_predict16x16_ssse3(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
#endif
RTCD_EXTERN void (*vp8_sixtap_predict16x16)(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
void vp8_sixtap_predict4x4_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
void vp8_sixtap_predict4x4_mmx(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
#ifndef THE_SSE1
void vp8_sixtap_predict4x4_ssse3(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
#endif
RTCD_EXTERN void (*vp8_sixtap_predict4x4)(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
void vp8_sixtap_predict8x4_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
#ifndef THE_SSE1
void vp8_sixtap_predict8x4_sse2(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
void vp8_sixtap_predict8x4_ssse3(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
#endif
RTCD_EXTERN void (*vp8_sixtap_predict8x4)(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
void vp8_sixtap_predict8x8_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
#ifndef THE_SSE1
void vp8_sixtap_predict8x8_sse2(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
void vp8_sixtap_predict8x8_ssse3(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
#endif
RTCD_EXTERN void (*vp8_sixtap_predict8x8)(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
void vp8_temporal_filter_apply_c(unsigned char *frame1, unsigned int stride, unsigned char *frame2, unsigned int block_size, int strength, int filter_weight, unsigned int *accumulator, unsigned short *count);
#ifndef THE_SSE1
void vp8_temporal_filter_apply_sse2(unsigned char *frame1, unsigned int stride, unsigned char *frame2, unsigned int block_size, int strength, int filter_weight, unsigned int *accumulator, unsigned short *count);
#endif
RTCD_EXTERN void (*vp8_temporal_filter_apply)(unsigned char *frame1, unsigned int stride, unsigned char *frame2, unsigned int block_size, int strength, int filter_weight, unsigned int *accumulator, unsigned short *count);
void vp8_rtcd(void);
@ -235,22 +299,14 @@ static void setup_rtcd_internal(void)
(void)flags;
vp8_bilinear_predict16x16 = vp8_bilinear_predict16x16_c;
if (flags & HAS_SSE2) vp8_bilinear_predict16x16 = vp8_bilinear_predict16x16_sse2;
if (flags & HAS_SSSE3) vp8_bilinear_predict16x16 = vp8_bilinear_predict16x16_ssse3;
vp8_bilinear_predict4x4 = vp8_bilinear_predict4x4_c;
if (flags & HAS_MMX) vp8_bilinear_predict4x4 = vp8_bilinear_predict4x4_mmx;
vp8_bilinear_predict8x4 = vp8_bilinear_predict8x4_c;
if (flags & HAS_MMX) vp8_bilinear_predict8x4 = vp8_bilinear_predict8x4_mmx;
vp8_bilinear_predict8x8 = vp8_bilinear_predict8x8_c;
if (flags & HAS_SSE2) vp8_bilinear_predict8x8 = vp8_bilinear_predict8x8_sse2;
if (flags & HAS_SSSE3) vp8_bilinear_predict8x8 = vp8_bilinear_predict8x8_ssse3;
vp8_block_error = vp8_block_error_c;
if (flags & HAS_SSE2) vp8_block_error = vp8_block_error_sse2;
vp8_copy32xn = vp8_copy32xn_c;
if (flags & HAS_SSE2) vp8_copy32xn = vp8_copy32xn_sse2;
if (flags & HAS_SSE3) vp8_copy32xn = vp8_copy32xn_sse3;
vp8_copy_mem16x16 = vp8_copy_mem16x16_c;
if (flags & HAS_SSE2) vp8_copy_mem16x16 = vp8_copy_mem16x16_sse2;
vp8_copy_mem8x4 = vp8_copy_mem8x4_c;
if (flags & HAS_MMX) vp8_copy_mem8x4 = vp8_copy_mem8x4_mmx;
vp8_copy_mem8x8 = vp8_copy_mem8x8_c;
@ -258,78 +314,88 @@ static void setup_rtcd_internal(void)
vp8_dc_only_idct_add = vp8_dc_only_idct_add_c;
if (flags & HAS_MMX) vp8_dc_only_idct_add = vp8_dc_only_idct_add_mmx;
vp8_denoiser_filter = vp8_denoiser_filter_c;
if (flags & HAS_SSE2) vp8_denoiser_filter = vp8_denoiser_filter_sse2;
vp8_denoiser_filter_uv = vp8_denoiser_filter_uv_c;
if (flags & HAS_SSE2) vp8_denoiser_filter_uv = vp8_denoiser_filter_uv_sse2;
vp8_dequant_idct_add = vp8_dequant_idct_add_c;
if (flags & HAS_MMX) vp8_dequant_idct_add = vp8_dequant_idct_add_mmx;
vp8_dequant_idct_add_uv_block = vp8_dequant_idct_add_uv_block_c;
if (flags & HAS_SSE2) vp8_dequant_idct_add_uv_block = vp8_dequant_idct_add_uv_block_sse2;
vp8_dequant_idct_add_y_block = vp8_dequant_idct_add_y_block_c;
if (flags & HAS_SSE2) vp8_dequant_idct_add_y_block = vp8_dequant_idct_add_y_block_sse2;
vp8_dequantize_b = vp8_dequantize_b_c;
if (flags & HAS_MMX) vp8_dequantize_b = vp8_dequantize_b_mmx;
vp8_diamond_search_sad = vp8_diamond_search_sad_c;
if (flags & HAS_SSE2) vp8_diamond_search_sad = vp8_diamond_search_sadx4;
vp8_fast_quantize_b = vp8_fast_quantize_b_c;
if (flags & HAS_SSE2) vp8_fast_quantize_b = vp8_fast_quantize_b_sse2;
if (flags & HAS_SSSE3) vp8_fast_quantize_b = vp8_fast_quantize_b_ssse3;
vp8_filter_by_weight16x16 = vp8_filter_by_weight16x16_c;
if (flags & HAS_SSE2) vp8_filter_by_weight16x16 = vp8_filter_by_weight16x16_sse2;
vp8_filter_by_weight8x8 = vp8_filter_by_weight8x8_c;
if (flags & HAS_SSE2) vp8_filter_by_weight8x8 = vp8_filter_by_weight8x8_sse2;
vp8_full_search_sad = vp8_full_search_sad_c;
if (flags & HAS_SSE3) vp8_full_search_sad = vp8_full_search_sadx3;
if (flags & HAS_SSE4_1) vp8_full_search_sad = vp8_full_search_sadx8;
vp8_loop_filter_bh = vp8_loop_filter_bh_c;
if (flags & HAS_SSE2) vp8_loop_filter_bh = vp8_loop_filter_bh_sse2;
vp8_loop_filter_bv = vp8_loop_filter_bv_c;
if (flags & HAS_SSE2) vp8_loop_filter_bv = vp8_loop_filter_bv_sse2;
vp8_loop_filter_mbh = vp8_loop_filter_mbh_c;
if (flags & HAS_SSE2) vp8_loop_filter_mbh = vp8_loop_filter_mbh_sse2;
vp8_loop_filter_mbv = vp8_loop_filter_mbv_c;
if (flags & HAS_SSE2) vp8_loop_filter_mbv = vp8_loop_filter_mbv_sse2;
vp8_loop_filter_simple_bh = vp8_loop_filter_bhs_c;
if (flags & HAS_SSE2) vp8_loop_filter_simple_bh = vp8_loop_filter_bhs_sse2;
vp8_loop_filter_simple_bv = vp8_loop_filter_bvs_c;
if (flags & HAS_SSE2) vp8_loop_filter_simple_bv = vp8_loop_filter_bvs_sse2;
vp8_loop_filter_simple_mbh = vp8_loop_filter_simple_horizontal_edge_c;
if (flags & HAS_SSE2) vp8_loop_filter_simple_mbh = vp8_loop_filter_simple_horizontal_edge_sse2;
vp8_loop_filter_simple_mbv = vp8_loop_filter_simple_vertical_edge_c;
if (flags & HAS_SSE2) vp8_loop_filter_simple_mbv = vp8_loop_filter_simple_vertical_edge_sse2;
vp8_mbblock_error = vp8_mbblock_error_c;
if (flags & HAS_SSE2) vp8_mbblock_error = vp8_mbblock_error_sse2;
vp8_mbuverror = vp8_mbuverror_c;
if (flags & HAS_SSE2) vp8_mbuverror = vp8_mbuverror_sse2;
vp8_refining_search_sad = vp8_refining_search_sad_c;
if (flags & HAS_SSE2) vp8_refining_search_sad = vp8_refining_search_sadx4;
vp8_regular_quantize_b = vp8_regular_quantize_b_c;
if (flags & HAS_SSE2) vp8_regular_quantize_b = vp8_regular_quantize_b_sse2;
if (flags & HAS_SSE4_1) vp8_regular_quantize_b = vp8_regular_quantize_b_sse4_1;
vp8_short_fdct4x4 = vp8_short_fdct4x4_c;
if (flags & HAS_SSE2) vp8_short_fdct4x4 = vp8_short_fdct4x4_sse2;
vp8_short_fdct8x4 = vp8_short_fdct8x4_c;
if (flags & HAS_SSE2) vp8_short_fdct8x4 = vp8_short_fdct8x4_sse2;
vp8_short_idct4x4llm = vp8_short_idct4x4llm_c;
if (flags & HAS_MMX) vp8_short_idct4x4llm = vp8_short_idct4x4llm_mmx;
vp8_short_inv_walsh4x4 = vp8_short_inv_walsh4x4_c;
if (flags & HAS_SSE2) vp8_short_inv_walsh4x4 = vp8_short_inv_walsh4x4_sse2;
vp8_short_walsh4x4 = vp8_short_walsh4x4_c;
if (flags & HAS_SSE2) vp8_short_walsh4x4 = vp8_short_walsh4x4_sse2;
vp8_sixtap_predict16x16 = vp8_sixtap_predict16x16_c;
if (flags & HAS_SSE2) vp8_sixtap_predict16x16 = vp8_sixtap_predict16x16_sse2;
if (flags & HAS_SSSE3) vp8_sixtap_predict16x16 = vp8_sixtap_predict16x16_ssse3;
vp8_sixtap_predict4x4 = vp8_sixtap_predict4x4_c;
if (flags & HAS_MMX) vp8_sixtap_predict4x4 = vp8_sixtap_predict4x4_mmx;
if (flags & HAS_SSSE3) vp8_sixtap_predict4x4 = vp8_sixtap_predict4x4_ssse3;
vp8_sixtap_predict8x4 = vp8_sixtap_predict8x4_c;
vp8_sixtap_predict8x8 = vp8_sixtap_predict8x8_c;
vp8_temporal_filter_apply = vp8_temporal_filter_apply_c;
#ifndef THE_SSE1
if (flags & HAS_SSE2) vp8_bilinear_predict16x16 = vp8_bilinear_predict16x16_sse2;
if (flags & HAS_SSSE3) vp8_bilinear_predict16x16 = vp8_bilinear_predict16x16_ssse3;
if (flags & HAS_SSE2) vp8_bilinear_predict8x8 = vp8_bilinear_predict8x8_sse2;
if (flags & HAS_SSSE3) vp8_bilinear_predict8x8 = vp8_bilinear_predict8x8_ssse3;
if (flags & HAS_SSE2) vp8_block_error = vp8_block_error_sse2;
if (flags & HAS_SSE2) vp8_copy32xn = vp8_copy32xn_sse2;
if (flags & HAS_SSE3) vp8_copy32xn = vp8_copy32xn_sse3;
if (flags & HAS_SSE2) vp8_copy_mem16x16 = vp8_copy_mem16x16_sse2;
if (flags & HAS_SSE2) vp8_denoiser_filter = vp8_denoiser_filter_sse2;
if (flags & HAS_SSE2) vp8_denoiser_filter_uv = vp8_denoiser_filter_uv_sse2;
if (flags & HAS_SSE2) vp8_dequant_idct_add_uv_block = vp8_dequant_idct_add_uv_block_sse2;
if (flags & HAS_SSE2) vp8_dequant_idct_add_y_block = vp8_dequant_idct_add_y_block_sse2;
if (flags & HAS_SSE2) vp8_diamond_search_sad = vp8_diamond_search_sadx4;
if (flags & HAS_SSE2) vp8_fast_quantize_b = vp8_fast_quantize_b_sse2;
if (flags & HAS_SSSE3) vp8_fast_quantize_b = vp8_fast_quantize_b_ssse3;
if (flags & HAS_SSE2) vp8_filter_by_weight16x16 = vp8_filter_by_weight16x16_sse2;
if (flags & HAS_SSE2) vp8_filter_by_weight8x8 = vp8_filter_by_weight8x8_sse2;
if (flags & HAS_SSE3) vp8_full_search_sad = vp8_full_search_sadx3;
if (flags & HAS_SSE4_1) vp8_full_search_sad = vp8_full_search_sadx8;
if (flags & HAS_SSE2) vp8_loop_filter_bh = vp8_loop_filter_bh_sse2;
if (flags & HAS_SSE2) vp8_loop_filter_bv = vp8_loop_filter_bv_sse2;
if (flags & HAS_SSE2) vp8_loop_filter_mbh = vp8_loop_filter_mbh_sse2;
if (flags & HAS_SSE2) vp8_loop_filter_mbv = vp8_loop_filter_mbv_sse2;
if (flags & HAS_SSE2) vp8_loop_filter_simple_bh = vp8_loop_filter_bhs_sse2;
if (flags & HAS_SSE2) vp8_loop_filter_simple_bv = vp8_loop_filter_bvs_sse2;
if (flags & HAS_SSE2) vp8_loop_filter_simple_mbh = vp8_loop_filter_simple_horizontal_edge_sse2;
if (flags & HAS_SSE2) vp8_loop_filter_simple_mbv = vp8_loop_filter_simple_vertical_edge_sse2;
if (flags & HAS_SSE2) vp8_mbblock_error = vp8_mbblock_error_sse2;
if (flags & HAS_SSE2) vp8_mbuverror = vp8_mbuverror_sse2;
if (flags & HAS_SSE2) vp8_refining_search_sad = vp8_refining_search_sadx4;
if (flags & HAS_SSE2) vp8_regular_quantize_b = vp8_regular_quantize_b_sse2;
if (flags & HAS_SSE4_1) vp8_regular_quantize_b = vp8_regular_quantize_b_sse4_1;
if (flags & HAS_SSE2) vp8_short_fdct4x4 = vp8_short_fdct4x4_sse2;
if (flags & HAS_SSE2) vp8_short_fdct8x4 = vp8_short_fdct8x4_sse2;
if (flags & HAS_SSE2) vp8_short_inv_walsh4x4 = vp8_short_inv_walsh4x4_sse2;
if (flags & HAS_SSE2) vp8_short_walsh4x4 = vp8_short_walsh4x4_sse2;
if (flags & HAS_SSE2) vp8_sixtap_predict16x16 = vp8_sixtap_predict16x16_sse2;
if (flags & HAS_SSSE3) vp8_sixtap_predict16x16 = vp8_sixtap_predict16x16_ssse3;
if (flags & HAS_SSSE3) vp8_sixtap_predict4x4 = vp8_sixtap_predict4x4_ssse3;
if (flags & HAS_SSE2) vp8_sixtap_predict8x4 = vp8_sixtap_predict8x4_sse2;
if (flags & HAS_SSSE3) vp8_sixtap_predict8x4 = vp8_sixtap_predict8x4_ssse3;
vp8_sixtap_predict8x8 = vp8_sixtap_predict8x8_c;
if (flags & HAS_SSE2) vp8_sixtap_predict8x8 = vp8_sixtap_predict8x8_sse2;
if (flags & HAS_SSSE3) vp8_sixtap_predict8x8 = vp8_sixtap_predict8x8_ssse3;
vp8_temporal_filter_apply = vp8_temporal_filter_apply_c;
if (flags & HAS_SSE2) vp8_temporal_filter_apply = vp8_temporal_filter_apply_sse2;
#endif
}
#endif

View File

@ -32,73 +32,105 @@ extern "C" {
#endif
int64_t vp9_block_error_c(const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz);
#ifndef THE_SSE1
int64_t vp9_block_error_sse2(const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz);
int64_t vp9_block_error_avx2(const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz);
#endif
RTCD_EXTERN int64_t (*vp9_block_error)(const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz);
int64_t vp9_block_error_fp_c(const tran_low_t *coeff, const tran_low_t *dqcoeff, int block_size);
#ifndef THE_SSE1
int64_t vp9_block_error_fp_sse2(const tran_low_t *coeff, const tran_low_t *dqcoeff, int block_size);
int64_t vp9_block_error_fp_avx2(const tran_low_t *coeff, const tran_low_t *dqcoeff, int block_size);
#endif
RTCD_EXTERN int64_t (*vp9_block_error_fp)(const tran_low_t *coeff, const tran_low_t *dqcoeff, int block_size);
int vp9_diamond_search_sad_c(const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv);
#ifndef THE_SSE1
int vp9_diamond_search_sad_avx(const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv);
#endif
RTCD_EXTERN int (*vp9_diamond_search_sad)(const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv);
void vp9_fdct8x8_quant_c(const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *round_ptr, const int16_t *quant_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan);
#ifndef THE_SSE1
void vp9_fdct8x8_quant_sse2(const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *round_ptr, const int16_t *quant_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan);
void vp9_fdct8x8_quant_ssse3(const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *round_ptr, const int16_t *quant_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan);
#endif
RTCD_EXTERN void (*vp9_fdct8x8_quant)(const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *round_ptr, const int16_t *quant_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan);
void vp9_fht16x16_c(const int16_t *input, tran_low_t *output, int stride, int tx_type);
#ifndef THE_SSE1
void vp9_fht16x16_sse2(const int16_t *input, tran_low_t *output, int stride, int tx_type);
#endif
RTCD_EXTERN void (*vp9_fht16x16)(const int16_t *input, tran_low_t *output, int stride, int tx_type);
void vp9_fht4x4_c(const int16_t *input, tran_low_t *output, int stride, int tx_type);
#ifndef THE_SSE1
void vp9_fht4x4_sse2(const int16_t *input, tran_low_t *output, int stride, int tx_type);
#endif
RTCD_EXTERN void (*vp9_fht4x4)(const int16_t *input, tran_low_t *output, int stride, int tx_type);
void vp9_fht8x8_c(const int16_t *input, tran_low_t *output, int stride, int tx_type);
#ifndef THE_SSE1
void vp9_fht8x8_sse2(const int16_t *input, tran_low_t *output, int stride, int tx_type);
#endif
RTCD_EXTERN void (*vp9_fht8x8)(const int16_t *input, tran_low_t *output, int stride, int tx_type);
void vp9_filter_by_weight16x16_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int src_weight);
#ifndef THE_SSE1
void vp9_filter_by_weight16x16_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int src_weight);
#endif
RTCD_EXTERN void (*vp9_filter_by_weight16x16)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int src_weight);
void vp9_filter_by_weight8x8_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int src_weight);
#ifndef THE_SSE1
void vp9_filter_by_weight8x8_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int src_weight);
#endif
RTCD_EXTERN void (*vp9_filter_by_weight8x8)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int src_weight);
void vp9_fwht4x4_c(const int16_t *input, tran_low_t *output, int stride);
#ifndef THE_SSE1
void vp9_fwht4x4_sse2(const int16_t *input, tran_low_t *output, int stride);
#endif
RTCD_EXTERN void (*vp9_fwht4x4)(const int16_t *input, tran_low_t *output, int stride);
void vp9_iht16x16_256_add_c(const tran_low_t *input, uint8_t *output, int pitch, int tx_type);
#ifndef THE_SSE1
void vp9_iht16x16_256_add_sse2(const tran_low_t *input, uint8_t *output, int pitch, int tx_type);
#endif
RTCD_EXTERN void (*vp9_iht16x16_256_add)(const tran_low_t *input, uint8_t *output, int pitch, int tx_type);
void vp9_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride, int tx_type);
#ifndef THE_SSE1
void vp9_iht4x4_16_add_sse2(const tran_low_t *input, uint8_t *dest, int stride, int tx_type);
#endif
RTCD_EXTERN void (*vp9_iht4x4_16_add)(const tran_low_t *input, uint8_t *dest, int stride, int tx_type);
void vp9_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int stride, int tx_type);
#ifndef THE_SSE1
void vp9_iht8x8_64_add_sse2(const tran_low_t *input, uint8_t *dest, int stride, int tx_type);
#endif
RTCD_EXTERN void (*vp9_iht8x8_64_add)(const tran_low_t *input, uint8_t *dest, int stride, int tx_type);
void vp9_quantize_fp_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *round_ptr, const int16_t *quant_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan);
#ifndef THE_SSE1
void vp9_quantize_fp_sse2(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *round_ptr, const int16_t *quant_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan);
#endif
RTCD_EXTERN void (*vp9_quantize_fp)(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *round_ptr, const int16_t *quant_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan);
void vp9_quantize_fp_32x32_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *round_ptr, const int16_t *quant_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan);
#define vp9_quantize_fp_32x32 vp9_quantize_fp_32x32_c
void vp9_scale_and_extend_frame_c(const struct yv12_buffer_config *src, struct yv12_buffer_config *dst, INTERP_FILTER filter_type, int phase_scaler);
#ifndef THE_SSE1
void vp9_scale_and_extend_frame_ssse3(const struct yv12_buffer_config *src, struct yv12_buffer_config *dst, INTERP_FILTER filter_type, int phase_scaler);
#endif
RTCD_EXTERN void (*vp9_scale_and_extend_frame)(const struct yv12_buffer_config *src, struct yv12_buffer_config *dst, INTERP_FILTER filter_type, int phase_scaler);
void vp9_temporal_filter_apply_c(const uint8_t *frame1, unsigned int stride, const uint8_t *frame2, unsigned int block_width, unsigned int block_height, int strength, int filter_weight, uint32_t *accumulator, uint16_t *count);
#ifndef THE_SSE1
void vp9_temporal_filter_apply_sse4_1(const uint8_t *frame1, unsigned int stride, const uint8_t *frame2, unsigned int block_width, unsigned int block_height, int strength, int filter_weight, uint32_t *accumulator, uint16_t *count);
#endif
RTCD_EXTERN void (*vp9_temporal_filter_apply)(const uint8_t *frame1, unsigned int stride, const uint8_t *frame2, unsigned int block_width, unsigned int block_height, int strength, int filter_weight, uint32_t *accumulator, uint16_t *count);
void vp9_rtcd(void);
@ -107,45 +139,47 @@ void vp9_rtcd(void);
#include "vpx_ports/x86.h"
/*
 * Bind every VP9 run-time-dispatched function pointer to an implementation.
 *
 * Each pointer is first set to its portable C version, so all of them are
 * valid regardless of build configuration. Unless the build defines
 * THE_SSE1, the capability flags returned by x86_simd_caps() are then
 * checked and matching SIMD variants replace the C versions. The checks run
 * in order and a later match overrides an earlier one (e.g. AVX2 replaces
 * SSE2 for vp9_block_error when both flags are set).
 */
static void setup_rtcd_internal(void)
{
    /* Portable C fallbacks: always assigned, exactly once. */
    vp9_block_error = vp9_block_error_c;
    vp9_block_error_fp = vp9_block_error_fp_c;
    vp9_diamond_search_sad = vp9_diamond_search_sad_c;
    vp9_fdct8x8_quant = vp9_fdct8x8_quant_c;
    vp9_fht16x16 = vp9_fht16x16_c;
    vp9_fht4x4 = vp9_fht4x4_c;
    vp9_fht8x8 = vp9_fht8x8_c;
    vp9_filter_by_weight16x16 = vp9_filter_by_weight16x16_c;
    vp9_filter_by_weight8x8 = vp9_filter_by_weight8x8_c;
    vp9_fwht4x4 = vp9_fwht4x4_c;
    vp9_iht16x16_256_add = vp9_iht16x16_256_add_c;
    vp9_iht4x4_16_add = vp9_iht4x4_16_add_c;
    vp9_iht8x8_64_add = vp9_iht8x8_64_add_c;
    vp9_quantize_fp = vp9_quantize_fp_c;
    vp9_scale_and_extend_frame = vp9_scale_and_extend_frame_c;
    vp9_temporal_filter_apply = vp9_temporal_filter_apply_c;

#ifndef THE_SSE1
    /* SIMD overrides: compiled out entirely when THE_SSE1 is defined. */
    int flags = x86_simd_caps();

    (void)flags;
    if (flags & HAS_SSE2) vp9_block_error = vp9_block_error_sse2;
    if (flags & HAS_AVX2) vp9_block_error = vp9_block_error_avx2;
    if (flags & HAS_SSE2) vp9_block_error_fp = vp9_block_error_fp_sse2;
    if (flags & HAS_AVX2) vp9_block_error_fp = vp9_block_error_fp_avx2;
    if (flags & HAS_AVX) vp9_diamond_search_sad = vp9_diamond_search_sad_avx;
    if (flags & HAS_SSE2) vp9_fdct8x8_quant = vp9_fdct8x8_quant_sse2;
    if (flags & HAS_SSSE3) vp9_fdct8x8_quant = vp9_fdct8x8_quant_ssse3;
    if (flags & HAS_SSE2) vp9_fht16x16 = vp9_fht16x16_sse2;
    if (flags & HAS_SSE2) vp9_fht4x4 = vp9_fht4x4_sse2;
    if (flags & HAS_SSE2) vp9_fht8x8 = vp9_fht8x8_sse2;
    if (flags & HAS_SSE2) vp9_filter_by_weight16x16 = vp9_filter_by_weight16x16_sse2;
    if (flags & HAS_SSE2) vp9_filter_by_weight8x8 = vp9_filter_by_weight8x8_sse2;
    if (flags & HAS_SSE2) vp9_fwht4x4 = vp9_fwht4x4_sse2;
    if (flags & HAS_SSE2) vp9_iht16x16_256_add = vp9_iht16x16_256_add_sse2;
    if (flags & HAS_SSE2) vp9_iht4x4_16_add = vp9_iht4x4_16_add_sse2;
    if (flags & HAS_SSE2) vp9_iht8x8_64_add = vp9_iht8x8_64_add_sse2;
    if (flags & HAS_SSE2) vp9_quantize_fp = vp9_quantize_fp_sse2;
    if (flags & HAS_SSSE3) vp9_scale_and_extend_frame = vp9_scale_and_extend_frame_ssse3;
    if (flags & HAS_SSE4_1) vp9_temporal_filter_apply = vp9_temporal_filter_apply_sse4_1;
#endif
}
#endif

View File

@ -23,12 +23,21 @@
#define HAVE_MIPS64 0
#define HAVE_MMX 1
#define HAVE_SSE 1
/*
 * SIMD feature switches for everything newer than SSE.
 * A build that defines THE_SSE1 turns all of them off (0); any other build
 * leaves all of them on (1).
 */
#ifdef THE_SSE1
#define HAVE_SSE2 0
#define HAVE_SSE3 0
#define HAVE_SSSE3 0
#define HAVE_SSE4_1 0
#define HAVE_AVX 0
#define HAVE_AVX2 0
#else
#define HAVE_SSE2 1
#define HAVE_SSE3 1
#define HAVE_SSSE3 1
#define HAVE_SSE4_1 1
#define HAVE_AVX 1
#define HAVE_AVX2 1
#endif
#define HAVE_AVX512 0
#define HAVE_VSX 0
#define HAVE_MMI 0

File diff suppressed because it is too large Load Diff

View File

@ -27,6 +27,8 @@ extern void vp8_filter_block1dc_v6_mmx(
unsigned int pixels_per_line, unsigned int pixel_step,
unsigned int output_height, unsigned int output_width,
const short *vp8_filter);
#if HAVE_SSE2
extern void vp8_filter_block1d8_h6_sse2(unsigned char *src_ptr,
unsigned short *output_ptr,
unsigned int src_pixels_per_line,
@ -74,6 +76,7 @@ extern void vp8_filter_block1d8_v6_only_sse2(unsigned char *src_ptr,
int dst_ptich,
unsigned int output_height,
const short *vp8_filter);
#endif
#if HAVE_MMX
void vp8_sixtap_predict4x4_mmx(unsigned char *src_ptr, int src_pixels_per_line,

View File

@ -30,6 +30,8 @@ if CONFIG['CPU_ARCH'] == 'x86_64':
elif CONFIG['CPU_ARCH'] == 'x86':
EXPORTS.vpx += files['IA32_EXPORTS']
SOURCES += files['IA32_SOURCES']
if not CONFIG['THE_SSE1']:
SOURCES += files['IA32_SSE2_SOURCES']
if CONFIG['OS_TARGET'] == 'WINNT':
if CONFIG['CC_TYPE'] == 'gcc':
ASFLAGS += [ '-I%s/media/libvpx/config/win/mingw32/' % TOPSRCDIR ]

View File

@ -313,23 +313,13 @@ files = {
'libvpx/vp8/common/treecoder.c',
'libvpx/vp8/common/vp8_loopfilter.c',
'libvpx/vp8/common/vp8_skin_detection.c',
'libvpx/vp8/common/x86/copy_sse2.asm',
'libvpx/vp8/common/x86/copy_sse3.asm',
'libvpx/vp8/common/x86/dequantize_mmx.asm',
'libvpx/vp8/common/x86/filter_x86.c',
'libvpx/vp8/common/x86/idct_blk_mmx.c',
'libvpx/vp8/common/x86/idct_blk_sse2.c',
'libvpx/vp8/common/x86/idctllm_mmx.asm',
'libvpx/vp8/common/x86/idctllm_sse2.asm',
'libvpx/vp8/common/x86/iwalsh_sse2.asm',
'libvpx/vp8/common/x86/loopfilter_sse2.asm',
'libvpx/vp8/common/x86/loopfilter_x86.c',
'libvpx/vp8/common/x86/mfqe_sse2.asm',
'libvpx/vp8/common/x86/recon_mmx.asm',
'libvpx/vp8/common/x86/recon_sse2.asm',
'libvpx/vp8/common/x86/subpixel_mmx.asm',
'libvpx/vp8/common/x86/subpixel_sse2.asm',
'libvpx/vp8/common/x86/subpixel_ssse3.asm',
'libvpx/vp8/common/x86/vp8_asm_stubs.c',
'libvpx/vp8/decoder/dboolhuff.c',
'libvpx/vp8/decoder/decodeframe.c',
@ -361,15 +351,7 @@ files = {
'libvpx/vp8/encoder/tokenize.c',
'libvpx/vp8/encoder/treewriter.c',
'libvpx/vp8/encoder/vp8_quantize.c',
'libvpx/vp8/encoder/x86/dct_sse2.asm',
'libvpx/vp8/encoder/x86/denoising_sse2.c',
'libvpx/vp8/encoder/x86/encodeopt.asm',
'libvpx/vp8/encoder/x86/fwalsh_sse2.asm',
'libvpx/vp8/encoder/x86/quantize_sse4.c',
'libvpx/vp8/encoder/x86/temporal_filter_apply_sse2.asm',
'libvpx/vp8/encoder/x86/vp8_enc_stubs_sse2.c',
'libvpx/vp8/encoder/x86/vp8_quantize_sse2.c',
'libvpx/vp8/encoder/x86/vp8_quantize_ssse3.c',
'libvpx/vp8/vp8_cx_iface.c',
'libvpx/vp8/vp8_dx_iface.c',
'libvpx/vp9/common/vp9_alloccommon.c',
@ -395,8 +377,6 @@ files = {
'libvpx/vp9/common/vp9_seg_common.c',
'libvpx/vp9/common/vp9_thread_common.c',
'libvpx/vp9/common/vp9_tile_common.c',
'libvpx/vp9/common/x86/vp9_idct_intrin_sse2.c',
'libvpx/vp9/common/x86/vp9_mfqe_sse2.asm',
'libvpx/vp9/decoder/vp9_decodeframe.c',
'libvpx/vp9/decoder/vp9_decodemv.c',
'libvpx/vp9/decoder/vp9_decoder.c',
@ -439,15 +419,6 @@ files = {
'libvpx/vp9/encoder/vp9_temporal_filter.c',
'libvpx/vp9/encoder/vp9_tokenize.c',
'libvpx/vp9/encoder/vp9_treewriter.c',
'libvpx/vp9/encoder/x86/temporal_filter_sse4.c',
'libvpx/vp9/encoder/x86/vp9_dct_intrin_sse2.c',
'libvpx/vp9/encoder/x86/vp9_dct_sse2.asm',
'libvpx/vp9/encoder/x86/vp9_dct_ssse3.c',
'libvpx/vp9/encoder/x86/vp9_diamond_search_sad_avx.c',
'libvpx/vp9/encoder/x86/vp9_error_avx2.c',
'libvpx/vp9/encoder/x86/vp9_error_sse2.asm',
'libvpx/vp9/encoder/x86/vp9_frame_scale_ssse3.c',
'libvpx/vp9/encoder/x86/vp9_quantize_sse2.c',
'libvpx/vp9/vp9_cx_iface.c',
'libvpx/vp9/vp9_dx_iface.c',
'libvpx/vpx/src/vpx_codec.c',
@ -475,6 +446,47 @@ files = {
'libvpx/vpx_dsp/variance.c',
'libvpx/vpx_dsp/vpx_convolve.c',
'libvpx/vpx_dsp/vpx_dsp_rtcd.c',
'libvpx/vpx_dsp/x86/vpx_asm_stubs.c',
'libvpx/vpx_mem/vpx_mem.c',
'libvpx/vpx_ports/emms.asm',
'libvpx/vpx_scale/generic/gen_scalers.c',
'libvpx/vpx_scale/generic/vpx_scale.c',
'libvpx/vpx_scale/generic/yv12config.c',
'libvpx/vpx_scale/generic/yv12extend.c',
'libvpx/vpx_scale/vpx_scale_rtcd.c',
'libvpx/vpx_util/vpx_thread.c',
'libvpx/vpx_util/vpx_write_yuv_frame.c',
],
'IA32_SSE2_SOURCES': [
'libvpx/vp8/common/x86/copy_sse2.asm',
'libvpx/vp8/common/x86/copy_sse3.asm',
'libvpx/vp8/common/x86/idct_blk_sse2.c',
'libvpx/vp8/common/x86/idctllm_sse2.asm',
'libvpx/vp8/common/x86/iwalsh_sse2.asm',
'libvpx/vp8/common/x86/loopfilter_sse2.asm',
'libvpx/vp8/common/x86/mfqe_sse2.asm',
'libvpx/vp8/common/x86/recon_sse2.asm',
'libvpx/vp8/common/x86/subpixel_sse2.asm',
'libvpx/vp8/common/x86/subpixel_ssse3.asm',
'libvpx/vp8/encoder/x86/dct_sse2.asm',
'libvpx/vp8/encoder/x86/denoising_sse2.c',
'libvpx/vp8/encoder/x86/fwalsh_sse2.asm',
'libvpx/vp8/encoder/x86/quantize_sse4.c',
'libvpx/vp8/encoder/x86/temporal_filter_apply_sse2.asm',
'libvpx/vp8/encoder/x86/vp8_enc_stubs_sse2.c',
'libvpx/vp8/encoder/x86/vp8_quantize_sse2.c',
'libvpx/vp8/encoder/x86/vp8_quantize_ssse3.c',
'libvpx/vp9/common/x86/vp9_idct_intrin_sse2.c',
'libvpx/vp9/common/x86/vp9_mfqe_sse2.asm',
'libvpx/vp9/encoder/x86/temporal_filter_sse4.c',
'libvpx/vp9/encoder/x86/vp9_dct_intrin_sse2.c',
'libvpx/vp9/encoder/x86/vp9_dct_sse2.asm',
'libvpx/vp9/encoder/x86/vp9_dct_ssse3.c',
'libvpx/vp9/encoder/x86/vp9_diamond_search_sad_avx.c',
'libvpx/vp9/encoder/x86/vp9_error_avx2.c',
'libvpx/vp9/encoder/x86/vp9_error_sse2.asm',
'libvpx/vp9/encoder/x86/vp9_frame_scale_ssse3.c',
'libvpx/vp9/encoder/x86/vp9_quantize_sse2.c',
'libvpx/vpx_dsp/x86/add_noise_sse2.asm',
'libvpx/vpx_dsp/x86/avg_intrin_avx2.c',
'libvpx/vpx_dsp/x86/avg_intrin_sse2.c',
@ -504,7 +516,6 @@ files = {
'libvpx/vpx_dsp/x86/sum_squares_sse2.c',
'libvpx/vpx_dsp/x86/variance_avx2.c',
'libvpx/vpx_dsp/x86/variance_sse2.c',
'libvpx/vpx_dsp/x86/vpx_asm_stubs.c',
'libvpx/vpx_dsp/x86/vpx_convolve_copy_sse2.asm',
'libvpx/vpx_dsp/x86/vpx_subpixel_8t_intrin_avx2.c',
'libvpx/vpx_dsp/x86/vpx_subpixel_8t_intrin_ssse3.c',
@ -512,15 +523,6 @@ files = {
'libvpx/vpx_dsp/x86/vpx_subpixel_8t_ssse3.asm',
'libvpx/vpx_dsp/x86/vpx_subpixel_bilinear_sse2.asm',
'libvpx/vpx_dsp/x86/vpx_subpixel_bilinear_ssse3.asm',
'libvpx/vpx_mem/vpx_mem.c',
'libvpx/vpx_ports/emms.asm',
'libvpx/vpx_scale/generic/gen_scalers.c',
'libvpx/vpx_scale/generic/vpx_scale.c',
'libvpx/vpx_scale/generic/yv12config.c',
'libvpx/vpx_scale/generic/yv12extend.c',
'libvpx/vpx_scale/vpx_scale_rtcd.c',
'libvpx/vpx_util/vpx_thread.c',
'libvpx/vpx_util/vpx_write_yuv_frame.c',
],
'ARM_EXPORTS': [
'libvpx/vpx/vp8.h',

View File

@ -426,8 +426,12 @@ void (*WebPAlphaReplace)(uint32_t* src, int length, uint32_t color);
// Init function
extern void WebPInitAlphaProcessingMIPSdspR2(void);
#if defined(WEBP_HAVE_SSE2)
extern void WebPInitAlphaProcessingSSE2(void);
#if defined(WEBP_HAVE_SSE41)
extern void WebPInitAlphaProcessingSSE41(void);
#endif
#endif
extern void WebPInitAlphaProcessingNEON(void);
WEBP_DSP_INIT_FUNC(WebPInitAlphaProcessing) {

View File

@ -376,7 +376,9 @@ VP8SetResidualCoeffsFunc VP8SetResidualCoeffs;
extern void VP8EncDspCostInitMIPS32(void);
extern void VP8EncDspCostInitMIPSdspR2(void);
#if defined(WEBP_HAVE_SSE2)
extern void VP8EncDspCostInitSSE2(void);
#endif
extern void VP8EncDspCostInitNEON(void);
WEBP_DSP_INIT_FUNC(VP8EncDspCostInit) {

View File

@ -68,43 +68,6 @@ extern "C" {
# define __has_builtin(x) 0
#endif
#if !defined(HAVE_CONFIG_H)
#if defined(_MSC_VER) && _MSC_VER > 1310 && \
(defined(_M_X64) || defined(_M_IX86))
#define WEBP_MSC_SSE2 // Visual C++ SSE2 targets
#endif
#if defined(_MSC_VER) && _MSC_VER >= 1500 && \
(defined(_M_X64) || defined(_M_IX86))
#define WEBP_MSC_SSE41 // Visual C++ SSE4.1 targets
#endif
#endif
// WEBP_HAVE_* are used to indicate the presence of the instruction set in dsp
// files without intrinsics, allowing the corresponding Init() to be called.
// Files containing intrinsics will need to be built targeting the instruction
// set so should succeed on one of the earlier tests.
#if (defined(__SSE2__) || defined(WEBP_MSC_SSE2)) && \
(!defined(HAVE_CONFIG_H) || defined(WEBP_HAVE_SSE2))
#define WEBP_USE_SSE2
#endif
#if defined(WEBP_USE_SSE2) && !defined(WEBP_HAVE_SSE2)
#define WEBP_HAVE_SSE2
#endif
#if (defined(__SSE4_1__) || defined(WEBP_MSC_SSE41)) && \
(!defined(HAVE_CONFIG_H) || defined(WEBP_HAVE_SSE41))
#define WEBP_USE_SSE41
#endif
#if defined(WEBP_USE_SSE41) && !defined(WEBP_HAVE_SSE41)
#define WEBP_HAVE_SSE41
#endif
#undef WEBP_MSC_SSE41
#undef WEBP_MSC_SSE2
// The intrinsics currently cause compiler errors with arm-nacl-gcc and the
// inline assembly would need to be modified for use with Native Client.
#if ((defined(__ARM_NEON__) || defined(__aarch64__)) && \

View File

@ -55,7 +55,7 @@ elif CONFIG['CPU_ARCH'] == 'aarch64':
'yuv_neon.c',
]
DEFINES['WEBP_HAVE_NEON'] = 1;
elif CONFIG['INTEL_ARCHITECTURE']:
elif CONFIG['INTEL_ARCHITECTURE'] and not CONFIG['THE_SSE1']:
SOURCES += [
'alpha_processing_sse2.c',
'alpha_processing_sse41.c',

View File

@ -253,7 +253,9 @@ struct AecCore {
AecCore* WebRtcAec_CreateAec(int instance_count); // Returns NULL on error.
void WebRtcAec_FreeAec(AecCore* aec);
int WebRtcAec_InitAec(AecCore* aec, int sampFreq);
#if defined(WEBRTC_ARCH_X86_FAMILY)
void WebRtcAec_InitAec_SSE2(void);
#endif
#if defined(MIPS_FPU_LE)
void WebRtcAec_InitAec_mips(void);
#endif

View File

@ -481,7 +481,7 @@ if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "WINNT":
"/media/webrtc/trunk/webrtc/modules/video_processing/video_processing_neon_gn"
]
if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "WINNT":
if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "WINNT" and not CONFIG['THE_SSE1']:
DIRS += [
"/media/webrtc/trunk/webrtc/common_audio/common_audio_sse2_gn",

View File

@ -28,7 +28,9 @@
#define WEBRTC_ARCH_64_BITS
#define WEBRTC_ARCH_LITTLE_ENDIAN
#elif defined(_M_IX86) || defined(__i386__)
#define WEBRTC_ARCH_X86_FAMILY
# ifndef THE_SSE1
# define WEBRTC_ARCH_X86_FAMILY
# endif
#define WEBRTC_ARCH_X86
#define WEBRTC_ARCH_32_BITS
#define WEBRTC_ARCH_LITTLE_ENDIAN

View File

@ -250,80 +250,80 @@ inline bool supports_sse() { return sse_private::sse_enabled; }
inline bool supports_sse() { return false; }
#endif
#if defined(MOZILLA_PRESUME_SSE2)
#if defined(MOZILLA_PRESUME_SSE2) && !defined(THE_SSE1)
# define MOZILLA_MAY_SUPPORT_SSE2 1
inline bool supports_sse2() { return true; }
#elif defined(MOZILLA_SSE_HAVE_CPUID_DETECTION)
#elif defined(MOZILLA_SSE_HAVE_CPUID_DETECTION) && !defined(THE_SSE1)
# define MOZILLA_MAY_SUPPORT_SSE2 1
inline bool supports_sse2() { return sse_private::sse2_enabled; }
#else
inline bool supports_sse2() { return false; }
#endif
#if defined(MOZILLA_PRESUME_SSE3)
#if defined(MOZILLA_PRESUME_SSE3) && !defined(THE_SSE1)
# define MOZILLA_MAY_SUPPORT_SSE3 1
inline bool supports_sse3() { return true; }
#elif defined(MOZILLA_SSE_HAVE_CPUID_DETECTION)
#elif defined(MOZILLA_SSE_HAVE_CPUID_DETECTION) && !defined(THE_SSE1)
# define MOZILLA_MAY_SUPPORT_SSE3 1
inline bool supports_sse3() { return sse_private::sse3_enabled; }
#else
inline bool supports_sse3() { return false; }
#endif
#if defined(MOZILLA_PRESUME_SSSE3)
#if defined(MOZILLA_PRESUME_SSSE3) && !defined(THE_SSE1)
# define MOZILLA_MAY_SUPPORT_SSSE3 1
inline bool supports_ssse3() { return true; }
#elif defined(MOZILLA_SSE_HAVE_CPUID_DETECTION)
#elif defined(MOZILLA_SSE_HAVE_CPUID_DETECTION) && !defined(THE_SSE1)
# define MOZILLA_MAY_SUPPORT_SSSE3 1
inline bool supports_ssse3() { return sse_private::ssse3_enabled; }
#else
inline bool supports_ssse3() { return false; }
#endif
#if defined(MOZILLA_PRESUME_SSE4A)
#if defined(MOZILLA_PRESUME_SSE4A) && !defined(THE_SSE1)
# define MOZILLA_MAY_SUPPORT_SSE4A 1
inline bool supports_sse4a() { return true; }
#elif defined(MOZILLA_SSE_HAVE_CPUID_DETECTION)
#elif defined(MOZILLA_SSE_HAVE_CPUID_DETECTION) && !defined(THE_SSE1)
# define MOZILLA_MAY_SUPPORT_SSE4A 1
inline bool supports_sse4a() { return sse_private::sse4a_enabled; }
#else
inline bool supports_sse4a() { return false; }
#endif
#if defined(MOZILLA_PRESUME_SSE4_1)
#if defined(MOZILLA_PRESUME_SSE4_1) && !defined(THE_SSE1)
# define MOZILLA_MAY_SUPPORT_SSE4_1 1
inline bool supports_sse4_1() { return true; }
#elif defined(MOZILLA_SSE_HAVE_CPUID_DETECTION)
#elif defined(MOZILLA_SSE_HAVE_CPUID_DETECTION) && !defined(THE_SSE1)
# define MOZILLA_MAY_SUPPORT_SSE4_1 1
inline bool supports_sse4_1() { return sse_private::sse4_1_enabled; }
#else
inline bool supports_sse4_1() { return false; }
#endif
#if defined(MOZILLA_PRESUME_SSE4_2)
#if defined(MOZILLA_PRESUME_SSE4_2) && !defined(THE_SSE1)
# define MOZILLA_MAY_SUPPORT_SSE4_2 1
inline bool supports_sse4_2() { return true; }
#elif defined(MOZILLA_SSE_HAVE_CPUID_DETECTION)
#elif defined(MOZILLA_SSE_HAVE_CPUID_DETECTION) && !defined(THE_SSE1)
# define MOZILLA_MAY_SUPPORT_SSE4_2 1
inline bool supports_sse4_2() { return sse_private::sse4_2_enabled; }
#else
inline bool supports_sse4_2() { return false; }
#endif
#if defined(MOZILLA_PRESUME_AVX)
#if defined(MOZILLA_PRESUME_AVX) && !defined(THE_SSE1)
# define MOZILLA_MAY_SUPPORT_AVX 1
inline bool supports_avx() { return true; }
#elif defined(MOZILLA_SSE_HAVE_CPUID_DETECTION)
#elif defined(MOZILLA_SSE_HAVE_CPUID_DETECTION) && !defined(THE_SSE1)
# define MOZILLA_MAY_SUPPORT_AVX 1
inline bool supports_avx() { return sse_private::avx_enabled; }
#else
inline bool supports_avx() { return false; }
#endif
#if defined(MOZILLA_PRESUME_AVX2)
#if defined(MOZILLA_PRESUME_AVX2) && !defined(THE_SSE1)
# define MOZILLA_MAY_SUPPORT_AVX2 1
inline bool supports_avx2() { return true; }
#elif defined(MOZILLA_SSE_HAVE_CPUID_DETECTION)
#elif defined(MOZILLA_SSE_HAVE_CPUID_DETECTION) && !defined(THE_SSE1)
# define MOZILLA_MAY_SUPPORT_AVX2 1
inline bool supports_avx2() { return sse_private::avx2_enabled; }
#else

View File

@@ -707,19 +707,23 @@ case "$target" in
CPPFLAGS="$CPPFLAGS -utf-8"
fi
if test "$CPU_ARCH" = "x86"; then
SSE_FLAGS="-arch:SSE"
if test "$THE_SSE1" = 1; then
SSE2_FLAGS="-arch:SSE"
else
SSE2_FLAGS="-arch:SSE2"
fi
dnl VS2012+ defaults to -arch:SSE2. We want to target nothing
dnl more recent, so set that explicitly here unless another
dnl target arch has already been set.
changequote(,)
if test -z `echo $CFLAGS | grep -i [-/]arch:`; then
CFLAGS="$CFLAGS -arch:SSE2"
CFLAGS="$CFLAGS $SSE2_FLAGS"
fi
if test -z `echo $CXXFLAGS | grep -i [-/]arch:`; then
CXXFLAGS="$CXXFLAGS -arch:SSE2"
CXXFLAGS="$CXXFLAGS $SSE2_FLAGS"
fi
changequote([,])
SSE_FLAGS="-arch:SSE"
SSE2_FLAGS="-arch:SSE2"
dnl MSVC allows the use of intrinsics without any flags
dnl and doesn't have a separate arch for SSSE3
SSSE3_FLAGS="-arch:SSE2"
@@ -925,6 +929,10 @@ if test -z "$MOZ_OPTIMIZE_FLAGS"; then
MOZ_OPTIMIZE_FLAGS="-O"
fi
if test -n "$THE_SSE1"; then
AC_DEFINE(THE_SSE1)
fi
AC_SUBST(THE_SSE1)
AC_SUBST_LIST(MMX_FLAGS)
AC_SUBST_LIST(SSE_FLAGS)
AC_SUBST_LIST(SSE2_FLAGS)