[zstd] Update: 1.4.9 -> 1.5.0

TODO: zstd has its own official CMake build scripts. We should migrate
over to them sometime.
David Korth 2021-05-15 00:04:50 -04:00
parent de562899f8
commit 5885b62c9e
86 changed files with 3414 additions and 1210 deletions

extlib/zstd/BUCK vendored

@ -65,9 +65,7 @@ cxx_library(
name='zdict',
header_namespace='',
visibility=['PUBLIC'],
exported_headers=subdir_glob([
('dictBuilder', 'zdict.h'),
]),
exported_headers=['zdict.h'],
headers=subdir_glob([
('dictBuilder', 'divsufsort.h'),
('dictBuilder', 'cover.h'),
@ -131,10 +129,10 @@ cxx_library(
name='errors',
header_namespace='',
visibility=['PUBLIC'],
exported_headers=subdir_glob([
('common', 'error_private.h'),
('common', 'zstd_errors.h'),
]),
exported_headers=[
'zstd_errors.h',
'common/error_private.h',
],
srcs=['common/error_private.c'],
)


@ -1,5 +1,5 @@
# zstd: Fast compression library.
PROJECT(zstd VERSION 1.4.5 LANGUAGES C)
PROJECT(zstd VERSION 1.5.0 LANGUAGES C)
# rom-properties: Enable multithreading.
SET(ZSTD_MULTITHREAD ON)
@ -39,6 +39,12 @@ SET(ZSTD_LIB_DECOMPRESSION ON)
SET(ZSTD_LIB_DICTBUILDER OFF)
SET(ZSTD_LIB_DEPRECATED OFF)
# Enable zstd multithreading on pthread systems.
FIND_PACKAGE(Threads)
IF(CMAKE_USE_PTHREADS_INIT)
SET(ZSTD_MULTITHREAD ON)
ENDIF(CMAKE_USE_PTHREADS_INIT)
# Legacy support
SET(ZSTD_LEGACY_SUPPORT 0)
SET(ZSTD_LEGACY_MULTITHREADED_API OFF)
@ -139,19 +145,14 @@ IF(BUILD_STATIC_LIBS)
DO_SPLIT_DEBUG(zstdstatic)
ENDIF(BUILD_STATIC_LIBS)
IF(ZSTD_MULTITHREAD)
IF(NOT WIN32)
FIND_PACKAGE(Threads REQUIRED)
ENDIF(NOT WIN32)
IF(CMAKE_THREAD_LIBS_INIT)
IF(BUILD_SHARED_LIBS)
TARGET_LINK_LIBRARIES(zstd PRIVATE ${CMAKE_THREAD_LIBS_INIT})
ENDIF(BUILD_SHARED_LIBS)
IF(BUILD_STATIC_LIBS)
TARGET_LINK_LIBRARIES(zstdstatic PRIVATE ${CMAKE_THREAD_LIBS_INIT})
ENDIF(BUILD_STATIC_LIBS)
ENDIF(CMAKE_THREAD_LIBS_INIT)
ENDIF(ZSTD_MULTITHREAD)
IF(ZSTD_MULTITHREAD AND CMAKE_THREAD_LIBS_INIT)
IF(BUILD_SHARED_LIBS)
TARGET_LINK_LIBRARIES(zstd PRIVATE ${CMAKE_THREAD_LIBS_INIT})
ENDIF(BUILD_SHARED_LIBS)
IF(BUILD_STATIC_LIBS)
TARGET_LINK_LIBRARIES(zstdstatic PRIVATE ${CMAKE_THREAD_LIBS_INIT})
ENDIF(BUILD_STATIC_LIBS)
ENDIF(ZSTD_MULTITHREAD AND CMAKE_THREAD_LIBS_INIT)
# Install DLLs.
IF(NOT SKIP_INSTALL_LIBRARIES AND NOT SKIP_INSTALL_ALL)
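Whether a given libzstd build actually has multithreading compiled in can also be checked at run time. A minimal sketch using only the stable zstd 1.4+ API: per zstd.h, setting ZSTD_c_nbWorkers to a nonzero value is reported as an error on builds made without ZSTD_MULTITHREAD.

    #include <stdio.h>
    #include <zstd.h>

    int main(void)
    {
        ZSTD_CCtx* const cctx = ZSTD_createCCtx();
        /* On a single-threaded build, requesting worker threads returns an error. */
        size_t const r = ZSTD_CCtx_setParameter(cctx, ZSTD_c_nbWorkers, 2);
        printf("multithreaded libzstd: %s\n", ZSTD_isError(r) ? "no" : "yes");
        ZSTD_freeCCtx(cctx);
        return 0;
    }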

extlib/zstd/Makefile vendored

@ -1,5 +1,5 @@
# ################################################################
# Copyright (c) 2015-2021, Yann Collet, Facebook, Inc.
# Copyright (c) Yann Collet, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under both the BSD-style license (found in the
@ -8,6 +8,9 @@
# You may select, at your option, one of the above-listed licenses.
# ################################################################
# Note: by default, the static library is built single-threaded and dynamic library is built
# multi-threaded. It is possible to force multi or single threaded builds by appending
# -mt or -nomt to the build target (like lib-mt for multi-threaded, lib-nomt for single-threaded).
.PHONY: default
default: lib-release
@ -68,6 +71,10 @@ DEBUGFLAGS= -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow \
CFLAGS += $(DEBUGFLAGS) $(MOREFLAGS)
FLAGS = $(CPPFLAGS) $(CFLAGS)
CPPFLAGS_DYNLIB = -DZSTD_MULTITHREAD # dynamic library build defaults to multi-threaded
LDFLAGS_DYNLIB = -pthread
CPPFLAGS_STATLIB = # static library build defaults to single-threaded
HAVE_COLORNEVER = $(shell echo a | grep --color=never a > /dev/null 2> /dev/null && echo 1 || echo 0)
GREP_OPTIONS ?=
ifeq ($(HAVE_COLORNEVER), 1)
@ -91,7 +98,7 @@ endif
ZSTD_LIB_COMPRESSION ?= 1
ZSTD_LIB_DECOMPRESSION ?= 1
ZSTD_LIB_DICTBUILDER ?= 1
ZSTD_LIB_DEPRECATED ?= 1
ZSTD_LIB_DEPRECATED ?= 0
# Legacy support
ZSTD_LEGACY_SUPPORT ?= 5
@ -176,7 +183,9 @@ UNAME := $(shell uname)
ifndef BUILD_DIR
ifeq ($(UNAME), Darwin)
HASH ?= md5
ifeq ($(shell md5 < /dev/null > /dev/null; echo $$?), 0)
HASH ?= md5
endif
else ifeq ($(UNAME), FreeBSD)
HASH ?= gmd5sum
else ifeq ($(UNAME), NetBSD)
@ -222,6 +231,7 @@ all: lib
.PHONY: libzstd.a # must be run every time
libzstd.a: CPPFLAGS += $(CPPFLAGS_STATLIB)
ifndef BUILD_DIR
# determine BUILD_DIR from compilation flags
@ -238,7 +248,10 @@ ZSTD_STATLIB_OBJ := $(addprefix $(ZSTD_STATLIB_DIR)/,$(ZSTD_LOCAL_OBJ))
$(ZSTD_STATLIB): ARFLAGS = rcs
$(ZSTD_STATLIB): | $(ZSTD_STATLIB_DIR)
$(ZSTD_STATLIB): $(ZSTD_STATLIB_OBJ)
@echo compiling static library
# Check for multithread flag at target execution time
$(if $(filter -DZSTD_MULTITHREAD,$(CPPFLAGS)),\
@echo compiling multi-threaded static library $(LIBVER),\
@echo compiling single-threaded static library $(LIBVER))
$(AR) $(ARFLAGS) $@ $^
libzstd.a: $(ZSTD_STATLIB)
@ -257,8 +270,9 @@ else # not Windows
LIBZSTD = libzstd.$(SHARED_EXT_VER)
.PHONY: $(LIBZSTD) # must be run every time
$(LIBZSTD): CFLAGS += -fPIC -fvisibility=hidden
$(LIBZSTD): LDFLAGS += -shared
$(LIBZSTD): CPPFLAGS += $(CPPFLAGS_DYNLIB)
$(LIBZSTD): CFLAGS += -fPIC -fvisibility=hidden
$(LIBZSTD): LDFLAGS += -shared $(LDFLAGS_DYNLIB)
ifndef BUILD_DIR
# determine BUILD_DIR from compilation flags
@ -275,7 +289,10 @@ ZSTD_DYNLIB_OBJ := $(addprefix $(ZSTD_DYNLIB_DIR)/,$(ZSTD_LOCAL_OBJ))
$(ZSTD_DYNLIB): | $(ZSTD_DYNLIB_DIR)
$(ZSTD_DYNLIB): $(ZSTD_DYNLIB_OBJ)
@echo compiling dynamic library $(LIBVER)
# Check for multithread flag at target execution time
$(if $(filter -DZSTD_MULTITHREAD,$(CPPFLAGS)),\
@echo compiling multi-threaded dynamic library $(LIBVER),\
@echo compiling single-threaded dynamic library $(LIBVER))
$(CC) $(FLAGS) $^ $(LDFLAGS) $(SONAME_FLAGS) -o $@
@echo creating versioned links
ln -sf $@ libzstd.$(SHARED_EXT_MAJOR)
@ -297,10 +314,17 @@ lib : libzstd.a libzstd
# note : do not define lib-mt or lib-release as .PHONY
# make does not consider implicit pattern rule for .PHONY target
%-mt : CPPFLAGS += -DZSTD_MULTITHREAD
%-mt : LDFLAGS += -pthread
%-mt : CPPFLAGS_DYNLIB := -DZSTD_MULTITHREAD
%-mt : CPPFLAGS_STATLIB := -DZSTD_MULTITHREAD
%-mt : LDFLAGS_DYNLIB := -pthread
%-mt : %
@echo multi-threading build completed
@echo multi-threaded build completed
%-nomt : CPPFLAGS_DYNLIB :=
%-nomt : LDFLAGS_DYNLIB :=
%-nomt : CPPFLAGS_STATLIB :=
%-nomt : %
@echo single-threaded build completed
%-release : DEBUGFLAGS :=
%-release : %
@ -332,7 +356,8 @@ include $(wildcard $(DEPFILES))
# Special case : building library in single-thread mode _and_ without zstdmt_compress.c
ZSTDMT_FILES = compress/zstdmt_compress.c
ZSTD_NOMT_FILES = $(filter-out $(ZSTDMT_FILES),$(ZSTD_FILES))
libzstd-nomt: LDFLAGS += -shared -fPIC -fvisibility=hidden
libzstd-nomt: CFLAGS += -fPIC -fvisibility=hidden
libzstd-nomt: LDFLAGS += -shared
libzstd-nomt: $(ZSTD_NOMT_FILES)
@echo compiling single-thread dynamic library $(LIBVER)
@echo files : $(ZSTD_NOMT_FILES)
@ -411,10 +436,12 @@ libzstd.pc: libzstd.pc.in
install: install-pc install-static install-shared install-includes
@echo zstd static and shared library installed
.PHONY: install-pc
install-pc: libzstd.pc
[ -e $(DESTDIR)$(PKGCONFIGDIR) ] || $(INSTALL) -d -m 755 $(DESTDIR)$(PKGCONFIGDIR)/
$(INSTALL_DATA) libzstd.pc $(DESTDIR)$(PKGCONFIGDIR)/
.PHONY: install-static
install-static:
# only generate libzstd.a if it's not already present
[ -e libzstd.a ] || $(MAKE) libzstd.a-release
@ -422,6 +449,7 @@ install-static:
@echo Installing static library
$(INSTALL_DATA) libzstd.a $(DESTDIR)$(LIBDIR)
.PHONY: install-shared
install-shared:
# only generate libzstd.so if it's not already present
[ -e $(LIBZSTD) ] || $(MAKE) libzstd-release
@ -431,12 +459,13 @@ install-shared:
ln -sf $(LIBZSTD) $(DESTDIR)$(LIBDIR)/libzstd.$(SHARED_EXT_MAJOR)
ln -sf $(LIBZSTD) $(DESTDIR)$(LIBDIR)/libzstd.$(SHARED_EXT)
.PHONY: install-includes
install-includes:
[ -e $(DESTDIR)$(INCLUDEDIR) ] || $(INSTALL) -d -m 755 $(DESTDIR)$(INCLUDEDIR)/
@echo Installing includes
$(INSTALL_DATA) zstd.h $(DESTDIR)$(INCLUDEDIR)
$(INSTALL_DATA) common/zstd_errors.h $(DESTDIR)$(INCLUDEDIR)
$(INSTALL_DATA) dictBuilder/zdict.h $(DESTDIR)$(INCLUDEDIR)
$(INSTALL_DATA) zstd_errors.h $(DESTDIR)$(INCLUDEDIR)
$(INSTALL_DATA) zdict.h $(DESTDIR)$(INCLUDEDIR)
.PHONY: uninstall
uninstall:

extlib/zstd/README.md vendored

@ -19,12 +19,16 @@ The scope can be reduced on demand (see paragraph _modular build_).
#### Multithreading support
Multithreading is disabled by default when building with `make`.
When building with `make`, by default the dynamic library is multithreaded and static library is single-threaded (for compatibility reasons).
Enabling multithreading requires 2 conditions :
- set build macro `ZSTD_MULTITHREAD` (`-DZSTD_MULTITHREAD` for `gcc`)
- for POSIX systems : compile with pthread (`-pthread` compilation flag for `gcc`)
Both conditions are automatically applied when invoking `make lib-mt` target.
For convenience, we provide a build target to generate multi and single threaded libraries:
- Force enable multithreading on both dynamic and static libraries by appending `-mt` to the target, e.g. `make lib-mt`.
- Force disable multithreading on both dynamic and static libraries by appending `-nomt` to the target, e.g. `make lib-nomt`.
- By default, as mentioned before, dynamic library is multithreaded, and static library is single-threaded, e.g. `make lib`.
When linking a POSIX program with a multithreaded version of `libzstd`,
note that it's necessary to invoke the `-pthread` flag during link stage.
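As a usage sketch of the conventions described above (the worker count and helper name are illustrative; the API calls are stable zstd 1.4+): compile against a multithreaded libzstd and link with -pthread on POSIX systems.

    #include <zstd.h>

    /* Compress in one shot with 4 worker threads.
     * The returned size_t should be checked with ZSTD_isError(). */
    static size_t compress_mt(void* dst, size_t dstCap, const void* src, size_t srcSize)
    {
        ZSTD_CCtx* const cctx = ZSTD_createCCtx();
        size_t cSize;
        ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, 3);
        ZSTD_CCtx_setParameter(cctx, ZSTD_c_nbWorkers, 4);  /* needs ZSTD_MULTITHREAD */
        cSize = ZSTD_compress2(cctx, dst, dstCap, src, srcSize);
        ZSTD_freeCCtx(cctx);
        return cSize;
    }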
@ -42,8 +46,8 @@ Zstandard's stable API is exposed within [lib/zstd.h](zstd.h).
Optional advanced features are exposed via :
- `lib/common/zstd_errors.h` : translates `size_t` function results
into a `ZSTD_ErrorCode`, for accurate error handling.
- `lib/zstd_errors.h` : translates `size_t` function results
into a `ZSTD_ErrorCode`, for accurate error handling.
- `ZSTD_STATIC_LINKING_ONLY` : if this macro is defined _before_ including `zstd.h`,
it unlocks access to the experimental API,
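A minimal sketch of the error-handling pattern the relocated zstd_errors.h provides, using its new top-level include path (the undersized buffer is deliberate, just to trigger an error):

    #include <stdio.h>
    #include <zstd.h>
    #include <zstd_errors.h>

    int main(void)
    {
        const char src[] = "0123456789012345678901234567890123456789";
        char dst[8];  /* too small on purpose */
        size_t const r = ZSTD_compress(dst, sizeof(dst), src, sizeof(src), 1);
        if (ZSTD_isError(r)) {
            ZSTD_ErrorCode const code = ZSTD_getErrorCode(r);  /* e.g. ZSTD_error_dstSize_tooSmall */
            printf("zstd error %d: %s\n", (int)code, ZSTD_getErrorString(code));
        }
        return 0;
    }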


@ -1,9 +1,9 @@
This copy of zstd-1.4.9 is a modified version of the original.
This copy of zstd-1.5.0 is a modified version of the original.
commit e4558ffd1dc49399faf4ee5d85abed4386b4dcf5
Merge pull request #2515 from facebook/dev
commit a488ba114ec17ea1054b9057c26a046fc122b3b6
Zstd 1.5.0 Release
Tag: v1.4.9
Tag: v1.5.0
The following changes have been made to the original:
@ -11,5 +11,5 @@ The following changes have been made to the original:
- Added the Win32 resource file to the DLL build.
To obtain the original zstd-1.4.8, visit:
To obtain the original zstd-1.5.0, visit:
https://github.com/facebook/zstd


@ -1,7 +1,7 @@
/* ******************************************************************
* bitstream
* Part of FSE library
* Copyright (c) 2013-2021, Yann Collet, Facebook, Inc.
* Copyright (c) Yann Collet, Facebook, Inc.
*
* You can contact the author at :
* - Source repository : https://github.com/Cyan4973/FiniteStateEntropy


@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
* Copyright (c) Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the


@ -1,5 +1,5 @@
/*
* Copyright (c) 2018-2021, Facebook, Inc.
* Copyright (c) Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the


@ -1,7 +1,7 @@
/* ******************************************************************
* debug
* Part of FSE library
* Copyright (c) 2013-2021, Yann Collet, Facebook, Inc.
* Copyright (c) Yann Collet, Facebook, Inc.
*
* You can contact the author at :
* - Source repository : https://github.com/Cyan4973/FiniteStateEntropy


@ -1,7 +1,7 @@
/* ******************************************************************
* debug
* Part of FSE library
* Copyright (c) 2013-2021, Yann Collet, Facebook, Inc.
* Copyright (c) Yann Collet, Facebook, Inc.
*
* You can contact the author at :
* - Source repository : https://github.com/Cyan4973/FiniteStateEntropy


@ -1,6 +1,6 @@
/* ******************************************************************
* Common functions of New Generation Entropy library
* Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
* Copyright (c) Yann Collet, Facebook, Inc.
*
* You can contact the author at :
* - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy


@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
* Copyright (c) Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the


@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
* Copyright (c) Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@ -21,8 +21,8 @@ extern "C" {
/* ****************************************
* Dependencies
******************************************/
#include "zstd_deps.h" /* size_t */
#include "zstd_errors.h" /* enum list */
#include "../zstd_errors.h" /* enum list */
#include "zstd_deps.h" /* size_t */
/* ****************************************


@ -1,7 +1,7 @@
/* ******************************************************************
* FSE : Finite State Entropy codec
* Public Prototypes declaration
* Copyright (c) 2013-2021, Yann Collet, Facebook, Inc.
* Copyright (c) Yann Collet, Facebook, Inc.
*
* You can contact the author at :
* - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
@ -352,7 +352,7 @@ size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits);
size_t FSE_buildDTable_rle (FSE_DTable* dt, unsigned char symbolValue);
/**< build a fake FSE_DTable, designed to always generate the same symbolValue */
#define FSE_DECOMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) (FSE_DTABLE_SIZE_U32(maxTableLog) + FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue))
#define FSE_DECOMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) (FSE_DTABLE_SIZE_U32(maxTableLog) + FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) + (FSE_MAX_SYMBOL_VALUE + 1) / 2 + 1)
#define FSE_DECOMPRESS_WKSP_SIZE(maxTableLog, maxSymbolValue) (FSE_DECOMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) * sizeof(unsigned))
size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize);
/**< same as FSE_decompress(), using an externally allocated `workSpace` produced with `FSE_DECOMPRESS_WKSP_SIZE_U32(maxLog, maxSymbolValue)` */
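The extra (FSE_MAX_SYMBOL_VALUE + 1) / 2 + 1 term budgets the workspace for the short ncount[] header of the new FSE_DecompressWksp struct introduced in fse_decompress.c (see below). A quick check of the arithmetic, assuming the usual FSE_MAX_SYMBOL_VALUE of 255 and 4-byte unsigned:

    #include <assert.h>

    #define FSE_MAX_SYMBOL_VALUE 255

    int main(void)
    {
        /* (255 + 1) / 2 + 1 = 129 extra U32s == 516 bytes, */
        unsigned const extraU32 = (FSE_MAX_SYMBOL_VALUE + 1) / 2 + 1;
        /* enough for short ncount[256] (512 bytes) plus slack */
        assert(extraU32 * sizeof(unsigned) >= (FSE_MAX_SYMBOL_VALUE + 1) * sizeof(short));
        return 0;
    }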


@ -1,6 +1,6 @@
/* ******************************************************************
* FSE : Finite State Entropy decoder
* Copyright (c) 2013-2021, Yann Collet, Facebook, Inc.
* Copyright (c) Yann Collet, Facebook, Inc.
*
* You can contact the author at :
* - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
@ -310,6 +310,12 @@ size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size
return FSE_decompress_wksp_bmi2(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, /* bmi2 */ 0);
}
typedef struct {
short ncount[FSE_MAX_SYMBOL_VALUE + 1];
FSE_DTable dtable[1]; /* Dynamically sized */
} FSE_DecompressWksp;
FORCE_INLINE_TEMPLATE size_t FSE_decompress_wksp_body(
void* dst, size_t dstCapacity,
const void* cSrc, size_t cSrcSize,
@ -318,33 +324,37 @@ FORCE_INLINE_TEMPLATE size_t FSE_decompress_wksp_body(
{
const BYTE* const istart = (const BYTE*)cSrc;
const BYTE* ip = istart;
short counting[FSE_MAX_SYMBOL_VALUE+1];
unsigned tableLog;
unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
FSE_DTable* const dtable = (FSE_DTable*)workSpace;
FSE_DecompressWksp* const wksp = (FSE_DecompressWksp*)workSpace;
DEBUG_STATIC_ASSERT((FSE_MAX_SYMBOL_VALUE + 1) % 2 == 0);
if (wkspSize < sizeof(*wksp)) return ERROR(GENERIC);
/* normal FSE decoding mode */
size_t const NCountLength = FSE_readNCount_bmi2(counting, &maxSymbolValue, &tableLog, istart, cSrcSize, bmi2);
if (FSE_isError(NCountLength)) return NCountLength;
if (tableLog > maxLog) return ERROR(tableLog_tooLarge);
assert(NCountLength <= cSrcSize);
ip += NCountLength;
cSrcSize -= NCountLength;
{
size_t const NCountLength = FSE_readNCount_bmi2(wksp->ncount, &maxSymbolValue, &tableLog, istart, cSrcSize, bmi2);
if (FSE_isError(NCountLength)) return NCountLength;
if (tableLog > maxLog) return ERROR(tableLog_tooLarge);
assert(NCountLength <= cSrcSize);
ip += NCountLength;
cSrcSize -= NCountLength;
}
if (FSE_DECOMPRESS_WKSP_SIZE(tableLog, maxSymbolValue) > wkspSize) return ERROR(tableLog_tooLarge);
workSpace = dtable + FSE_DTABLE_SIZE_U32(tableLog);
wkspSize -= FSE_DTABLE_SIZE(tableLog);
workSpace = wksp->dtable + FSE_DTABLE_SIZE_U32(tableLog);
wkspSize -= sizeof(*wksp) + FSE_DTABLE_SIZE(tableLog);
CHECK_F( FSE_buildDTable_internal(dtable, counting, maxSymbolValue, tableLog, workSpace, wkspSize) );
CHECK_F( FSE_buildDTable_internal(wksp->dtable, wksp->ncount, maxSymbolValue, tableLog, workSpace, wkspSize) );
{
const void* ptr = dtable;
const void* ptr = wksp->dtable;
const FSE_DTableHeader* DTableH = (const FSE_DTableHeader*)ptr;
const U32 fastMode = DTableH->fastMode;
/* select fast mode (static) */
if (fastMode) return FSE_decompress_usingDTable_generic(dst, dstCapacity, ip, cSrcSize, dtable, 1);
return FSE_decompress_usingDTable_generic(dst, dstCapacity, ip, cSrcSize, dtable, 0);
if (fastMode) return FSE_decompress_usingDTable_generic(dst, dstCapacity, ip, cSrcSize, wksp->dtable, 1);
return FSE_decompress_usingDTable_generic(dst, dstCapacity, ip, cSrcSize, wksp->dtable, 0);
}
}
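The dtable[1] member above is the classic trailing-array idiom: the caller hands in a buffer larger than sizeof(FSE_DecompressWksp), and the table grows into the tail, which is why the function checks wkspSize up front and then advances workSpace past wksp->dtable + FSE_DTABLE_SIZE_U32(tableLog). A generic sketch of the idiom (names hypothetical, not zstd source):

    #include <stdlib.h>

    typedef struct {
        short ncount[256];   /* fixed-size header part */
        unsigned dtable[1];  /* anchor for a dynamically sized tail */
    } Wksp;

    /* assumes dtableEntries >= 1; one entry is already inside the struct,
     * so allocate (entries - 1) extra */
    static Wksp* allocWksp(size_t dtableEntries)
    {
        return (Wksp*)malloc(sizeof(Wksp) + (dtableEntries - 1) * sizeof(unsigned));
    }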


@ -1,7 +1,7 @@
/* ******************************************************************
* huff0 huffman codec,
* part of Finite State Entropy library
* Copyright (c) 2013-2021, Yann Collet, Facebook, Inc.
* Copyright (c) Yann Collet, Facebook, Inc.
*
* You can contact the author at :
* - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
@ -192,6 +192,7 @@ size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue);
size_t HUF_buildCTable (HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue, unsigned maxNbBits); /* @return : maxNbBits; CTable and count can overlap. In which case, CTable will overwrite count content */
size_t HUF_writeCTable (void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog);
size_t HUF_writeCTable_wksp(void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog, void* workspace, size_t workspaceSize);
size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable);
size_t HUF_estimateCompressedSize(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue);
int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue);
@ -278,7 +279,7 @@ U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize);
* a required workspace size greater than that specified in the following
* macro.
*/
#define HUF_DECOMPRESS_WORKSPACE_SIZE (2 << 10)
#define HUF_DECOMPRESS_WORKSPACE_SIZE ((2 << 10) + (1 << 9))
#define HUF_DECOMPRESS_WORKSPACE_SIZE_U32 (HUF_DECOMPRESS_WORKSPACE_SIZE / sizeof(U32))
#ifndef HUF_FORCE_DECOMPRESS_X2
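For reference, the arithmetic of the macro change above (a sketch, not zstd source): the decompression workspace grows from 2048 to 2560 bytes, presumably to absorb the enlarged internal tables this release threads through the decode path.

    /* 1.4.9:  2 << 10              = 2048 bytes
     * 1.5.0: (2 << 10) + (1 << 9)  = 2048 + 512 = 2560 bytes */
    static unsigned char hufWksp[(2 << 10) + (1 << 9)];  /* 2560-byte workspace */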


@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
* Copyright (c) Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@ -143,9 +143,7 @@ MEM_STATIC size_t MEM_swapST(size_t in);
* Prefer these methods in priority order (0 > 1 > 2)
*/
#ifndef MEM_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */
# if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
# define MEM_FORCE_MEMORY_ACCESS 2
# elif defined(__INTEL_COMPILER) || defined(__GNUC__) || defined(__ICCARM__)
# if defined(__INTEL_COMPILER) || defined(__GNUC__) || defined(__ICCARM__)
# define MEM_FORCE_MEMORY_ACCESS 1
# endif
#endif
@ -308,7 +306,7 @@ MEM_STATIC void MEM_writeLE16(void* memPtr, U16 val)
MEM_STATIC U32 MEM_readLE24(const void* memPtr)
{
return MEM_readLE16(memPtr) + (((const BYTE*)memPtr)[2] << 16);
return (U32)MEM_readLE16(memPtr) + ((U32)(((const BYTE*)memPtr)[2]) << 16);
}
MEM_STATIC void MEM_writeLE24(void* memPtr, U32 val)
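The new casts in MEM_readLE24 above make the widening explicit, so the 24-bit value is assembled in unsigned arithmetic rather than relying on implicit int promotion of the byte operand. An equivalent standalone sketch:

    #include <stdint.h>

    /* little-endian 24-bit read, each byte widened before shifting */
    static uint32_t readLE24(const void* memPtr)
    {
        const uint8_t* const b = (const uint8_t*)memPtr;
        return (uint32_t)b[0]
             | ((uint32_t)b[1] << 8)
             | ((uint32_t)b[2] << 16);
    }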


@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
* Copyright (c) Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the


@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
* Copyright (c) Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the


@ -1,6 +1,6 @@
/*
* xxHash - Fast Hash algorithm
* Copyright (c) 2012-2021, Yann Collet, Facebook, Inc.
* Copyright (c) Yann Collet, Facebook, Inc.
*
* You can contact the author at :
* - xxHash homepage: http://www.xxhash.com
@ -30,9 +30,7 @@
* Prefer these methods in priority order (0 > 1 > 2)
*/
#ifndef XXH_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */
# if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
# define XXH_FORCE_MEMORY_ACCESS 2
# elif (defined(__INTEL_COMPILER) && !defined(WIN32)) || \
# if (defined(__INTEL_COMPILER) && !defined(WIN32)) || \
(defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) )) || \
defined(__ICCARM__)
# define XXH_FORCE_MEMORY_ACCESS 1


@ -1,7 +1,7 @@
/*
* xxHash - Extremely Fast Hash algorithm
* Header File
* Copyright (c) 2012-2021, Yann Collet, Facebook, Inc.
* Copyright (c) Yann Collet, Facebook, Inc.
*
* You can contact the author at :
* - xxHash source repository : https://github.com/Cyan4973/xxHash


@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
* Copyright (c) Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the


@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-2021, Facebook, Inc.
* Copyright (c) Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the


@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
* Copyright (c) Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@ -36,6 +36,11 @@
# define XXH_STATIC_LINKING_ONLY /* XXH64_state_t */
#endif
#include "xxhash.h" /* XXH_reset, update, digest */
#ifndef ZSTD_NO_TRACE
# include "zstd_trace.h"
#else
# define ZSTD_TRACE 0
#endif
#if defined (__cplusplus)
extern "C" {
@ -347,11 +352,18 @@ typedef enum {
* Private declarations
*********************************************/
typedef struct seqDef_s {
U32 offset; /* Offset code of the sequence */
U32 offset; /* offset == rawOffset + ZSTD_REP_NUM, or equivalently, offCode + 1 */
U16 litLength;
U16 matchLength;
} seqDef;
/* Controls whether seqStore has a single "long" litLength or matchLength. See seqStore_t. */
typedef enum {
ZSTD_llt_none = 0, /* no longLengthType */
ZSTD_llt_literalLength = 1, /* represents a long literal */
ZSTD_llt_matchLength = 2 /* represents a long match */
} ZSTD_longLengthType_e;
typedef struct {
seqDef* sequencesStart;
seqDef* sequences; /* ptr to end of sequences */
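The rewritten offset comment above documents the stored-offset convention used throughout the compressor. A minimal sketch of that mapping (ZSTD_REP_NUM is 3 in zstd_internal.h; helper names are illustrative, not zstd source):

    #define ZSTD_REP_NUM 3  /* number of repeat-offset codes */

    /* a fresh match distance is biased past the repeat-offset codes:
     * offset == rawOffset + ZSTD_REP_NUM, as the comment above states */
    static unsigned toStoredOffset(unsigned rawOffset) { return rawOffset + ZSTD_REP_NUM; }

    /* stored values 1..ZSTD_REP_NUM reference one of the three repeat offsets */
    static int isRepeatOffset(unsigned storedOffset) { return storedOffset <= ZSTD_REP_NUM; }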
@ -363,12 +375,12 @@ typedef struct {
size_t maxNbSeq;
size_t maxNbLit;
/* longLengthPos and longLengthID to allow us to represent either a single litLength or matchLength
/* longLengthPos and longLengthType to allow us to represent either a single litLength or matchLength
* in the seqStore that has a value larger than U16 (if it exists). To do so, we increment
* the existing value of the litLength or matchLength by 0x10000.
* the existing value of the litLength or matchLength by 0x10000.
*/
U32 longLengthID; /* 0 == no longLength; 1 == Represent the long literal; 2 == Represent the long match; */
U32 longLengthPos; /* Index of the sequence to apply long length modification to */
ZSTD_longLengthType_e longLengthType;
U32 longLengthPos; /* Index of the sequence to apply long length modification to */
} seqStore_t;
typedef struct {
@ -378,7 +390,7 @@ typedef struct {
/**
* Returns the ZSTD_sequenceLength for the given sequences. It handles the decoding of long sequences
* indicated by longLengthPos and longLengthID, and adds MINMATCH back to matchLength.
* indicated by longLengthPos and longLengthType, and adds MINMATCH back to matchLength.
*/
MEM_STATIC ZSTD_sequenceLength ZSTD_getSequenceLength(seqStore_t const* seqStore, seqDef const* seq)
{
@ -386,10 +398,10 @@ MEM_STATIC ZSTD_sequenceLength ZSTD_getSequenceLength(seqStore_t const* seqStore
seqLen.litLength = seq->litLength;
seqLen.matchLength = seq->matchLength + MINMATCH;
if (seqStore->longLengthPos == (U32)(seq - seqStore->sequencesStart)) {
if (seqStore->longLengthID == 1) {
if (seqStore->longLengthType == ZSTD_llt_literalLength) {
seqLen.litLength += 0xFFFF;
}
if (seqStore->longLengthID == 2) {
if (seqStore->longLengthType == ZSTD_llt_matchLength) {
seqLen.matchLength += 0xFFFF;
}
}


@ -1,42 +0,0 @@
/*
* Copyright (c) 2016-2021, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
#include "zstd_trace.h"
#include "../zstd.h"
#include "compiler.h"
#if ZSTD_TRACE && ZSTD_HAVE_WEAK_SYMBOLS
ZSTD_WEAK_ATTR ZSTD_TraceCtx ZSTD_trace_compress_begin(ZSTD_CCtx const* cctx)
{
(void)cctx;
return 0;
}
ZSTD_WEAK_ATTR void ZSTD_trace_compress_end(ZSTD_TraceCtx ctx, ZSTD_Trace const* trace)
{
(void)ctx;
(void)trace;
}
ZSTD_WEAK_ATTR ZSTD_TraceCtx ZSTD_trace_decompress_begin(ZSTD_DCtx const* dctx)
{
(void)dctx;
return 0;
}
ZSTD_WEAK_ATTR void ZSTD_trace_decompress_end(ZSTD_TraceCtx ctx, ZSTD_Trace const* trace)
{
(void)ctx;
(void)trace;
}
#endif


@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-2021, Facebook, Inc.
* Copyright (c) Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@ -114,14 +114,15 @@ typedef unsigned long long ZSTD_TraceCtx;
* @returns Non-zero if tracing is enabled. The return value is
* passed to ZSTD_trace_compress_end().
*/
ZSTD_TraceCtx ZSTD_trace_compress_begin(struct ZSTD_CCtx_s const* cctx);
ZSTD_WEAK_ATTR ZSTD_TraceCtx ZSTD_trace_compress_begin(
struct ZSTD_CCtx_s const* cctx);
/**
* Trace the end of a compression call.
* @param ctx The return value of ZSTD_trace_compress_begin().
* @param trace The zstd tracing info.
*/
void ZSTD_trace_compress_end(
ZSTD_WEAK_ATTR void ZSTD_trace_compress_end(
ZSTD_TraceCtx ctx,
ZSTD_Trace const* trace);
@ -132,14 +133,15 @@ void ZSTD_trace_compress_end(
* @returns Non-zero if tracing is enabled. The return value is
* passed to ZSTD_trace_compress_end().
*/
ZSTD_TraceCtx ZSTD_trace_decompress_begin(struct ZSTD_DCtx_s const* dctx);
ZSTD_WEAK_ATTR ZSTD_TraceCtx ZSTD_trace_decompress_begin(
struct ZSTD_DCtx_s const* dctx);
/**
* Trace the end of a decompression call.
* @param ctx The return value of ZSTD_trace_decompress_begin().
* @param trace The zstd tracing info.
*/
void ZSTD_trace_decompress_end(
ZSTD_WEAK_ATTR void ZSTD_trace_decompress_end(
ZSTD_TraceCtx ctx,
ZSTD_Trace const* trace);
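With the no-op definitions in zstd_trace.c removed and the declarations marked ZSTD_WEAK_ATTR, an application built together with the library can enable tracing simply by supplying strong definitions. A minimal sketch for the compression pair, assuming a toolchain where ZSTD_HAVE_WEAK_SYMBOLS and ZSTD_TRACE are in effect (the decompression pair is analogous):

    #include <stdio.h>
    #include "zstd_trace.h"  /* internal header: assumes building within the zstd source tree */

    ZSTD_TraceCtx ZSTD_trace_compress_begin(struct ZSTD_CCtx_s const* cctx)
    {
        (void)cctx;
        return 1;  /* nonzero enables tracing; the value is passed back to _end() */
    }

    void ZSTD_trace_compress_end(ZSTD_TraceCtx ctx, ZSTD_Trace const* trace)
    {
        (void)trace;  /* ZSTD_Trace carries details of the traced call */
        fprintf(stderr, "zstd: traced compression call (ctx=%llu)\n", ctx);
    }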


@ -1,6 +1,6 @@
/* ******************************************************************
* FSE : Finite State Entropy encoder
* Copyright (c) 2013-2021, Yann Collet, Facebook, Inc.
* Copyright (c) Yann Collet, Facebook, Inc.
*
* You can contact the author at :
* - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy


@ -1,7 +1,7 @@
/* ******************************************************************
* hist : Histogram functions
* part of Finite State Entropy project
* Copyright (c) 2013-2021, Yann Collet, Facebook, Inc.
* Copyright (c) Yann Collet, Facebook, Inc.
*
* You can contact the author at :
* - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy


@ -1,7 +1,7 @@
/* ******************************************************************
* hist : Histogram functions
* part of Finite State Entropy project
* Copyright (c) 2013-2021, Yann Collet, Facebook, Inc.
* Copyright (c) Yann Collet, Facebook, Inc.
*
* You can contact the author at :
* - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy


@ -1,6 +1,6 @@
/* ******************************************************************
* Huffman encoder, part of New Generation Entropy library
* Copyright (c) 2013-2021, Yann Collet, Facebook, Inc.
* Copyright (c) Yann Collet, Facebook, Inc.
*
* You can contact the author at :
* - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
@ -59,7 +59,15 @@ unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxS
* Note : all elements within weightTable are supposed to be <= HUF_TABLELOG_MAX.
*/
#define MAX_FSE_TABLELOG_FOR_HUFF_HEADER 6
static size_t HUF_compressWeights (void* dst, size_t dstSize, const void* weightTable, size_t wtSize)
typedef struct {
FSE_CTable CTable[FSE_CTABLE_SIZE_U32(MAX_FSE_TABLELOG_FOR_HUFF_HEADER, HUF_TABLELOG_MAX)];
U32 scratchBuffer[FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(HUF_TABLELOG_MAX, MAX_FSE_TABLELOG_FOR_HUFF_HEADER)];
unsigned count[HUF_TABLELOG_MAX+1];
S16 norm[HUF_TABLELOG_MAX+1];
} HUF_CompressWeightsWksp;
static size_t HUF_compressWeights(void* dst, size_t dstSize, const void* weightTable, size_t wtSize, void* workspace, size_t workspaceSize)
{
BYTE* const ostart = (BYTE*) dst;
BYTE* op = ostart;
@ -67,33 +75,30 @@ static size_t HUF_compressWeights (void* dst, size_t dstSize, const void* weight
unsigned maxSymbolValue = HUF_TABLELOG_MAX;
U32 tableLog = MAX_FSE_TABLELOG_FOR_HUFF_HEADER;
HUF_CompressWeightsWksp* wksp = (HUF_CompressWeightsWksp*)workspace;
FSE_CTable CTable[FSE_CTABLE_SIZE_U32(MAX_FSE_TABLELOG_FOR_HUFF_HEADER, HUF_TABLELOG_MAX)];
U32 scratchBuffer[FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(HUF_TABLELOG_MAX, MAX_FSE_TABLELOG_FOR_HUFF_HEADER)];
unsigned count[HUF_TABLELOG_MAX+1];
S16 norm[HUF_TABLELOG_MAX+1];
if (workspaceSize < sizeof(HUF_CompressWeightsWksp)) return ERROR(GENERIC);
/* init conditions */
if (wtSize <= 1) return 0; /* Not compressible */
/* Scan input and build symbol stats */
{ unsigned const maxCount = HIST_count_simple(count, &maxSymbolValue, weightTable, wtSize); /* never fails */
{ unsigned const maxCount = HIST_count_simple(wksp->count, &maxSymbolValue, weightTable, wtSize); /* never fails */
if (maxCount == wtSize) return 1; /* only a single symbol in src : rle */
if (maxCount == 1) return 0; /* each symbol present maximum once => not compressible */
}
tableLog = FSE_optimalTableLog(tableLog, wtSize, maxSymbolValue);
CHECK_F( FSE_normalizeCount(norm, tableLog, count, wtSize, maxSymbolValue, /* useLowProbCount */ 0) );
CHECK_F( FSE_normalizeCount(wksp->norm, tableLog, wksp->count, wtSize, maxSymbolValue, /* useLowProbCount */ 0) );
/* Write table description header */
{ CHECK_V_F(hSize, FSE_writeNCount(op, (size_t)(oend-op), norm, maxSymbolValue, tableLog) );
{ CHECK_V_F(hSize, FSE_writeNCount(op, (size_t)(oend-op), wksp->norm, maxSymbolValue, tableLog) );
op += hSize;
}
/* Compress */
CHECK_F( FSE_buildCTable_wksp(CTable, norm, maxSymbolValue, tableLog, scratchBuffer, sizeof(scratchBuffer)) );
{ CHECK_V_F(cSize, FSE_compress_usingCTable(op, (size_t)(oend - op), weightTable, wtSize, CTable) );
CHECK_F( FSE_buildCTable_wksp(wksp->CTable, wksp->norm, maxSymbolValue, tableLog, wksp->scratchBuffer, sizeof(wksp->scratchBuffer)) );
{ CHECK_V_F(cSize, FSE_compress_usingCTable(op, (size_t)(oend - op), weightTable, wtSize, wksp->CTable) );
if (cSize == 0) return 0; /* not enough space for compressed data */
op += cSize;
}
@ -102,29 +107,33 @@ static size_t HUF_compressWeights (void* dst, size_t dstSize, const void* weight
}
/*! HUF_writeCTable() :
`CTable` : Huffman tree to save, using huf representation.
@return : size of saved CTable */
size_t HUF_writeCTable (void* dst, size_t maxDstSize,
const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog)
{
typedef struct {
HUF_CompressWeightsWksp wksp;
BYTE bitsToWeight[HUF_TABLELOG_MAX + 1]; /* precomputed conversion table */
BYTE huffWeight[HUF_SYMBOLVALUE_MAX];
} HUF_WriteCTableWksp;
size_t HUF_writeCTable_wksp(void* dst, size_t maxDstSize,
const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog,
void* workspace, size_t workspaceSize)
{
BYTE* op = (BYTE*)dst;
U32 n;
HUF_WriteCTableWksp* wksp = (HUF_WriteCTableWksp*)workspace;
/* check conditions */
/* check conditions */
if (workspaceSize < sizeof(HUF_WriteCTableWksp)) return ERROR(GENERIC);
if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge);
/* convert to weight */
bitsToWeight[0] = 0;
wksp->bitsToWeight[0] = 0;
for (n=1; n<huffLog+1; n++)
bitsToWeight[n] = (BYTE)(huffLog + 1 - n);
wksp->bitsToWeight[n] = (BYTE)(huffLog + 1 - n);
for (n=0; n<maxSymbolValue; n++)
huffWeight[n] = bitsToWeight[CTable[n].nbBits];
wksp->huffWeight[n] = wksp->bitsToWeight[CTable[n].nbBits];
/* attempt weights compression by FSE */
{ CHECK_V_F(hSize, HUF_compressWeights(op+1, maxDstSize-1, huffWeight, maxSymbolValue) );
{ CHECK_V_F(hSize, HUF_compressWeights(op+1, maxDstSize-1, wksp->huffWeight, maxSymbolValue, &wksp->wksp, sizeof(wksp->wksp)) );
if ((hSize>1) & (hSize < maxSymbolValue/2)) { /* FSE compressed */
op[0] = (BYTE)hSize;
return hSize+1;
@ -134,12 +143,22 @@ size_t HUF_writeCTable (void* dst, size_t maxDstSize,
if (maxSymbolValue > (256-128)) return ERROR(GENERIC); /* should not happen : likely means source cannot be compressed */
if (((maxSymbolValue+1)/2) + 1 > maxDstSize) return ERROR(dstSize_tooSmall); /* not enough space within dst buffer */
op[0] = (BYTE)(128 /*special case*/ + (maxSymbolValue-1));
huffWeight[maxSymbolValue] = 0; /* to be sure it doesn't cause msan issue in final combination */
wksp->huffWeight[maxSymbolValue] = 0; /* to be sure it doesn't cause msan issue in final combination */
for (n=0; n<maxSymbolValue; n+=2)
op[(n/2)+1] = (BYTE)((huffWeight[n] << 4) + huffWeight[n+1]);
op[(n/2)+1] = (BYTE)((wksp->huffWeight[n] << 4) + wksp->huffWeight[n+1]);
return ((maxSymbolValue+1)/2) + 1;
}
/*! HUF_writeCTable() :
`CTable` : Huffman tree to save, using huf representation.
@return : size of saved CTable */
size_t HUF_writeCTable (void* dst, size_t maxDstSize,
const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog)
{
HUF_WriteCTableWksp wksp;
return HUF_writeCTable_wksp(dst, maxDstSize, CTable, maxSymbolValue, huffLog, &wksp, sizeof(wksp));
}
size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned* hasZeroWeights)
{
@ -732,7 +751,10 @@ static size_t HUF_compressCTable_internal(
typedef struct {
unsigned count[HUF_SYMBOLVALUE_MAX + 1];
HUF_CElt CTable[HUF_SYMBOLVALUE_MAX + 1];
HUF_buildCTable_wksp_tables buildCTable_wksp;
union {
HUF_buildCTable_wksp_tables buildCTable_wksp;
HUF_WriteCTableWksp writeCTable_wksp;
} wksps;
} HUF_compress_tables_t;
/* HUF_compress_internal() :
@ -795,7 +817,7 @@ HUF_compress_internal (void* dst, size_t dstSize,
huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue);
{ size_t const maxBits = HUF_buildCTable_wksp(table->CTable, table->count,
maxSymbolValue, huffLog,
&table->buildCTable_wksp, sizeof(table->buildCTable_wksp));
&table->wksps.buildCTable_wksp, sizeof(table->wksps.buildCTable_wksp));
CHECK_F(maxBits);
huffLog = (U32)maxBits;
/* Zero unused symbols in CTable, so we can check it for validity */
@ -804,7 +826,8 @@ HUF_compress_internal (void* dst, size_t dstSize,
}
/* Write table description header */
{ CHECK_V_F(hSize, HUF_writeCTable (op, dstSize, table->CTable, maxSymbolValue, huffLog) );
{ CHECK_V_F(hSize, HUF_writeCTable_wksp(op, dstSize, table->CTable, maxSymbolValue, huffLog,
&table->wksps.writeCTable_wksp, sizeof(table->wksps.writeCTable_wksp)) );
/* Check if using previous huffman table is beneficial */
if (repeat && *repeat != HUF_repeat_none) {
size_t const oldSize = HUF_estimateCompressedSize(oldHufTable, table->count, maxSymbolValue);

File diff suppressed because it is too large


@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
* Copyright (c) Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@ -19,7 +19,6 @@
* Dependencies
***************************************/
#include "../common/zstd_internal.h"
#include "../common/zstd_trace.h" /* ZSTD_TraceCtx */
#include "zstd_cwksp.h"
#ifdef ZSTD_MULTITHREAD
# include "zstdmt_compress.h"
@ -82,6 +81,53 @@ typedef struct {
ZSTD_fseCTables_t fse;
} ZSTD_entropyCTables_t;
/***********************************************
* Entropy buffer statistics structs and funcs *
***********************************************/
/** ZSTD_hufCTablesMetadata_t :
* Stores Literals Block Type for a super-block in hType, and
* huffman tree description in hufDesBuffer.
* hufDesSize refers to the size of huffman tree description in bytes.
* This metadata is populated in ZSTD_buildBlockEntropyStats_literals() */
typedef struct {
symbolEncodingType_e hType;
BYTE hufDesBuffer[ZSTD_MAX_HUF_HEADER_SIZE];
size_t hufDesSize;
} ZSTD_hufCTablesMetadata_t;
/** ZSTD_fseCTablesMetadata_t :
* Stores symbol compression modes for a super-block in {ll, ol, ml}Type, and
* fse tables in fseTablesBuffer.
* fseTablesSize refers to the size of fse tables in bytes.
* This metadata is populated in ZSTD_buildBlockEntropyStats_sequences() */
typedef struct {
symbolEncodingType_e llType;
symbolEncodingType_e ofType;
symbolEncodingType_e mlType;
BYTE fseTablesBuffer[ZSTD_MAX_FSE_HEADERS_SIZE];
size_t fseTablesSize;
size_t lastCountSize; /* This is to account for bug in 1.3.4. More detail in ZSTD_entropyCompressSeqStore_internal() */
} ZSTD_fseCTablesMetadata_t;
typedef struct {
ZSTD_hufCTablesMetadata_t hufMetadata;
ZSTD_fseCTablesMetadata_t fseMetadata;
} ZSTD_entropyCTablesMetadata_t;
/** ZSTD_buildBlockEntropyStats() :
* Builds entropy for the block.
* @return : 0 on success or error code */
size_t ZSTD_buildBlockEntropyStats(seqStore_t* seqStorePtr,
const ZSTD_entropyCTables_t* prevEntropy,
ZSTD_entropyCTables_t* nextEntropy,
const ZSTD_CCtx_params* cctxParams,
ZSTD_entropyCTablesMetadata_t* entropyMetadata,
void* workspace, size_t wkspSize);
/*********************************
* Compression internals structs *
*********************************/
typedef struct {
U32 off; /* Offset code (offset + ZSTD_REP_MOVE) for the match */
U32 len; /* Raw length of match */
@ -142,14 +188,21 @@ typedef struct {
} ZSTD_compressedBlockState_t;
typedef struct {
BYTE const* nextSrc; /* next block here to continue on current prefix */
BYTE const* base; /* All regular indexes relative to this position */
BYTE const* dictBase; /* extDict indexes relative to this position */
U32 dictLimit; /* below that point, need extDict */
U32 lowLimit; /* below that point, no more valid data */
BYTE const* nextSrc; /* next block here to continue on current prefix */
BYTE const* base; /* All regular indexes relative to this position */
BYTE const* dictBase; /* extDict indexes relative to this position */
U32 dictLimit; /* below that point, need extDict */
U32 lowLimit; /* below that point, no more valid data */
U32 nbOverflowCorrections; /* Number of times overflow correction has run since
* ZSTD_window_init(). Useful for debugging coredumps
* and for ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY.
*/
} ZSTD_window_t;
typedef struct ZSTD_matchState_t ZSTD_matchState_t;
#define ZSTD_ROW_HASH_CACHE_SIZE 8 /* Size of prefetching hash cache for row-based matchfinder */
struct ZSTD_matchState_t {
ZSTD_window_t window; /* State for window round buffer management */
U32 loadedDictEnd; /* index of end of dictionary, within context's referential.
@ -161,9 +214,17 @@ struct ZSTD_matchState_t {
*/
U32 nextToUpdate; /* index from which to continue table update */
U32 hashLog3; /* dispatch table for matches of len==3 : larger == faster, more memory */
U32 rowHashLog; /* For row-based matchfinder: Hashlog based on nb of rows in the hashTable.*/
U16* tagTable; /* For row-based matchFinder: A row-based table containing the hashes and head index. */
U32 hashCache[ZSTD_ROW_HASH_CACHE_SIZE]; /* For row-based matchFinder: a cache of hashes to improve speed */
U32* hashTable;
U32* hashTable3;
U32* chainTable;
U32 forceNonContiguous; /* Non-zero if we should force non-contiguous load for the next window update. */
int dedicatedDictSearch; /* Indicates whether this matchState is using the
* dedicated dictionary search structure.
*/
@ -256,6 +317,15 @@ struct ZSTD_CCtx_params_s {
ZSTD_sequenceFormat_e blockDelimiters;
int validateSequences;
/* Block splitting */
int splitBlocks;
/* Param for deciding whether to use row-based matchfinder */
ZSTD_useRowMatchFinderMode_e useRowMatchFinder;
/* Always load a dictionary in ext-dict mode (not prefix mode)? */
int deterministicRefPrefix;
/* Internal use, for createCCtxParams() and freeCCtxParams() only */
ZSTD_customMem customMem;
}; /* typedef'd to ZSTD_CCtx_params within "zstd.h" */
@ -279,6 +349,7 @@ struct ZSTD_CCtx_s {
int bmi2; /* == 1 if the CPU supports BMI2 and 0 otherwise. CPU support is determined dynamically once per context lifetime. */
ZSTD_CCtx_params requestedParams;
ZSTD_CCtx_params appliedParams;
ZSTD_CCtx_params simpleApiParams; /* Param storage used by the simple API - not sticky. Must only be used in top-level simple API functions for storage. */
U32 dictID;
size_t dictContentSize;
@ -371,7 +442,7 @@ typedef enum {
typedef size_t (*ZSTD_blockCompressor) (
ZSTD_matchState_t* bs, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_dictMode_e dictMode);
ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_useRowMatchFinderMode_e rowMatchfinderMode, ZSTD_dictMode_e dictMode);
MEM_STATIC U32 ZSTD_LLcode(U32 litLength)
@ -548,8 +619,8 @@ void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const BYTE* litera
/* literal Length */
if (litLength>0xFFFF) {
assert(seqStorePtr->longLengthID == 0); /* there can only be a single long length */
seqStorePtr->longLengthID = 1;
assert(seqStorePtr->longLengthType == ZSTD_llt_none); /* there can only be a single long length */
seqStorePtr->longLengthType = ZSTD_llt_literalLength;
seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
}
seqStorePtr->sequences[0].litLength = (U16)litLength;
@ -559,8 +630,8 @@ void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const BYTE* litera
/* match Length */
if (mlBase>0xFFFF) {
assert(seqStorePtr->longLengthID == 0); /* there can only be a single long length */
seqStorePtr->longLengthID = 2;
assert(seqStorePtr->longLengthType == ZSTD_llt_none); /* there can only be a single long length */
seqStorePtr->longLengthType = ZSTD_llt_matchLength;
seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
}
seqStorePtr->sequences[0].matchLength = (U16)mlBase;
@ -811,6 +882,13 @@ MEM_STATIC void ZSTD_window_clear(ZSTD_window_t* window)
window->dictLimit = end;
}
MEM_STATIC U32 ZSTD_window_isEmpty(ZSTD_window_t const window)
{
return window.dictLimit == 1 &&
window.lowLimit == 1 &&
(window.nextSrc - window.base) == 1;
}
/**
* ZSTD_window_hasExtDict():
* Returns non-zero if the window has a non-empty extDict.
@ -834,15 +912,69 @@ MEM_STATIC ZSTD_dictMode_e ZSTD_matchState_dictMode(const ZSTD_matchState_t *ms)
ZSTD_noDict;
}
/* Defining this macro to non-zero tells zstd to run the overflow correction
* code much more frequently. This is very inefficient, and should only be
* used for tests and fuzzers.
*/
#ifndef ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY
# ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
# define ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY 1
# else
# define ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY 0
# endif
#endif
/**
* ZSTD_window_canOverflowCorrect():
* Returns non-zero if the indices are large enough for overflow correction
* to work correctly without impacting compression ratio.
*/
MEM_STATIC U32 ZSTD_window_canOverflowCorrect(ZSTD_window_t const window,
U32 cycleLog,
U32 maxDist,
U32 loadedDictEnd,
void const* src)
{
U32 const cycleSize = 1u << cycleLog;
U32 const curr = (U32)((BYTE const*)src - window.base);
U32 const minIndexToOverflowCorrect = cycleSize + MAX(maxDist, cycleSize);
/* Adjust the min index to backoff the overflow correction frequency,
* so we don't waste too much CPU in overflow correction. If this
* computation overflows we don't really care, we just need to make
* sure it is at least minIndexToOverflowCorrect.
*/
U32 const adjustment = window.nbOverflowCorrections + 1;
U32 const adjustedIndex = MAX(minIndexToOverflowCorrect * adjustment,
minIndexToOverflowCorrect);
U32 const indexLargeEnough = curr > adjustedIndex;
/* Only overflow correct early if the dictionary is invalidated already,
* so we don't hurt compression ratio.
*/
U32 const dictionaryInvalidated = curr > maxDist + loadedDictEnd;
return indexLargeEnough && dictionaryInvalidated;
}
/**
* ZSTD_window_needOverflowCorrection():
* Returns non-zero if the indices are getting too large and need overflow
* protection.
*/
MEM_STATIC U32 ZSTD_window_needOverflowCorrection(ZSTD_window_t const window,
U32 cycleLog,
U32 maxDist,
U32 loadedDictEnd,
void const* src,
void const* srcEnd)
{
U32 const curr = (U32)((BYTE const*)srcEnd - window.base);
if (ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY) {
if (ZSTD_window_canOverflowCorrect(window, cycleLog, maxDist, loadedDictEnd, src)) {
return 1;
}
}
return curr > ZSTD_CURRENT_MAX;
}
@ -854,7 +986,6 @@ MEM_STATIC U32 ZSTD_window_needOverflowCorrection(ZSTD_window_t const window,
*
* The least significant cycleLog bits of the indices must remain the same,
* which may be 0. Every index up to maxDist in the past must be valid.
* NOTE: (maxDist & cycleMask) must be zero.
*/
MEM_STATIC U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog,
U32 maxDist, void const* src)
@ -878,17 +1009,25 @@ MEM_STATIC U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog,
* 3. (cctx->lowLimit + 1<<windowLog) < 1<<32:
* windowLog <= 31 ==> 3<<29 + 1<<windowLog < 7<<29 < 1<<32.
*/
U32 const cycleMask = (1U << cycleLog) - 1;
U32 const cycleSize = 1u << cycleLog;
U32 const cycleMask = cycleSize - 1;
U32 const curr = (U32)((BYTE const*)src - window->base);
U32 const currentCycle0 = curr & cycleMask;
/* Exclude zero so that newCurrent - maxDist >= 1. */
U32 const currentCycle1 = currentCycle0 == 0 ? (1U << cycleLog) : currentCycle0;
U32 const newCurrent = currentCycle1 + maxDist;
U32 const currentCycle1 = currentCycle0 == 0 ? cycleSize : currentCycle0;
U32 const newCurrent = currentCycle1 + MAX(maxDist, cycleSize);
U32 const correction = curr - newCurrent;
assert((maxDist & cycleMask) == 0);
/* maxDist must be a power of two so that:
* (newCurrent & cycleMask) == (curr & cycleMask)
* This is required to not corrupt the chains / binary tree.
*/
assert((maxDist & (maxDist - 1)) == 0);
assert((curr & cycleMask) == (newCurrent & cycleMask));
assert(curr > newCurrent);
/* Loose bound, should be around 1<<29 (see above) */
assert(correction > 1<<28);
if (!ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY) {
/* Loose bound, should be around 1<<29 (see above) */
assert(correction > 1<<28);
}
window->base += correction;
window->dictBase += correction;
@ -904,6 +1043,8 @@ MEM_STATIC U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog,
assert(window->lowLimit <= newCurrent);
assert(window->dictLimit <= newCurrent);
++window->nbOverflowCorrections;
DEBUGLOG(4, "Correction of 0x%x bytes to lowLimit=0x%x", correction,
window->lowLimit);
return correction;
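A worked example of the correction arithmetic above, with hypothetical values (cycleLog = 24 and maxDist = 1 << 27, so MAX(maxDist, cycleSize) is maxDist):

    #include <assert.h>

    int main(void)
    {
        unsigned const cycleSize = 1u << 24;
        unsigned const cycleMask = cycleSize - 1;
        unsigned const maxDist   = 1u << 27;
        unsigned const curr      = 0xF1234567u;

        unsigned const currentCycle0 = curr & cycleMask;  /* 0x00234567 */
        unsigned const currentCycle1 = currentCycle0 ? currentCycle0 : cycleSize;
        unsigned const newCurrent    = currentCycle1 + maxDist;  /* 0x08234567 */
        unsigned const correction    = curr - newCurrent;        /* 0xE9000000 */

        assert((newCurrent & cycleMask) == (curr & cycleMask));  /* chains stay aligned */
        assert(correction > (1u << 28));                         /* the loose bound above */
        return 0;
    }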
@ -1013,6 +1154,7 @@ MEM_STATIC void ZSTD_window_init(ZSTD_window_t* window) {
window->dictLimit = 1; /* start from 1, so that 1st position is valid */
window->lowLimit = 1; /* it ensures first and later CCtx usages compress the same */
window->nextSrc = window->base + 1; /* see issue #1241 */
window->nbOverflowCorrections = 0;
}
/**
@ -1023,7 +1165,8 @@ MEM_STATIC void ZSTD_window_init(ZSTD_window_t* window) {
* Returns non-zero if the segment is contiguous.
*/
MEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window,
void const* src, size_t srcSize)
void const* src, size_t srcSize,
int forceNonContiguous)
{
BYTE const* const ip = (BYTE const*)src;
U32 contiguous = 1;
@ -1033,7 +1176,7 @@ MEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window,
assert(window->base != NULL);
assert(window->dictBase != NULL);
/* Check if blocks follow each other */
if (src != window->nextSrc) {
if (src != window->nextSrc || forceNonContiguous) {
/* not contiguous */
size_t const distanceFromBase = (size_t)(window->nextSrc - window->base);
DEBUGLOG(5, "Non contiguous blocks, new segment starts at %u", window->dictLimit);


@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
* Copyright (c) Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@ -117,7 +117,7 @@ size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
}
}
if ((cLitSize==0) | (cLitSize >= srcSize - minGain) | ERR_isError(cLitSize)) {
if ((cLitSize==0) || (cLitSize >= srcSize - minGain) || ERR_isError(cLitSize)) {
ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
}


@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
* Copyright (c) Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the


@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
* Copyright (c) Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@ -85,6 +85,8 @@ static size_t ZSTD_entropyCost(unsigned const* count, unsigned const max, size_t
{
unsigned cost = 0;
unsigned s;
assert(total > 0);
for (s = 0; s <= max; ++s) {
unsigned norm = (unsigned)((256 * count[s]) / total);
if (count[s] != 0 && norm == 0)
@ -232,6 +234,11 @@ ZSTD_selectEncodingType(
return set_compressed;
}
typedef struct {
S16 norm[MaxSeq + 1];
U32 wksp[FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(MaxSeq, MaxFSELog)];
} ZSTD_BuildCTableWksp;
size_t
ZSTD_buildCTable(void* dst, size_t dstCapacity,
FSE_CTable* nextCTable, U32 FSELog, symbolEncodingType_e type,
@ -258,7 +265,7 @@ ZSTD_buildCTable(void* dst, size_t dstCapacity,
FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, defaultNorm, defaultMax, defaultNormLog, entropyWorkspace, entropyWorkspaceSize), ""); /* note : could be pre-calculated */
return 0;
case set_compressed: {
S16 norm[MaxSeq + 1];
ZSTD_BuildCTableWksp* wksp = (ZSTD_BuildCTableWksp*)entropyWorkspace;
size_t nbSeq_1 = nbSeq;
const U32 tableLog = FSE_optimalTableLog(FSELog, nbSeq, max);
if (count[codeTable[nbSeq-1]] > 1) {
@ -266,11 +273,12 @@ ZSTD_buildCTable(void* dst, size_t dstCapacity,
nbSeq_1--;
}
assert(nbSeq_1 > 1);
assert(entropyWorkspaceSize >= FSE_BUILD_CTABLE_WORKSPACE_SIZE(MaxSeq, MaxFSELog));
FORWARD_IF_ERROR(FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max, ZSTD_useLowProbCount(nbSeq_1)), "");
{ size_t const NCountSize = FSE_writeNCount(op, oend - op, norm, max, tableLog); /* overflow protected */
assert(entropyWorkspaceSize >= sizeof(ZSTD_BuildCTableWksp));
(void)entropyWorkspaceSize;
FORWARD_IF_ERROR(FSE_normalizeCount(wksp->norm, tableLog, count, nbSeq_1, max, ZSTD_useLowProbCount(nbSeq_1)), "");
{ size_t const NCountSize = FSE_writeNCount(op, oend - op, wksp->norm, max, tableLog); /* overflow protected */
FORWARD_IF_ERROR(NCountSize, "FSE_writeNCount failed");
FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, norm, max, tableLog, entropyWorkspace, entropyWorkspaceSize), "");
FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, wksp->norm, max, tableLog, wksp->wksp, sizeof(wksp->wksp)), "");
return NCountSize;
}
}


@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
* Copyright (c) Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the


@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
* Copyright (c) Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@ -15,288 +15,10 @@
#include "../common/zstd_internal.h" /* ZSTD_getSequenceLength */
#include "hist.h" /* HIST_countFast_wksp */
#include "zstd_compress_internal.h"
#include "zstd_compress_internal.h" /* ZSTD_[huf|fse|entropy]CTablesMetadata_t */
#include "zstd_compress_sequences.h"
#include "zstd_compress_literals.h"
/*-*************************************
* Superblock entropy buffer structs
***************************************/
/** ZSTD_hufCTablesMetadata_t :
* Stores Literals Block Type for a super-block in hType, and
* huffman tree description in hufDesBuffer.
* hufDesSize refers to the size of huffman tree description in bytes.
* This metadata is populated in ZSTD_buildSuperBlockEntropy_literal() */
typedef struct {
symbolEncodingType_e hType;
BYTE hufDesBuffer[ZSTD_MAX_HUF_HEADER_SIZE];
size_t hufDesSize;
} ZSTD_hufCTablesMetadata_t;
/** ZSTD_fseCTablesMetadata_t :
* Stores symbol compression modes for a super-block in {ll, ol, ml}Type, and
* fse tables in fseTablesBuffer.
* fseTablesSize refers to the size of fse tables in bytes.
* This metadata is populated in ZSTD_buildSuperBlockEntropy_sequences() */
typedef struct {
symbolEncodingType_e llType;
symbolEncodingType_e ofType;
symbolEncodingType_e mlType;
BYTE fseTablesBuffer[ZSTD_MAX_FSE_HEADERS_SIZE];
size_t fseTablesSize;
size_t lastCountSize; /* This is to account for bug in 1.3.4. More detail in ZSTD_compressSubBlock_sequences() */
} ZSTD_fseCTablesMetadata_t;
typedef struct {
ZSTD_hufCTablesMetadata_t hufMetadata;
ZSTD_fseCTablesMetadata_t fseMetadata;
} ZSTD_entropyCTablesMetadata_t;
/** ZSTD_buildSuperBlockEntropy_literal() :
* Builds entropy for the super-block literals.
* Stores literals block type (raw, rle, compressed, repeat) and
* huffman description table to hufMetadata.
* @return : size of huffman description table or error code */
static size_t ZSTD_buildSuperBlockEntropy_literal(void* const src, size_t srcSize,
const ZSTD_hufCTables_t* prevHuf,
ZSTD_hufCTables_t* nextHuf,
ZSTD_hufCTablesMetadata_t* hufMetadata,
const int disableLiteralsCompression,
void* workspace, size_t wkspSize)
{
BYTE* const wkspStart = (BYTE*)workspace;
BYTE* const wkspEnd = wkspStart + wkspSize;
BYTE* const countWkspStart = wkspStart;
unsigned* const countWksp = (unsigned*)workspace;
const size_t countWkspSize = (HUF_SYMBOLVALUE_MAX + 1) * sizeof(unsigned);
BYTE* const nodeWksp = countWkspStart + countWkspSize;
const size_t nodeWkspSize = wkspEnd-nodeWksp;
unsigned maxSymbolValue = 255;
unsigned huffLog = HUF_TABLELOG_DEFAULT;
HUF_repeat repeat = prevHuf->repeatMode;
DEBUGLOG(5, "ZSTD_buildSuperBlockEntropy_literal (srcSize=%zu)", srcSize);
/* Prepare nextEntropy assuming reusing the existing table */
ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
if (disableLiteralsCompression) {
DEBUGLOG(5, "set_basic - disabled");
hufMetadata->hType = set_basic;
return 0;
}
/* small ? don't even attempt compression (speed opt) */
# define COMPRESS_LITERALS_SIZE_MIN 63
{ size_t const minLitSize = (prevHuf->repeatMode == HUF_repeat_valid) ? 6 : COMPRESS_LITERALS_SIZE_MIN;
if (srcSize <= minLitSize) {
DEBUGLOG(5, "set_basic - too small");
hufMetadata->hType = set_basic;
return 0;
}
}
/* Scan input and build symbol stats */
{ size_t const largest = HIST_count_wksp (countWksp, &maxSymbolValue, (const BYTE*)src, srcSize, workspace, wkspSize);
FORWARD_IF_ERROR(largest, "HIST_count_wksp failed");
if (largest == srcSize) {
DEBUGLOG(5, "set_rle");
hufMetadata->hType = set_rle;
return 0;
}
if (largest <= (srcSize >> 7)+4) {
DEBUGLOG(5, "set_basic - no gain");
hufMetadata->hType = set_basic;
return 0;
}
}
/* Validate the previous Huffman table */
if (repeat == HUF_repeat_check && !HUF_validateCTable((HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue)) {
repeat = HUF_repeat_none;
}
/* Build Huffman Tree */
ZSTD_memset(nextHuf->CTable, 0, sizeof(nextHuf->CTable));
huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue);
{ size_t const maxBits = HUF_buildCTable_wksp((HUF_CElt*)nextHuf->CTable, countWksp,
maxSymbolValue, huffLog,
nodeWksp, nodeWkspSize);
FORWARD_IF_ERROR(maxBits, "HUF_buildCTable_wksp");
huffLog = (U32)maxBits;
{ /* Build and write the CTable */
size_t const newCSize = HUF_estimateCompressedSize(
(HUF_CElt*)nextHuf->CTable, countWksp, maxSymbolValue);
size_t const hSize = HUF_writeCTable(
hufMetadata->hufDesBuffer, sizeof(hufMetadata->hufDesBuffer),
(HUF_CElt*)nextHuf->CTable, maxSymbolValue, huffLog);
/* Check against repeating the previous CTable */
if (repeat != HUF_repeat_none) {
size_t const oldCSize = HUF_estimateCompressedSize(
(HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue);
if (oldCSize < srcSize && (oldCSize <= hSize + newCSize || hSize + 12 >= srcSize)) {
DEBUGLOG(5, "set_repeat - smaller");
ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
hufMetadata->hType = set_repeat;
return 0;
}
}
if (newCSize + hSize >= srcSize) {
DEBUGLOG(5, "set_basic - no gains");
ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
hufMetadata->hType = set_basic;
return 0;
}
DEBUGLOG(5, "set_compressed (hSize=%u)", (U32)hSize);
hufMetadata->hType = set_compressed;
nextHuf->repeatMode = HUF_repeat_check;
return hSize;
}
}
}
/** ZSTD_buildSuperBlockEntropy_sequences() :
* Builds entropy for the super-block sequences.
* Stores symbol compression modes and fse table to fseMetadata.
* @return : size of fse tables or error code */
static size_t ZSTD_buildSuperBlockEntropy_sequences(seqStore_t* seqStorePtr,
const ZSTD_fseCTables_t* prevEntropy,
ZSTD_fseCTables_t* nextEntropy,
const ZSTD_CCtx_params* cctxParams,
ZSTD_fseCTablesMetadata_t* fseMetadata,
void* workspace, size_t wkspSize)
{
BYTE* const wkspStart = (BYTE*)workspace;
BYTE* const wkspEnd = wkspStart + wkspSize;
BYTE* const countWkspStart = wkspStart;
unsigned* const countWksp = (unsigned*)workspace;
const size_t countWkspSize = (MaxSeq + 1) * sizeof(unsigned);
BYTE* const cTableWksp = countWkspStart + countWkspSize;
const size_t cTableWkspSize = wkspEnd-cTableWksp;
ZSTD_strategy const strategy = cctxParams->cParams.strategy;
FSE_CTable* CTable_LitLength = nextEntropy->litlengthCTable;
FSE_CTable* CTable_OffsetBits = nextEntropy->offcodeCTable;
FSE_CTable* CTable_MatchLength = nextEntropy->matchlengthCTable;
const BYTE* const ofCodeTable = seqStorePtr->ofCode;
const BYTE* const llCodeTable = seqStorePtr->llCode;
const BYTE* const mlCodeTable = seqStorePtr->mlCode;
size_t const nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart;
BYTE* const ostart = fseMetadata->fseTablesBuffer;
BYTE* const oend = ostart + sizeof(fseMetadata->fseTablesBuffer);
BYTE* op = ostart;
assert(cTableWkspSize >= (1 << MaxFSELog) * sizeof(FSE_FUNCTION_TYPE));
DEBUGLOG(5, "ZSTD_buildSuperBlockEntropy_sequences (nbSeq=%zu)", nbSeq);
ZSTD_memset(workspace, 0, wkspSize);
fseMetadata->lastCountSize = 0;
/* convert length/distances into codes */
ZSTD_seqToCodes(seqStorePtr);
/* build CTable for Literal Lengths */
{ U32 LLtype;
unsigned max = MaxLL;
size_t const mostFrequent = HIST_countFast_wksp(countWksp, &max, llCodeTable, nbSeq, workspace, wkspSize); /* can't fail */
DEBUGLOG(5, "Building LL table");
nextEntropy->litlength_repeatMode = prevEntropy->litlength_repeatMode;
LLtype = ZSTD_selectEncodingType(&nextEntropy->litlength_repeatMode,
countWksp, max, mostFrequent, nbSeq,
LLFSELog, prevEntropy->litlengthCTable,
LL_defaultNorm, LL_defaultNormLog,
ZSTD_defaultAllowed, strategy);
assert(set_basic < set_compressed && set_rle < set_compressed);
assert(!(LLtype < set_compressed && nextEntropy->litlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */
{ size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_LitLength, LLFSELog, (symbolEncodingType_e)LLtype,
countWksp, max, llCodeTable, nbSeq, LL_defaultNorm, LL_defaultNormLog, MaxLL,
prevEntropy->litlengthCTable, sizeof(prevEntropy->litlengthCTable),
cTableWksp, cTableWkspSize);
FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for LitLens failed");
if (LLtype == set_compressed)
fseMetadata->lastCountSize = countSize;
op += countSize;
fseMetadata->llType = (symbolEncodingType_e) LLtype;
} }
/* build CTable for Offsets */
{ U32 Offtype;
unsigned max = MaxOff;
size_t const mostFrequent = HIST_countFast_wksp(countWksp, &max, ofCodeTable, nbSeq, workspace, wkspSize); /* can't fail */
/* We can only use the basic table if max <= DefaultMaxOff, otherwise the offsets are too large */
ZSTD_defaultPolicy_e const defaultPolicy = (max <= DefaultMaxOff) ? ZSTD_defaultAllowed : ZSTD_defaultDisallowed;
DEBUGLOG(5, "Building OF table");
nextEntropy->offcode_repeatMode = prevEntropy->offcode_repeatMode;
Offtype = ZSTD_selectEncodingType(&nextEntropy->offcode_repeatMode,
countWksp, max, mostFrequent, nbSeq,
OffFSELog, prevEntropy->offcodeCTable,
OF_defaultNorm, OF_defaultNormLog,
defaultPolicy, strategy);
assert(!(Offtype < set_compressed && nextEntropy->offcode_repeatMode != FSE_repeat_none)); /* We don't copy tables */
{ size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)Offtype,
countWksp, max, ofCodeTable, nbSeq, OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff,
prevEntropy->offcodeCTable, sizeof(prevEntropy->offcodeCTable),
cTableWksp, cTableWkspSize);
FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for Offsets failed");
if (Offtype == set_compressed)
fseMetadata->lastCountSize = countSize;
op += countSize;
fseMetadata->ofType = (symbolEncodingType_e) Offtype;
} }
/* build CTable for MatchLengths */
{ U32 MLtype;
unsigned max = MaxML;
size_t const mostFrequent = HIST_countFast_wksp(countWksp, &max, mlCodeTable, nbSeq, workspace, wkspSize); /* can't fail */
DEBUGLOG(5, "Building ML table (remaining space : %i)", (int)(oend-op));
nextEntropy->matchlength_repeatMode = prevEntropy->matchlength_repeatMode;
MLtype = ZSTD_selectEncodingType(&nextEntropy->matchlength_repeatMode,
countWksp, max, mostFrequent, nbSeq,
MLFSELog, prevEntropy->matchlengthCTable,
ML_defaultNorm, ML_defaultNormLog,
ZSTD_defaultAllowed, strategy);
assert(!(MLtype < set_compressed && nextEntropy->matchlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */
{ size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_MatchLength, MLFSELog, (symbolEncodingType_e)MLtype,
countWksp, max, mlCodeTable, nbSeq, ML_defaultNorm, ML_defaultNormLog, MaxML,
prevEntropy->matchlengthCTable, sizeof(prevEntropy->matchlengthCTable),
cTableWksp, cTableWkspSize);
FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for MatchLengths failed");
if (MLtype == set_compressed)
fseMetadata->lastCountSize = countSize;
op += countSize;
fseMetadata->mlType = (symbolEncodingType_e) MLtype;
} }
assert((size_t) (op-ostart) <= sizeof(fseMetadata->fseTablesBuffer));
return op-ostart;
}
/** ZSTD_buildSuperBlockEntropy() :
* Builds entropy for the super-block.
* @return : 0 on success or error code */
static size_t
ZSTD_buildSuperBlockEntropy(seqStore_t* seqStorePtr,
const ZSTD_entropyCTables_t* prevEntropy,
ZSTD_entropyCTables_t* nextEntropy,
const ZSTD_CCtx_params* cctxParams,
ZSTD_entropyCTablesMetadata_t* entropyMetadata,
void* workspace, size_t wkspSize)
{
size_t const litSize = seqStorePtr->lit - seqStorePtr->litStart;
DEBUGLOG(5, "ZSTD_buildSuperBlockEntropy");
entropyMetadata->hufMetadata.hufDesSize =
ZSTD_buildSuperBlockEntropy_literal(seqStorePtr->litStart, litSize,
&prevEntropy->huf, &nextEntropy->huf,
&entropyMetadata->hufMetadata,
ZSTD_disableLiteralsCompression(cctxParams),
workspace, wkspSize);
FORWARD_IF_ERROR(entropyMetadata->hufMetadata.hufDesSize, "ZSTD_buildSuperBlockEntropy_literal failed");
entropyMetadata->fseMetadata.fseTablesSize =
ZSTD_buildSuperBlockEntropy_sequences(seqStorePtr,
&prevEntropy->fse, &nextEntropy->fse,
cctxParams,
&entropyMetadata->fseMetadata,
workspace, wkspSize);
FORWARD_IF_ERROR(entropyMetadata->fseMetadata.fseTablesSize, "ZSTD_buildSuperBlockEntropy_sequences failed");
return 0;
}
/** ZSTD_compressSubBlock_literal() :
* Compresses literals section for a sub-block.
* When we have to write the Huffman table we will sometimes choose a header
@ -643,8 +365,9 @@ static size_t ZSTD_estimateSubBlockSize_sequences(const BYTE* ofCodeTable,
void* workspace, size_t wkspSize,
int writeEntropy)
{
size_t sequencesSectionHeaderSize = 3; /* Use hard coded size of 3 bytes */
size_t const sequencesSectionHeaderSize = 3; /* Use hard coded size of 3 bytes */
size_t cSeqSizeEstimate = 0;
if (nbSeq == 0) return sequencesSectionHeaderSize;
cSeqSizeEstimate += ZSTD_estimateSubBlockSize_symbolType(fseMetadata->ofType, ofCodeTable, MaxOff,
nbSeq, fseTables->offcodeCTable, NULL,
OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff,
@ -830,7 +553,7 @@ size_t ZSTD_compressSuperBlock(ZSTD_CCtx* zc,
unsigned lastBlock) {
ZSTD_entropyCTablesMetadata_t entropyMetadata;
FORWARD_IF_ERROR(ZSTD_buildSuperBlockEntropy(&zc->seqStore,
FORWARD_IF_ERROR(ZSTD_buildBlockEntropyStats(&zc->seqStore,
&zc->blockState.prevCBlock->entropy,
&zc->blockState.nextCBlock->entropy,
&zc->appliedParams,

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
* Copyright (c) Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
* Copyright (c) Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@ -35,6 +35,10 @@ extern "C" {
#define ZSTD_CWKSP_ASAN_REDZONE_SIZE 128
#endif
/* Set our tables and aligneds to align by 64 bytes */
#define ZSTD_CWKSP_ALIGNMENT_BYTES 64
/*-*************************************
* Structures
***************************************/
@ -117,10 +121,11 @@ typedef enum {
* - Tables: these are any of several different datastructures (hash tables,
* chain tables, binary trees) that all respect a common format: they are
* uint32_t arrays, all of whose values are between 0 and (nextSrc - base).
* Their sizes depend on the cparams.
* Their sizes depend on the cparams. These tables are 64-byte aligned.
*
* - Aligned: these buffers are used for various purposes that require 4 byte
* alignment, but don't require any initialization before they're used.
* alignment, but don't require any initialization before they're used. These
* buffers are each aligned to 64 bytes.
*
* - Buffers: these buffers are used for various purposes that don't require
* any alignment or initialization before they're used. This means they can
@ -133,8 +138,7 @@ typedef enum {
*
* 1. Objects
* 2. Buffers
* 3. Aligned
* 4. Tables
* 3. Aligned/Tables
*
* Attempts to reserve objects of different types out of order will fail.
*/
@ -187,6 +191,8 @@ MEM_STATIC size_t ZSTD_cwksp_align(size_t size, size_t const align) {
* Since tables aren't currently redzoned, you don't need to call through this
* to figure out how much space you need for the matchState tables. Everything
* else is though.
*
* Do not use for sizing aligned buffers. Instead, use ZSTD_cwksp_aligned_alloc_size().
*/
MEM_STATIC size_t ZSTD_cwksp_alloc_size(size_t size) {
if (size == 0)
@ -198,30 +204,110 @@ MEM_STATIC size_t ZSTD_cwksp_alloc_size(size_t size) {
#endif
}
MEM_STATIC void ZSTD_cwksp_internal_advance_phase(
/**
* Returns an adjusted alloc size that is the nearest larger multiple of 64 bytes.
* Used to determine the number of bytes required for a given "aligned".
*/
MEM_STATIC size_t ZSTD_cwksp_aligned_alloc_size(size_t size) {
return ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(size, ZSTD_CWKSP_ALIGNMENT_BYTES));
}
/**
* Returns the amount of additional space the cwksp must allocate
* for internal purposes (currently only alignment).
*/
MEM_STATIC size_t ZSTD_cwksp_slack_space_required(void) {
/* For alignment, the wksp will always allocate an additional n_1=[1, 64] bytes
* to align the beginning of tables section, as well as another n_2=[0, 63] bytes
 * to align the beginning of the aligned section.
*
* n_1 + n_2 == 64 bytes if the cwksp is freshly allocated, due to tables and
* aligneds being sized in multiples of 64 bytes.
*/
size_t const slackSpace = ZSTD_CWKSP_ALIGNMENT_BYTES;
return slackSpace;
}
/**
* Return the number of additional bytes required to align a pointer to the given number of bytes.
* alignBytes must be a power of two.
*/
MEM_STATIC size_t ZSTD_cwksp_bytes_to_align_ptr(void* ptr, const size_t alignBytes) {
size_t const alignBytesMask = alignBytes - 1;
size_t const bytes = (alignBytes - ((size_t)ptr & (alignBytesMask))) & alignBytesMask;
assert((alignBytes & alignBytesMask) == 0);
assert(bytes != ZSTD_CWKSP_ALIGNMENT_BYTES);
return bytes;
}
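For illustration, the alignment arithmetic above can be exercised in isolation. The following is a minimal standalone sketch, assuming only that alignBytes is a power of two (exactly what the assert enforces); align_up_delta is a hypothetical name, not a zstd function.

#include <assert.h>
#include <stddef.h>
#include <stdint.h>

/* Hypothetical stand-in for ZSTD_cwksp_bytes_to_align_ptr(): the number of
 * bytes to skip so that addr becomes a multiple of alignBytes. */
static size_t align_up_delta(uintptr_t addr, size_t alignBytes)
{
    size_t const mask = alignBytes - 1;
    assert((alignBytes & mask) == 0);            /* power of two */
    return (alignBytes - (addr & mask)) & mask;  /* 0 when already aligned */
}

/* e.g. align_up_delta(0x1007, 64) == 57 ; align_up_delta(0x1040, 64) == 0 */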
/**
* Internal function. Do not use directly.
 * Reserves the given number of bytes within the aligned/buffer segment of the wksp,
 * which grows downward from the end of the wksp (as opposed to the object/table segment).
*
* Returns a pointer to the beginning of that space.
*/
MEM_STATIC void* ZSTD_cwksp_reserve_internal_buffer_space(ZSTD_cwksp* ws, size_t const bytes) {
void* const alloc = (BYTE*)ws->allocStart - bytes;
void* const bottom = ws->tableEnd;
DEBUGLOG(5, "cwksp: reserving %p %zd bytes, %zd bytes remaining",
alloc, bytes, ZSTD_cwksp_available_space(ws) - bytes);
ZSTD_cwksp_assert_internal_consistency(ws);
assert(alloc >= bottom);
if (alloc < bottom) {
DEBUGLOG(4, "cwksp: alloc failed!");
ws->allocFailed = 1;
return NULL;
}
if (alloc < ws->tableValidEnd) {
ws->tableValidEnd = alloc;
}
ws->allocStart = alloc;
return alloc;
}
/**
* Moves the cwksp to the next phase, and does any necessary allocations.
 * Returns 0 on success, or a zstd error code.
*/
MEM_STATIC size_t ZSTD_cwksp_internal_advance_phase(
ZSTD_cwksp* ws, ZSTD_cwksp_alloc_phase_e phase) {
assert(phase >= ws->phase);
if (phase > ws->phase) {
/* Going from allocating objects to allocating buffers */
if (ws->phase < ZSTD_cwksp_alloc_buffers &&
phase >= ZSTD_cwksp_alloc_buffers) {
ws->tableValidEnd = ws->objectEnd;
}
/* Going from allocating buffers to allocating aligneds/tables */
if (ws->phase < ZSTD_cwksp_alloc_aligned &&
phase >= ZSTD_cwksp_alloc_aligned) {
/* If unaligned allocations down from a too-large top have left us
* unaligned, we need to realign our alloc ptr. Technically, this
* can consume space that is unaccounted for in the neededSpace
* calculation. However, I believe this can only happen when the
* workspace is too large, and specifically when it is too large
* by a larger margin than the space that will be consumed. */
/* TODO: cleaner, compiler warning friendly way to do this??? */
ws->allocStart = (BYTE*)ws->allocStart - ((size_t)ws->allocStart & (sizeof(U32)-1));
if (ws->allocStart < ws->tableValidEnd) {
ws->tableValidEnd = ws->allocStart;
{ /* Align the start of the "aligned" to 64 bytes. Use [1, 64] bytes. */
size_t const bytesToAlign =
ZSTD_CWKSP_ALIGNMENT_BYTES - ZSTD_cwksp_bytes_to_align_ptr(ws->allocStart, ZSTD_CWKSP_ALIGNMENT_BYTES);
DEBUGLOG(5, "reserving aligned alignment addtl space: %zu", bytesToAlign);
ZSTD_STATIC_ASSERT((ZSTD_CWKSP_ALIGNMENT_BYTES & (ZSTD_CWKSP_ALIGNMENT_BYTES - 1)) == 0); /* power of 2 */
RETURN_ERROR_IF(!ZSTD_cwksp_reserve_internal_buffer_space(ws, bytesToAlign),
memory_allocation, "aligned phase - alignment initial allocation failed!");
}
{ /* Align the start of the tables to 64 bytes. Use [0, 63] bytes */
void* const alloc = ws->objectEnd;
size_t const bytesToAlign = ZSTD_cwksp_bytes_to_align_ptr(alloc, ZSTD_CWKSP_ALIGNMENT_BYTES);
void* const end = (BYTE*)alloc + bytesToAlign;
DEBUGLOG(5, "reserving table alignment addtl space: %zu", bytesToAlign);
RETURN_ERROR_IF(end > ws->workspaceEnd, memory_allocation,
"table phase - alignment initial allocation failed!");
ws->objectEnd = end;
ws->tableEnd = end;
ws->tableValidEnd = end;
}
}
ws->phase = phase;
ZSTD_cwksp_assert_internal_consistency(ws);
}
return 0;
}
/**
@ -237,38 +323,25 @@ MEM_STATIC int ZSTD_cwksp_owns_buffer(const ZSTD_cwksp* ws, const void* ptr) {
MEM_STATIC void* ZSTD_cwksp_reserve_internal(
ZSTD_cwksp* ws, size_t bytes, ZSTD_cwksp_alloc_phase_e phase) {
void* alloc;
void* bottom = ws->tableEnd;
ZSTD_cwksp_internal_advance_phase(ws, phase);
alloc = (BYTE *)ws->allocStart - bytes;
if (bytes == 0)
if (ZSTD_isError(ZSTD_cwksp_internal_advance_phase(ws, phase)) || bytes == 0) {
return NULL;
}
#if ZSTD_ADDRESS_SANITIZER && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
/* over-reserve space */
alloc = (BYTE *)alloc - 2 * ZSTD_CWKSP_ASAN_REDZONE_SIZE;
bytes += 2 * ZSTD_CWKSP_ASAN_REDZONE_SIZE;
#endif
DEBUGLOG(5, "cwksp: reserving %p %zd bytes, %zd bytes remaining",
alloc, bytes, ZSTD_cwksp_available_space(ws) - bytes);
ZSTD_cwksp_assert_internal_consistency(ws);
assert(alloc >= bottom);
if (alloc < bottom) {
DEBUGLOG(4, "cwksp: alloc failed!");
ws->allocFailed = 1;
return NULL;
}
if (alloc < ws->tableValidEnd) {
ws->tableValidEnd = alloc;
}
ws->allocStart = alloc;
alloc = ZSTD_cwksp_reserve_internal_buffer_space(ws, bytes);
#if ZSTD_ADDRESS_SANITIZER && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
/* Move alloc so there's ZSTD_CWKSP_ASAN_REDZONE_SIZE unused space on
 * either side. */
alloc = (BYTE *)alloc + ZSTD_CWKSP_ASAN_REDZONE_SIZE;
if (ws->isStatic == ZSTD_cwksp_dynamic_alloc) {
__asan_unpoison_memory_region(alloc, bytes);
if (alloc) {
alloc = (BYTE *)alloc + ZSTD_CWKSP_ASAN_REDZONE_SIZE;
if (ws->isStatic == ZSTD_cwksp_dynamic_alloc) {
__asan_unpoison_memory_region(alloc, bytes);
}
}
#endif
@ -283,28 +356,36 @@ MEM_STATIC BYTE* ZSTD_cwksp_reserve_buffer(ZSTD_cwksp* ws, size_t bytes) {
}
/**
* Reserves and returns memory sized on and aligned on sizeof(unsigned).
* Reserves and returns memory sized on and aligned on ZSTD_CWKSP_ALIGNMENT_BYTES (64 bytes).
*/
MEM_STATIC void* ZSTD_cwksp_reserve_aligned(ZSTD_cwksp* ws, size_t bytes) {
assert((bytes & (sizeof(U32)-1)) == 0);
return ZSTD_cwksp_reserve_internal(ws, ZSTD_cwksp_align(bytes, sizeof(U32)), ZSTD_cwksp_alloc_aligned);
void* ptr = ZSTD_cwksp_reserve_internal(ws, ZSTD_cwksp_align(bytes, ZSTD_CWKSP_ALIGNMENT_BYTES),
ZSTD_cwksp_alloc_aligned);
assert(((size_t)ptr & (ZSTD_CWKSP_ALIGNMENT_BYTES-1))== 0);
return ptr;
}
/**
* Aligned on sizeof(unsigned). These buffers have the special property that
* Aligned on 64 bytes. These buffers have the special property that
* their values remain constrained, allowing us to re-use them without
* memset()-ing them.
*/
MEM_STATIC void* ZSTD_cwksp_reserve_table(ZSTD_cwksp* ws, size_t bytes) {
const ZSTD_cwksp_alloc_phase_e phase = ZSTD_cwksp_alloc_aligned;
void* alloc = ws->tableEnd;
void* end = (BYTE *)alloc + bytes;
void* top = ws->allocStart;
void* alloc;
void* end;
void* top;
if (ZSTD_isError(ZSTD_cwksp_internal_advance_phase(ws, phase))) {
return NULL;
}
alloc = ws->tableEnd;
end = (BYTE *)alloc + bytes;
top = ws->allocStart;
DEBUGLOG(5, "cwksp: reserving %p table %zd bytes, %zd bytes remaining",
alloc, bytes, ZSTD_cwksp_available_space(ws) - bytes);
assert((bytes & (sizeof(U32)-1)) == 0);
ZSTD_cwksp_internal_advance_phase(ws, phase);
ZSTD_cwksp_assert_internal_consistency(ws);
assert(end <= top);
if (end > top) {
@ -320,6 +401,8 @@ MEM_STATIC void* ZSTD_cwksp_reserve_table(ZSTD_cwksp* ws, size_t bytes) {
}
#endif
assert((bytes & (ZSTD_CWKSP_ALIGNMENT_BYTES-1)) == 0);
assert(((size_t)alloc & (ZSTD_CWKSP_ALIGNMENT_BYTES-1))== 0);
return alloc;
}
@ -527,6 +610,24 @@ MEM_STATIC int ZSTD_cwksp_reserve_failed(const ZSTD_cwksp* ws) {
* Functions Checking Free Space
***************************************/
/* ZSTD_cwksp_estimated_space_within_bounds() :
 * Returns whether the estimated space needed for a wksp is within an acceptable limit of the
 * actual amount of space used.
 */
MEM_STATIC int ZSTD_cwksp_estimated_space_within_bounds(const ZSTD_cwksp* const ws,
size_t const estimatedSpace, int resizedWorkspace) {
if (resizedWorkspace) {
/* Resized/newly allocated wksp should have exact bounds */
return ZSTD_cwksp_used(ws) == estimatedSpace;
} else {
/* Due to alignment, when reusing a workspace, we can consume up to 63 bytes
 * fewer or more than estimatedSpace. See the comments in zstd_cwksp.h for details.
*/
return (ZSTD_cwksp_used(ws) >= estimatedSpace - 63) && (ZSTD_cwksp_used(ws) <= estimatedSpace + 63);
}
}
MEM_STATIC size_t ZSTD_cwksp_available_space(ZSTD_cwksp* ws) {
return (size_t)((BYTE*)ws->allocStart - (BYTE*)ws->tableEnd);
}

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
* Copyright (c) Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@ -409,7 +409,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
hashSmall[hSmall] = hashLong[hLong] = curr; /* update hash table */
if ((((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex doesn't overlap dict + prefix */
& (repIndex > dictStartIndex))
& (offset_1 < curr+1 - dictStartIndex)) /* note: we are searching at curr+1 */
&& (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
@ -477,7 +477,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
U32 const repIndex2 = current2 - offset_2;
const BYTE* repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2;
if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) /* intentional overflow : ensure repIndex2 doesn't overlap dict + prefix */
& (repIndex2 > dictStartIndex))
& (offset_2 < current2 - dictStartIndex))
&& (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
* Copyright (c) Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
* Copyright (c) Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@ -416,9 +416,9 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
const BYTE* const repMatch = repBase + repIndex;
hashTable[h] = curr; /* update hash table */
DEBUGLOG(7, "offset_1 = %u , curr = %u", offset_1, curr);
assert(offset_1 <= curr +1); /* check repIndex */
if ( (((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow */ & (repIndex > dictStartIndex))
if ( ( ((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow */
& (offset_1 < curr+1 - dictStartIndex) ) /* note: we are searching at curr+1 */
&& (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
size_t const rLength = ZSTD_count_2segments(ip+1 +4, repMatch +4, iend, repMatchEnd, prefixStart) + 4;
@ -453,7 +453,7 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
U32 const current2 = (U32)(ip-base);
U32 const repIndex2 = current2 - offset_2;
const BYTE* const repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2;
if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) & (repIndex2 > dictStartIndex)) /* intentional overflow */
if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) & (offset_2 < curr - dictStartIndex)) /* intentional overflow */
&& (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
* Copyright (c) Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the

File diff suppressed because it is too large

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
* Copyright (c) Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@ -26,6 +26,7 @@ extern "C" {
#define ZSTD_LAZY_DDSS_BUCKET_LOG 2
U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip);
void ZSTD_row_update(ZSTD_matchState_t* const ms, const BYTE* ip);
void ZSTD_dedicatedDictSearch_lazy_loadDictionary(ZSTD_matchState_t* ms, const BYTE* const ip);
@ -43,6 +44,15 @@ size_t ZSTD_compressBlock_lazy(
size_t ZSTD_compressBlock_greedy(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_lazy2_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_lazy_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_greedy_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_btlazy2_dictMatchState(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
@ -56,6 +66,15 @@ size_t ZSTD_compressBlock_lazy_dictMatchState(
size_t ZSTD_compressBlock_greedy_dictMatchState(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_lazy2_dictMatchState_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_lazy_dictMatchState_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_greedy_dictMatchState_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
@ -66,6 +85,15 @@ size_t ZSTD_compressBlock_lazy_dedicatedDictSearch(
size_t ZSTD_compressBlock_greedy_dedicatedDictSearch(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_lazy_dedicatedDictSearch_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_greedy_dedicatedDictSearch_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_greedy_extDict(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
@ -76,9 +104,19 @@ size_t ZSTD_compressBlock_lazy_extDict(
size_t ZSTD_compressBlock_lazy2_extDict(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_greedy_extDict_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_lazy_extDict_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_lazy2_extDict_row(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_btlazy2_extDict(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
#if defined (__cplusplus)
}

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
* Copyright (c) Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@ -57,6 +57,33 @@ static void ZSTD_ldm_gear_init(ldmRollingHashState_t* state, ldmParams_t const*
}
}
/** ZSTD_ldm_gear_reset()
* Feeds [data, data + minMatchLength) into the hash without registering any
* splits. This effectively resets the hash state. This is used when skipping
 * over data, either at the beginning of a block or when skipping sections.
*/
static void ZSTD_ldm_gear_reset(ldmRollingHashState_t* state,
BYTE const* data, size_t minMatchLength)
{
U64 hash = state->rolling;
size_t n = 0;
#define GEAR_ITER_ONCE() do { \
hash = (hash << 1) + ZSTD_ldm_gearTab[data[n] & 0xff]; \
n += 1; \
} while (0)
while (n + 3 < minMatchLength) {
GEAR_ITER_ONCE();
GEAR_ITER_ONCE();
GEAR_ITER_ONCE();
GEAR_ITER_ONCE();
}
while (n < minMatchLength) {
GEAR_ITER_ONCE();
}
#undef GEAR_ITER_ONCE
}
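The gear hash above is the rolling hash behind zstd's long-distance matcher. As a hedged, self-contained sketch of the underlying technique (content-defined chunking), the loop below reports a split wherever the low k bits of the rolling hash are all ones, which fires on average once every 2^k bytes; the table and k here are hypothetical stand-ins for ZSTD_ldm_gearTab and the stopMask derived from the configured hash rate.

#include <stddef.h>
#include <stdint.h>

/* Returns the position just past the first split point, or size if none.
 * tab can be any fixed table of 256 random 64-bit values. */
static size_t gear_find_split(const uint8_t* data, size_t size,
                              const uint64_t tab[256], unsigned k)
{
    uint64_t const stopMask = ((uint64_t)1 << k) - 1;
    uint64_t hash = 0;
    size_t n;
    for (n = 0; n < size; n++) {
        hash = (hash << 1) + tab[data[n]];   /* same update as GEAR_ITER_ONCE */
        if ((hash & stopMask) == stopMask)   /* split condition */
            return n + 1;
    }
    return size;
}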
/** ZSTD_ldm_gear_feed():
*
* Registers in the splits array all the split points found in the first
@ -255,7 +282,7 @@ void ZSTD_ldm_fillHashTable(
while (ip < iend) {
size_t hashed;
unsigned n;
numSplits = 0;
hashed = ZSTD_ldm_gear_feed(&hashState, ip, iend - ip, splits, &numSplits);
@ -327,16 +354,8 @@ static size_t ZSTD_ldm_generateSequences_internal(
/* Initialize the rolling hash state with the first minMatchLength bytes */
ZSTD_ldm_gear_init(&hashState, params);
{
size_t n = 0;
while (n < minMatchLength) {
numSplits = 0;
n += ZSTD_ldm_gear_feed(&hashState, ip + n, minMatchLength - n,
splits, &numSplits);
}
ip += minMatchLength;
}
ZSTD_ldm_gear_reset(&hashState, ip, minMatchLength);
ip += minMatchLength;
while (ip < ilimit) {
size_t hashed;
@ -361,6 +380,7 @@ static size_t ZSTD_ldm_generateSequences_internal(
for (n = 0; n < numSplits; n++) {
size_t forwardMatchLength = 0, backwardMatchLength = 0,
bestMatchLength = 0, mLength;
U32 offset;
BYTE const* const split = candidates[n].split;
U32 const checksum = candidates[n].checksum;
U32 const hash = candidates[n].hash;
@ -428,9 +448,9 @@ static size_t ZSTD_ldm_generateSequences_internal(
}
/* Match found */
offset = (U32)(split - base) - bestEntry->offset;
mLength = forwardMatchLength + backwardMatchLength;
{
U32 const offset = (U32)(split - base) - bestEntry->offset;
rawSeq* const seq = rawSeqStore->seq + rawSeqStore->size;
/* Out of sequence storage */
@ -447,6 +467,21 @@ static size_t ZSTD_ldm_generateSequences_internal(
ZSTD_ldm_insertEntry(ldmState, hash, newEntry, *params);
anchor = split + forwardMatchLength;
/* If we find a match that ends after the data that we've hashed
 * then we have a repeating, overlapping pattern. E.g. all zeros.
* If one repetition of the pattern matches our `stopMask` then all
 * repetitions will. We don't need to insert them all into our table,
* only the first one. So skip over overlapping matches.
* This is a major speed boost (20x) for compressing a single byte
* repeated, when that byte ends up in the table.
*/
if (anchor > ip + hashed) {
ZSTD_ldm_gear_reset(&hashState, anchor - minMatchLength, minMatchLength);
/* Continue the outer loop at anchor (ip + hashed == anchor). */
ip = anchor - hashed;
break;
}
}
ip += hashed;
@ -500,7 +535,7 @@ size_t ZSTD_ldm_generateSequences(
assert(chunkStart < iend);
/* 1. Perform overflow correction if necessary. */
if (ZSTD_window_needOverflowCorrection(ldmState->window, chunkEnd)) {
if (ZSTD_window_needOverflowCorrection(ldmState->window, 0, maxDist, ldmState->loadedDictEnd, chunkStart, chunkEnd)) {
U32 const ldmHSize = 1U << params->hashLog;
U32 const correction = ZSTD_window_correctOverflow(
&ldmState->window, /* cycleLog */ 0, maxDist, chunkStart);
@ -622,12 +657,13 @@ void ZSTD_ldm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t nbBytes) {
size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_useRowMatchFinderMode_e useRowMatchFinder,
void const* src, size_t srcSize)
{
const ZSTD_compressionParameters* const cParams = &ms->cParams;
unsigned const minMatch = cParams->minMatch;
ZSTD_blockCompressor const blockCompressor =
ZSTD_selectBlockCompressor(cParams->strategy, ZSTD_matchState_dictMode(ms));
ZSTD_selectBlockCompressor(cParams->strategy, useRowMatchFinder, ZSTD_matchState_dictMode(ms));
/* Input bounds */
BYTE const* const istart = (BYTE const*)src;
BYTE const* const iend = istart + srcSize;

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
* Copyright (c) Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@ -66,6 +66,7 @@ size_t ZSTD_ldm_generateSequences(
*/
size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_useRowMatchFinderMode_e useRowMatchFinder,
void const* src, size_t srcSize);
/**

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
* Copyright (c) Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-2021, Przemyslaw Skibinski, Yann Collet, Facebook, Inc.
* Copyright (c) Przemyslaw Skibinski, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
* Copyright (c) Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
* Copyright (c) Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@ -512,7 +512,7 @@ ZSTDMT_serialState_reset(serialState_t* serialState,
if (dictSize > 0) {
if (dictContentType == ZSTD_dct_rawContent) {
BYTE const* const dictEnd = (const BYTE*)dict + dictSize;
ZSTD_window_update(&serialState->ldmState.window, dict, dictSize);
ZSTD_window_update(&serialState->ldmState.window, dict, dictSize, /* forceNonContiguous */ 0);
ZSTD_ldm_fillHashTable(&serialState->ldmState, (const BYTE*)dict, dictEnd, &params.ldmParams);
serialState->ldmState.loadedDictEnd = params.forceWindow ? 0 : (U32)(dictEnd - serialState->ldmState.window.base);
} else {
@ -569,7 +569,7 @@ static void ZSTDMT_serialState_update(serialState_t* serialState,
assert(seqStore.seq != NULL && seqStore.pos == 0 &&
seqStore.size == 0 && seqStore.capacity > 0);
assert(src.size <= serialState->params.jobSize);
ZSTD_window_update(&serialState->ldmState.window, src.start, src.size);
ZSTD_window_update(&serialState->ldmState.window, src.start, src.size, /* forceNonContiguous */ 0);
error = ZSTD_ldm_generateSequences(
&serialState->ldmState, &seqStore,
&serialState->params.ldmParams, src.start, src.size);
@ -695,6 +695,10 @@ static void ZSTDMT_compressionJob(void* jobDescription)
{ size_t const forceWindowError = ZSTD_CCtxParams_setParameter(&jobParams, ZSTD_c_forceMaxWindow, !job->firstJob);
if (ZSTD_isError(forceWindowError)) JOB_ERROR(forceWindowError);
}
if (!job->firstJob) {
size_t const err = ZSTD_CCtxParams_setParameter(&jobParams, ZSTD_c_deterministicRefPrefix, 0);
if (ZSTD_isError(err)) JOB_ERROR(err);
}
{ size_t const initError = ZSTD_compressBegin_advanced_internal(cctx,
job->prefix.start, job->prefix.size, ZSTD_dct_rawContent, /* load dictionary in "content-only" mode (no header analysis) */
ZSTD_dtlm_fast,
@ -750,6 +754,12 @@ static void ZSTDMT_compressionJob(void* jobDescription)
if (ZSTD_isError(cSize)) JOB_ERROR(cSize);
lastCBlockSize = cSize;
} }
if (!job->firstJob) {
/* Double check that we don't have an ext-dict, because then our
* repcode invalidation doesn't work.
*/
assert(!ZSTD_window_hasExtDict(cctx->blockState.matchState.window));
}
ZSTD_CCtx_trace(cctx, 0);
_endJob:
@ -1240,9 +1250,8 @@ size_t ZSTDMT_initCStream_internal(
if (params.rsyncable) {
/* Aim for the targetsectionSize as the average job size. */
U32 const jobSizeMB = (U32)(mtctx->targetSectionSize >> 20);
U32 const rsyncBits = ZSTD_highbit32(jobSizeMB) + 20;
assert(jobSizeMB >= 1);
U32 const jobSizeKB = (U32)(mtctx->targetSectionSize >> 10);
U32 const rsyncBits = (assert(jobSizeKB >= 1), ZSTD_highbit32(jobSizeKB) + 10);
DEBUGLOG(4, "rsyncLog = %u", rsyncBits);
mtctx->rsync.hash = 0;
mtctx->rsync.hitMask = (1ULL << rsyncBits) - 1;
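As a quick sanity check of the rsyncable math above, here is a worked example under the assumption of a 4 MiB targetSectionSize (the value is illustrative only):

U32 const jobSizeKB = (U32)((4u << 20) >> 10);         /* 4096 KB */
U32 const rsyncBits = ZSTD_highbit32(jobSizeKB) + 10;  /* 12 + 10 = 22 */
/* hitMask == (1ULL << 22) - 1, so the rolling hash fires on average once
 * every 2^22 bytes = 4 MiB, matching the target average job size. */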

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
* Copyright (c) Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@ -32,11 +32,11 @@
/* === Constants === */
#ifndef ZSTDMT_NBWORKERS_MAX
# define ZSTDMT_NBWORKERS_MAX 200
#ifndef ZSTDMT_NBWORKERS_MAX /* a different value can be selected at compile time */
# define ZSTDMT_NBWORKERS_MAX ((sizeof(void*)==4) /*32-bit*/ ? 64 : 256)
#endif
#ifndef ZSTDMT_JOBSIZE_MIN
# define ZSTDMT_JOBSIZE_MIN (1 MB)
#ifndef ZSTDMT_JOBSIZE_MIN /* a different value can be selected at compile time */
# define ZSTDMT_JOBSIZE_MIN (512 KB)
#endif
#define ZSTDMT_JOBLOG_MAX (MEM_32bits() ? 29 : 30)
#define ZSTDMT_JOBSIZE_MAX (MEM_32bits() ? (512 MB) : (1024 MB))

View File

@ -1,7 +1,7 @@
/* ******************************************************************
* huff0 huffman decoder,
* part of Finite State Entropy library
* Copyright (c) 2013-2021, Yann Collet, Facebook, Inc.
* Copyright (c) Yann Collet, Facebook, Inc.
*
* You can contact the author at :
* - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
@ -528,13 +528,15 @@ typedef rankValCol_t rankVal_t[HUF_TABLELOG_MAX];
static void HUF_fillDTableX2Level2(HUF_DEltX2* DTable, U32 sizeLog, const U32 consumed,
const U32* rankValOrigin, const int minWeight,
const sortedSymbol_t* sortedSymbols, const U32 sortedListSize,
U32 nbBitsBaseline, U16 baseSeq)
U32 nbBitsBaseline, U16 baseSeq, U32* wksp, size_t wkspSize)
{
HUF_DEltX2 DElt;
U32 rankVal[HUF_TABLELOG_MAX + 1];
U32* rankVal = wksp;
assert(wkspSize >= HUF_TABLELOG_MAX + 1);
(void)wkspSize;
/* get pre-calculated rankVal */
ZSTD_memcpy(rankVal, rankValOrigin, sizeof(rankVal));
ZSTD_memcpy(rankVal, rankValOrigin, sizeof(U32) * (HUF_TABLELOG_MAX + 1));
/* fill skipped values */
if (minWeight>1) {
@ -569,14 +571,18 @@ static void HUF_fillDTableX2Level2(HUF_DEltX2* DTable, U32 sizeLog, const U32 co
static void HUF_fillDTableX2(HUF_DEltX2* DTable, const U32 targetLog,
const sortedSymbol_t* sortedList, const U32 sortedListSize,
const U32* rankStart, rankVal_t rankValOrigin, const U32 maxWeight,
const U32 nbBitsBaseline)
const U32 nbBitsBaseline, U32* wksp, size_t wkspSize)
{
U32 rankVal[HUF_TABLELOG_MAX + 1];
U32* rankVal = wksp;
const int scaleLog = nbBitsBaseline - targetLog; /* note : targetLog >= srcLog, hence scaleLog <= 1 */
const U32 minBits = nbBitsBaseline - maxWeight;
U32 s;
ZSTD_memcpy(rankVal, rankValOrigin, sizeof(rankVal));
assert(wkspSize >= HUF_TABLELOG_MAX + 1);
wksp += HUF_TABLELOG_MAX + 1;
wkspSize -= HUF_TABLELOG_MAX + 1;
ZSTD_memcpy(rankVal, rankValOrigin, sizeof(U32) * (HUF_TABLELOG_MAX + 1));
/* fill DTable */
for (s=0; s<sortedListSize; s++) {
@ -594,7 +600,7 @@ static void HUF_fillDTableX2(HUF_DEltX2* DTable, const U32 targetLog,
HUF_fillDTableX2Level2(DTable+start, targetLog-nbBits, nbBits,
rankValOrigin[nbBits], minWeight,
sortedList+sortedRank, sortedListSize-sortedRank,
nbBitsBaseline, symbol);
nbBitsBaseline, symbol, wksp, wkspSize);
} else {
HUF_DEltX2 DElt;
MEM_writeLE16(&(DElt.sequence), symbol);
@ -608,6 +614,15 @@ static void HUF_fillDTableX2(HUF_DEltX2* DTable, const U32 targetLog,
}
}
typedef struct {
rankValCol_t rankVal[HUF_TABLELOG_MAX];
U32 rankStats[HUF_TABLELOG_MAX + 1];
U32 rankStart0[HUF_TABLELOG_MAX + 2];
sortedSymbol_t sortedSymbol[HUF_SYMBOLVALUE_MAX + 1];
BYTE weightList[HUF_SYMBOLVALUE_MAX + 1];
U32 calleeWksp[HUF_READ_STATS_WORKSPACE_SIZE_U32];
} HUF_ReadDTableX2_Workspace;
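The workspace struct above replaces the earlier scheme of carving the caller's buffer by advancing a U32 cursor with hand-written sizing math. In miniature, with hypothetical names, the pattern looks like this (the single size check mirrors the sizeof(*wksp) test in HUF_readDTableX2_wksp() below):

typedef struct {
    unsigned counts[256];
    unsigned char scratch[64];
} ExampleWksp;                      /* compiler handles layout and padding */

static size_t example_use(void* workSpace, size_t wkspSize)
{
    ExampleWksp* const wksp = (ExampleWksp*)workSpace;
    if (sizeof(*wksp) > wkspSize)
        return (size_t)(-1);        /* the real code returns ERROR(GENERIC) */
    /* ... use wksp->counts and wksp->scratch ... */
    return 0;
}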
size_t HUF_readDTableX2_wksp(HUF_DTable* DTable,
const void* src, size_t srcSize,
void* workSpace, size_t wkspSize)
@ -620,47 +635,32 @@ size_t HUF_readDTableX2_wksp(HUF_DTable* DTable,
HUF_DEltX2* const dt = (HUF_DEltX2*)dtPtr;
U32 *rankStart;
rankValCol_t* rankVal;
U32* rankStats;
U32* rankStart0;
sortedSymbol_t* sortedSymbol;
BYTE* weightList;
size_t spaceUsed32 = 0;
HUF_ReadDTableX2_Workspace* const wksp = (HUF_ReadDTableX2_Workspace*)workSpace;
rankVal = (rankValCol_t *)((U32 *)workSpace + spaceUsed32);
spaceUsed32 += (sizeof(rankValCol_t) * HUF_TABLELOG_MAX) >> 2;
rankStats = (U32 *)workSpace + spaceUsed32;
spaceUsed32 += HUF_TABLELOG_MAX + 1;
rankStart0 = (U32 *)workSpace + spaceUsed32;
spaceUsed32 += HUF_TABLELOG_MAX + 2;
sortedSymbol = (sortedSymbol_t *)workSpace + (spaceUsed32 * sizeof(U32)) / sizeof(sortedSymbol_t);
spaceUsed32 += HUF_ALIGN(sizeof(sortedSymbol_t) * (HUF_SYMBOLVALUE_MAX + 1), sizeof(U32)) >> 2;
weightList = (BYTE *)((U32 *)workSpace + spaceUsed32);
spaceUsed32 += HUF_ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2;
if (sizeof(*wksp) > wkspSize) return ERROR(GENERIC);
if ((spaceUsed32 << 2) > wkspSize) return ERROR(tableLog_tooLarge);
rankStart = rankStart0 + 1;
ZSTD_memset(rankStats, 0, sizeof(U32) * (2 * HUF_TABLELOG_MAX + 2 + 1));
rankStart = wksp->rankStart0 + 1;
ZSTD_memset(wksp->rankStats, 0, sizeof(wksp->rankStats));
ZSTD_memset(wksp->rankStart0, 0, sizeof(wksp->rankStart0));
DEBUG_STATIC_ASSERT(sizeof(HUF_DEltX2) == sizeof(HUF_DTable)); /* if compiler fails here, assertion is wrong */
if (maxTableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
/* ZSTD_memset(weightList, 0, sizeof(weightList)); */ /* is not necessary, even though some analyzer complain ... */
iSize = HUF_readStats(weightList, HUF_SYMBOLVALUE_MAX + 1, rankStats, &nbSymbols, &tableLog, src, srcSize);
iSize = HUF_readStats_wksp(wksp->weightList, HUF_SYMBOLVALUE_MAX + 1, wksp->rankStats, &nbSymbols, &tableLog, src, srcSize, wksp->calleeWksp, sizeof(wksp->calleeWksp), /* bmi2 */ 0);
if (HUF_isError(iSize)) return iSize;
/* check result */
if (tableLog > maxTableLog) return ERROR(tableLog_tooLarge); /* DTable can't fit code depth */
/* find maxWeight */
for (maxW = tableLog; rankStats[maxW]==0; maxW--) {} /* necessarily finds a solution before 0 */
for (maxW = tableLog; wksp->rankStats[maxW]==0; maxW--) {} /* necessarily finds a solution before 0 */
/* Get start index of each weight */
{ U32 w, nextRankStart = 0;
for (w=1; w<maxW+1; w++) {
U32 curr = nextRankStart;
nextRankStart += rankStats[w];
nextRankStart += wksp->rankStats[w];
rankStart[w] = curr;
}
rankStart[0] = nextRankStart; /* put all 0w symbols at the end of sorted list*/
@ -670,37 +670,38 @@ size_t HUF_readDTableX2_wksp(HUF_DTable* DTable,
/* sort symbols by weight */
{ U32 s;
for (s=0; s<nbSymbols; s++) {
U32 const w = weightList[s];
U32 const w = wksp->weightList[s];
U32 const r = rankStart[w]++;
sortedSymbol[r].symbol = (BYTE)s;
sortedSymbol[r].weight = (BYTE)w;
wksp->sortedSymbol[r].symbol = (BYTE)s;
wksp->sortedSymbol[r].weight = (BYTE)w;
}
rankStart[0] = 0; /* forget 0w symbols; this is beginning of weight(1) */
}
/* Build rankVal */
{ U32* const rankVal0 = rankVal[0];
{ U32* const rankVal0 = wksp->rankVal[0];
{ int const rescale = (maxTableLog-tableLog) - 1; /* tableLog <= maxTableLog */
U32 nextRankVal = 0;
U32 w;
for (w=1; w<maxW+1; w++) {
U32 curr = nextRankVal;
nextRankVal += rankStats[w] << (w+rescale);
nextRankVal += wksp->rankStats[w] << (w+rescale);
rankVal0[w] = curr;
} }
{ U32 const minBits = tableLog+1 - maxW;
U32 consumed;
for (consumed = minBits; consumed < maxTableLog - minBits + 1; consumed++) {
U32* const rankValPtr = rankVal[consumed];
U32* const rankValPtr = wksp->rankVal[consumed];
U32 w;
for (w = 1; w < maxW+1; w++) {
rankValPtr[w] = rankVal0[w] >> consumed;
} } } }
HUF_fillDTableX2(dt, maxTableLog,
sortedSymbol, sizeOfSort,
rankStart0, rankVal, maxW,
tableLog+1);
wksp->sortedSymbol, sizeOfSort,
wksp->rankStart0, wksp->rankVal, maxW,
tableLog+1,
wksp->calleeWksp, sizeof(wksp->calleeWksp) / sizeof(U32));
dtd.tableLog = (BYTE)maxTableLog;
dtd.tableType = 1;
@ -1225,7 +1226,7 @@ size_t HUF_decompress1X1 (void* dst, size_t dstSize, const void* cSrc, size_t cS
HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX);
return HUF_decompress1X1_DCtx (DTable, dst, dstSize, cSrc, cSrcSize);
}
#endif
#endif
#ifndef HUF_FORCE_DECOMPRESS_X1
size_t HUF_readDTableX2(HUF_DTable* DTable, const void* src, size_t srcSize)

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
* Copyright (c) Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
* Copyright (c) Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
* Copyright (c) Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@ -58,7 +58,6 @@
#include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */
#include "../common/cpu.h" /* bmi2 */
#include "../common/mem.h" /* low level memory routines */
#include "../common/zstd_trace.h"
#define FSE_STATIC_LINKING_ONLY
#include "../common/fse.h"
#define HUF_STATIC_LINKING_ONLY
@ -789,7 +788,7 @@ static size_t ZSTD_setRleBlock(void* dst, size_t dstCapacity,
static void ZSTD_DCtx_trace_end(ZSTD_DCtx const* dctx, U64 uncompressedSize, U64 compressedSize, unsigned streaming)
{
#if ZSTD_TRACE
if (dctx->traceCtx) {
if (dctx->traceCtx && ZSTD_trace_decompress_end != NULL) {
ZSTD_Trace trace;
ZSTD_memset(&trace, 0, sizeof(trace));
trace.version = ZSTD_VERSION_NUMBER;
@ -1384,7 +1383,7 @@ size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx)
{
assert(dctx != NULL);
#if ZSTD_TRACE
dctx->traceCtx = ZSTD_trace_decompress_begin(dctx);
dctx->traceCtx = (ZSTD_trace_decompress_begin != NULL) ? ZSTD_trace_decompress_begin(dctx) : 0;
#endif
dctx->expected = ZSTD_startingInputLength(dctx->format); /* dctx->format must be properly set */
dctx->stage = ZSTDds_getFrameHeaderSize;

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
* Copyright (c) Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@ -658,7 +658,6 @@ typedef struct {
size_t litLength;
size_t matchLength;
size_t offset;
const BYTE* match;
} seq_t;
typedef struct {
@ -672,9 +671,6 @@ typedef struct {
ZSTD_fseState stateOffb;
ZSTD_fseState stateML;
size_t prevOffset[ZSTD_REP_NUM];
const BYTE* prefixStart;
const BYTE* dictEnd;
size_t pos;
} seqState_t;
/*! ZSTD_overlapCopy8() :
@ -936,10 +932,9 @@ ZSTD_updateFseStateWithDInfo(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, ZSTD
: 0)
typedef enum { ZSTD_lo_isRegularOffset, ZSTD_lo_isLongOffset=1 } ZSTD_longOffset_e;
typedef enum { ZSTD_p_noPrefetch=0, ZSTD_p_prefetch=1 } ZSTD_prefetch_e;
FORCE_INLINE_TEMPLATE seq_t
ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets, const ZSTD_prefetch_e prefetch)
ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets)
{
seq_t seq;
ZSTD_seqSymbol const llDInfo = seqState->stateLL.table[seqState->stateLL.state];
@ -1014,14 +1009,6 @@ ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets, c
DEBUGLOG(6, "seq: litL=%u, matchL=%u, offset=%u",
(U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);
if (prefetch == ZSTD_p_prefetch) {
size_t const pos = seqState->pos + seq.litLength;
const BYTE* const matchBase = (seq.offset > pos) ? seqState->dictEnd : seqState->prefixStart;
seq.match = matchBase + pos - seq.offset; /* note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted.
* No consequence though : no memory access will occur, offset is only used for prefetching */
seqState->pos = pos + seq.matchLength;
}
/* ANS state update
* gcc-9.0.0 does 2.5% worse with ZSTD_updateFseStateWithDInfo().
* clang-9.2.0 does 7% worse with ZSTD_updateFseState().
@ -1122,7 +1109,6 @@ ZSTD_decompressSequences_body( ZSTD_DCtx* dctx,
/* Regen sequences */
if (nbSeq) {
seqState_t seqState;
size_t error = 0;
dctx->fseEntropy = 1;
{ U32 i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
RETURN_ERROR_IF(
@ -1156,13 +1142,14 @@ ZSTD_decompressSequences_body( ZSTD_DCtx* dctx,
* If you see most cycles served out of the DSB you've hit the good case.
* If it is pretty even then you may be in an okay case.
*
* I've been able to reproduce this issue on the following CPUs:
* This issue has been reproduced on the following CPUs:
* - Kabylake: Macbook Pro (15-inch, 2019) 2.4 GHz Intel Core i9
* Use Instruments->Counters to get DSB/MITE cycles.
* I never got performance swings, but I was able to
* go from the good case of mostly DSB to half of the
* cycles served from MITE.
* - Coffeelake: Intel i9-9900k
* - Coffeelake: Intel i7-9700k
*
* I haven't been able to reproduce the instability or DSB misses on any
 * of the following CPUs:
@ -1175,33 +1162,35 @@ ZSTD_decompressSequences_body( ZSTD_DCtx* dctx,
*
* https://gist.github.com/terrelln/9889fc06a423fd5ca6e99351564473f4
*/
__asm__(".p2align 6");
__asm__("nop");
__asm__(".p2align 5");
__asm__("nop");
# if __GNUC__ >= 9
/* better for gcc-9 and gcc-10, worse for clang and gcc-8 */
__asm__(".p2align 3");
# else
__asm__(".p2align 4");
# endif
#endif
for ( ; ; ) {
seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset, ZSTD_p_noPrefetch);
seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litEnd, prefixStart, vBase, dictEnd);
#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
assert(!ZSTD_isError(oneSeqSize));
if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
#endif
if (UNLIKELY(ZSTD_isError(oneSeqSize)))
return oneSeqSize;
DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
BIT_reloadDStream(&(seqState.DStream));
op += oneSeqSize;
/* gcc and clang both don't like early returns in this loop.
* Instead break and check for an error at the end of the loop.
*/
if (UNLIKELY(ZSTD_isError(oneSeqSize))) {
error = oneSeqSize;
if (UNLIKELY(!--nbSeq))
break;
}
if (UNLIKELY(!--nbSeq)) break;
BIT_reloadDStream(&(seqState.DStream));
}
/* check if reached exact end */
DEBUGLOG(5, "ZSTD_decompressSequences_body: after decode loop, remaining nbSeq : %i", nbSeq);
if (ZSTD_isError(error)) return error;
RETURN_ERROR_IF(nbSeq, corruption_detected, "");
RETURN_ERROR_IF(BIT_reloadDStream(&seqState.DStream) < BIT_DStream_completed, corruption_detected, "");
/* save reps for next block */
@ -1232,6 +1221,24 @@ ZSTD_decompressSequences_default(ZSTD_DCtx* dctx,
#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
FORCE_INLINE_TEMPLATE size_t
ZSTD_prefetchMatch(size_t prefetchPos, seq_t const sequence,
const BYTE* const prefixStart, const BYTE* const dictEnd)
{
prefetchPos += sequence.litLength;
{ const BYTE* const matchBase = (sequence.offset > prefetchPos) ? dictEnd : prefixStart;
const BYTE* const match = matchBase + prefetchPos - sequence.offset; /* note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted.
* No consequence though : memory address is only used for prefetching, not for dereferencing */
PREFETCH_L1(match); PREFETCH_L1(match+CACHELINE_SIZE); /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */
}
return prefetchPos + sequence.matchLength;
}
/* This decoding function employs prefetching
* to reduce latency impact of cache misses.
* It's generally employed when block contains a significant portion of long-distance matches
* or when coupled with a "cold" dictionary */
FORCE_INLINE_TEMPLATE size_t
ZSTD_decompressSequencesLong_body(
ZSTD_DCtx* dctx,
@ -1254,18 +1261,17 @@ ZSTD_decompressSequencesLong_body(
/* Regen sequences */
if (nbSeq) {
#define STORED_SEQS 4
#define STORED_SEQS 8
#define STORED_SEQS_MASK (STORED_SEQS-1)
#define ADVANCED_SEQS 4
#define ADVANCED_SEQS STORED_SEQS
seq_t sequences[STORED_SEQS];
int const seqAdvance = MIN(nbSeq, ADVANCED_SEQS);
seqState_t seqState;
int seqNb;
size_t prefetchPos = (size_t)(op-prefixStart); /* track position relative to prefixStart */
dctx->fseEntropy = 1;
{ int i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
seqState.prefixStart = prefixStart;
seqState.pos = (size_t)(op-prefixStart);
seqState.dictEnd = dictEnd;
assert(dst != NULL);
assert(iend >= ip);
RETURN_ERROR_IF(
@ -1277,21 +1283,23 @@ ZSTD_decompressSequencesLong_body(
/* prepare in advance */
for (seqNb=0; (BIT_reloadDStream(&seqState.DStream) <= BIT_DStream_completed) && (seqNb<seqAdvance); seqNb++) {
sequences[seqNb] = ZSTD_decodeSequence(&seqState, isLongOffset, ZSTD_p_prefetch);
PREFETCH_L1(sequences[seqNb].match); PREFETCH_L1(sequences[seqNb].match + sequences[seqNb].matchLength - 1); /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */
seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd);
sequences[seqNb] = sequence;
}
RETURN_ERROR_IF(seqNb<seqAdvance, corruption_detected, "");
/* decode and decompress */
for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && (seqNb<nbSeq) ; seqNb++) {
seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset, ZSTD_p_prefetch);
seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequences[(seqNb-ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd);
#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
assert(!ZSTD_isError(oneSeqSize));
if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb-ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart);
#endif
if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
PREFETCH_L1(sequence.match); PREFETCH_L1(sequence.match + sequence.matchLength - 1); /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */
prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd);
sequences[seqNb & STORED_SEQS_MASK] = sequence;
op += oneSeqSize;
}
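For reference, a minimal self-contained sketch of the software-pipelining pattern this loop implements: decoding runs ADVANCED_SEQS sequences ahead of execution, with decoded sequences parked in a power-of-two ring buffer so prefetched match data has time to reach cache. decode()/execute() below are stand-ins for ZSTD_decodeSequence()/ZSTD_execSequence(); in zstd the prefetch itself is issued at decode time via ZSTD_prefetchMatch().

#include <stdio.h>

#define STORED  8            /* ring size; power of two, like STORED_SEQS */
#define ADVANCE STORED       /* decode lead, like ADVANCED_SEQS */

static int  decode(int i)   { return i; }             /* stand-in stage 1 */
static void execute(int v)  { printf("%d\n", v); }    /* stand-in stage 2 */

int main(void)
{
    int ring[STORED];
    int const nbSeq = 20;
    int const advance = nbSeq < ADVANCE ? nbSeq : ADVANCE;
    int i;
    for (i = 0; i < advance; i++)               /* prime the pipeline */
        ring[i & (STORED - 1)] = decode(i);
    for (; i < nbSeq; i++) {                    /* steady state */
        int const next = decode(i);             /* decode (and prefetch) ahead */
        execute(ring[(i - advance) & (STORED - 1)]);
        ring[i & (STORED - 1)] = next;
    }
    for (i = nbSeq - advance; i < nbSeq; i++)   /* drain the tail */
        execute(ring[i & (STORED - 1)]);
    return 0;
}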

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
* Copyright (c) Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
* Copyright (c) Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@ -21,7 +21,6 @@
*********************************************************/
#include "../common/mem.h" /* BYTE, U16, U32 */
#include "../common/zstd_internal.h" /* ZSTD_seqSymbol */
#include "../common/zstd_trace.h" /* ZSTD_TraceCtx */

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
* Copyright (c) Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
* Copyright (c) Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
* Copyright (c) Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
* Copyright (c) Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
* Copyright (c) Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@ -26,15 +26,16 @@
#include <string.h> /* memset */
#include <time.h> /* clock */
#ifndef ZDICT_STATIC_LINKING_ONLY
# define ZDICT_STATIC_LINKING_ONLY
#endif
#include "../common/mem.h" /* read */
#include "../common/pool.h"
#include "../common/threading.h"
#include "cover.h"
#include "../common/zstd_internal.h" /* includes zstd.h */
#ifndef ZDICT_STATIC_LINKING_ONLY
#define ZDICT_STATIC_LINKING_ONLY
#endif
#include "zdict.h"
#include "../zdict.h"
#include "cover.h"
/*-*************************************
* Constants

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2017-2021, Facebook, Inc.
* Copyright (c) Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@ -8,6 +8,10 @@
* You may select, at your option, one of the above-listed licenses.
*/
#ifndef ZDICT_STATIC_LINKING_ONLY
# define ZDICT_STATIC_LINKING_ONLY
#endif
#include <stdio.h> /* fprintf */
#include <stdlib.h> /* malloc, free, qsort */
#include <string.h> /* memset */
@ -16,10 +20,7 @@
#include "../common/pool.h"
#include "../common/threading.h"
#include "../common/zstd_internal.h" /* includes zstd.h */
#ifndef ZDICT_STATIC_LINKING_ONLY
#define ZDICT_STATIC_LINKING_ONLY
#endif
#include "zdict.h"
#include "../zdict.h"
/**
* COVER_best_t is used for two purposes:

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2018-2021, Facebook, Inc.
* Copyright (c) Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@ -16,16 +16,17 @@
#include <string.h> /* memset */
#include <time.h> /* clock */
#ifndef ZDICT_STATIC_LINKING_ONLY
# define ZDICT_STATIC_LINKING_ONLY
#endif
#include "../common/mem.h" /* read */
#include "../common/pool.h"
#include "../common/threading.h"
#include "cover.h"
#include "../common/zstd_internal.h" /* includes zstd.h */
#include "../compress/zstd_compress_internal.h" /* ZSTD_hash*() */
#ifndef ZDICT_STATIC_LINKING_ONLY
#define ZDICT_STATIC_LINKING_ONLY
#endif
#include "zdict.h"
#include "../zdict.h"
#include "cover.h"
/*-*************************************

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
* Copyright (c) Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@ -41,18 +41,19 @@
#include <stdio.h> /* fprintf, fopen, ftello64 */
#include <time.h> /* clock */
#include "../common/mem.h" /* read */
#include "../common/fse.h" /* FSE_normalizeCount, FSE_writeNCount */
#define HUF_STATIC_LINKING_ONLY
#include "../common/huf.h" /* HUF_buildCTable, HUF_writeCTable */
#include "../common/zstd_internal.h" /* includes zstd.h */
#include "../common/xxhash.h" /* XXH64 */
#include "divsufsort.h"
#ifndef ZDICT_STATIC_LINKING_ONLY
# define ZDICT_STATIC_LINKING_ONLY
#endif
#include "zdict.h"
#define HUF_STATIC_LINKING_ONLY
#include "../common/mem.h" /* read */
#include "../common/fse.h" /* FSE_normalizeCount, FSE_writeNCount */
#include "../common/huf.h" /* HUF_buildCTable, HUF_writeCTable */
#include "../common/zstd_internal.h" /* includes zstd.h */
#include "../common/xxhash.h" /* XXH64 */
#include "../compress/zstd_compress_internal.h" /* ZSTD_loadCEntropy() */
#include "../zdict.h"
#include "divsufsort.h"
/*-*************************************

View File

@ -1,5 +1,5 @@
# ################################################################
# Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
# Copyright (c) Yann Collet, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under both the BSD-style license (found in the

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
* Copyright (c) Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@ -36,6 +36,145 @@ extern "C" {
# define ZDICTLIB_API ZDICTLIB_VISIBILITY
#endif
/*******************************************************************************
* Zstd dictionary builder
*
* FAQ
* ===
* Why should I use a dictionary?
* ------------------------------
*
* Zstd can use dictionaries to improve compression ratio of small data.
* Traditionally small files don't compress well because there is very little
 * repetition in a single sample, since it is small. But, if you are compressing
 * many similar files, like a bunch of JSON records that share the same
 * structure, you can train a dictionary ahead of time on some samples of
* these files. Then, zstd can use the dictionary to find repetitions that are
* present across samples. This can vastly improve compression ratio.
*
* When is a dictionary useful?
* ----------------------------
*
* Dictionaries are useful when compressing many small files that are similar.
* The larger a file is, the less benefit a dictionary will have. Generally,
* we don't expect dictionary compression to be effective past 100KB. And the
* smaller a file is, the more we would expect the dictionary to help.
*
* How do I use a dictionary?
* --------------------------
*
* Simply pass the dictionary to the zstd compressor with
* `ZSTD_CCtx_loadDictionary()`. The same dictionary must then be passed to
* the decompressor, using `ZSTD_DCtx_loadDictionary()`. There are other
* more advanced functions that allow selecting some options, see zstd.h for
* complete documentation.
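
For concreteness, a minimal round-trip sketch of this workflow; the buffer names are placeholders, the dictionary is assumed to be already trained and in memory, and error handling is reduced to the final check:

#include <zstd.h>

size_t roundtrip(void* dst, size_t dstCap, void* rt, size_t rtCap,
                 const void* src, size_t srcSize,
                 const void* dict, size_t dictSize)
{
    ZSTD_CCtx* const cctx = ZSTD_createCCtx();
    ZSTD_DCtx* const dctx = ZSTD_createDCtx();

    ZSTD_CCtx_loadDictionary(cctx, dict, dictSize);  /* load for compression */
    size_t const cSize = ZSTD_compress2(cctx, dst, dstCap, src, srcSize);

    ZSTD_DCtx_loadDictionary(dctx, dict, dictSize);  /* same dict for decompression */
    size_t const dSize = ZSTD_decompressDCtx(dctx, rt, rtCap, dst, cSize);

    ZSTD_freeCCtx(cctx);
    ZSTD_freeDCtx(dctx);
    return ZSTD_isError(cSize) ? cSize : dSize;
}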
*
* What is a zstd dictionary?
* --------------------------
*
* A zstd dictionary has two pieces: Its header, and its content. The header
* contains a magic number, the dictionary ID, and entropy tables. These
* entropy tables allow zstd to save on header costs in the compressed file,
* which really matters for small data. The content is just bytes, which are
* repeated content that is common across many samples.
*
* What is a raw content dictionary?
* ---------------------------------
*
* A raw content dictionary is just bytes. It doesn't have a zstd dictionary
* header, a dictionary ID, or entropy tables. Any buffer is a valid raw
* content dictionary.
*
* How do I train a dictionary?
* ----------------------------
*
* Gather samples from your use case. These samples should be similar to each
* other. If you have several use cases, you could try to train one dictionary
* per use case.
*
* Pass those samples to `ZDICT_trainFromBuffer()` and that will train your
* dictionary. There are a few advanced versions of this function, but this
* is a great starting point. If you want to further tune your dictionary
* you could try `ZDICT_optimizeTrainFromBuffer_cover()`. If that is too slow
* you can try `ZDICT_optimizeTrainFromBuffer_fastCover()`.
*
 * If the dictionary training function fails, that is likely because you
 * either passed too few samples, or a dictionary would not be effective
 * for your data. Look at the messages that the dictionary trainer printed:
 * if they don't say "too few samples", then a dictionary would not be effective.
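
A minimal training sketch, assuming the samples are already concatenated into one buffer with their sizes recorded side by side:

#include <zdict.h>   /* ZDICT_trainFromBuffer, ZDICT_isError */

/* Train a dictionary from samples laid out back to back in `samples`;
 * sampleSizes[i] is the size of sample i. Returns the dictionary size,
 * or an error code testable with ZDICT_isError(). */
size_t train_dict(void* dictBuf, size_t dictCap,
                  const void* samples, const size_t* sampleSizes,
                  unsigned nbSamples)
{
    size_t const dictSize =
        ZDICT_trainFromBuffer(dictBuf, dictCap, samples, sampleSizes, nbSamples);
    /* failure usually means too few samples, or data for which
     * a dictionary would not be effective */
    return dictSize;
}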
*
* How large should my dictionary be?
* ----------------------------------
*
* A reasonable dictionary size, the `dictBufferCapacity`, is about 100KB.
* The zstd CLI defaults to a 110KB dictionary. You likely don't need a
* dictionary larger than that. But, most use cases can get away with a
* smaller dictionary. The advanced dictionary builders can automatically
 * shrink the dictionary for you, and select the smallest size that
* doesn't hurt compression ratio too much. See the `shrinkDict` parameter.
* A smaller dictionary can save memory, and potentially speed up
* compression.
*
* How many samples should I provide to the dictionary builder?
* ------------------------------------------------------------
*
* We generally recommend passing ~100x the size of the dictionary
* in samples. A few thousand should suffice. Having too few samples
 * can hurt the dictionary's effectiveness. Having more samples will
 * only improve the dictionary's effectiveness. But having too many
 * samples can slow down the dictionary builder.
*
* How do I determine if a dictionary will be effective?
* -----------------------------------------------------
*
 * Simply train a dictionary and try it out. You can use zstd's built-in
* benchmarking tool to test the dictionary effectiveness.
*
* # Benchmark levels 1-3 without a dictionary
* zstd -b1e3 -r /path/to/my/files
 * # Benchmark levels 1-3 with a dictionary
* zstd -b1e3 -r /path/to/my/files -D /path/to/my/dictionary
*
* When should I retrain a dictionary?
* -----------------------------------
*
* You should retrain a dictionary when its effectiveness drops. Dictionary
* effectiveness drops as the data you are compressing changes. Generally, we do
* expect dictionaries to "decay" over time, as your data changes, but the rate
* at which they decay depends on your use case. Internally, we regularly
* retrain dictionaries, and if the new dictionary performs significantly
* better than the old dictionary, we will ship the new dictionary.
*
* I have a raw content dictionary, how do I turn it into a zstd dictionary?
* -------------------------------------------------------------------------
*
* If you have a raw content dictionary, e.g. by manually constructing it, or
* using a third-party dictionary builder, you can turn it into a zstd
* dictionary by using `ZDICT_finalizeDictionary()`. You'll also have to
 * provide some samples of the data. It will add the zstd header, which
 * contains a dictionary ID and entropy tables, to the raw content. The
 * entropy tables improve compression ratio, and the dictionary ID allows
 * zstd to write it into the frame, if you so choose.
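
A sketch of that conversion, assuming `rawDict` was built by hand or by a third-party tool; zeroing ZDICT_params_t selects the defaults (auto dictID, default compression level):

#include <string.h>
#include <zdict.h>

size_t finalize_raw_dict(void* dst, size_t dstCap,
                         const void* rawDict, size_t rawDictSize,
                         const void* samples, const size_t* sampleSizes,
                         unsigned nbSamples)
{
    ZDICT_params_t params;
    memset(&params, 0, sizeof(params));  /* 0 = defaults everywhere */
    return ZDICT_finalizeDictionary(dst, dstCap, rawDict, rawDictSize,
                                    samples, sampleSizes, nbSamples, params);
}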
*
* Do I have to use zstd's dictionary builder?
* -------------------------------------------
*
 * No! You can construct dictionary content however you please; it is just
* bytes. It will always be valid as a raw content dictionary. If you want
* a zstd dictionary, which can improve compression ratio, use
* `ZDICT_finalizeDictionary()`.
*
* What is the attack surface of a zstd dictionary?
* ------------------------------------------------
*
* Zstd is heavily fuzz tested, including loading fuzzed dictionaries, so
* zstd should never crash, or access out-of-bounds memory no matter what
* the dictionary is. However, if an attacker can control the dictionary
* during decompression, they can cause zstd to generate arbitrary bytes,
* just like if they controlled the compressed data.
*
******************************************************************************/
/*! ZDICT_trainFromBuffer():
* Train a dictionary from an array of samples.
@ -64,7 +203,14 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer(void* dictBuffer, size_t dictBufferCap
typedef struct {
int compressionLevel; /*< optimize for a specific zstd compression level; 0 means default */
unsigned notificationLevel; /*< Write log to stderr; 0 = none (default); 1 = errors; 2 = progression; 3 = details; 4 = debug; */
unsigned dictID; /*< force dictID value; 0 means auto mode (32-bits random value) */
unsigned dictID; /*< force dictID value; 0 means auto mode (32-bits random value)
* NOTE: The zstd format reserves some dictionary IDs for future use.
* You may use them in private settings, but be warned that they
* may be used by zstd in a public dictionary registry in the future.
* These dictionary IDs are:
* - low range : <= 32767
* - high range : >= (2^31)
*/
} ZDICT_params_t;
/*! ZDICT_finalizeDictionary():

250 extlib/zstd/zstd.h vendored
View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
* Copyright (c) Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@ -71,8 +71,8 @@ extern "C" {
/*------ Version ------*/
#define ZSTD_VERSION_MAJOR 1
#define ZSTD_VERSION_MINOR 4
#define ZSTD_VERSION_RELEASE 9
#define ZSTD_VERSION_MINOR 5
#define ZSTD_VERSION_RELEASE 0
#define ZSTD_VERSION_NUMBER (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE)
/*! ZSTD_versionNumber() :
@ -109,7 +109,6 @@ ZSTDLIB_API const char* ZSTD_versionString(void);
#define ZSTD_BLOCKSIZE_MAX (1<<ZSTD_BLOCKSIZELOG_MAX)
/***************************************
* Simple API
***************************************/
@ -166,7 +165,7 @@ ZSTDLIB_API unsigned long long ZSTD_getFrameContentSize(const void *src, size_t
* @return : decompressed size of `src` frame content _if known and not empty_, 0 otherwise. */
ZSTDLIB_API unsigned long long ZSTD_getDecompressedSize(const void* src, size_t srcSize);
/*! ZSTD_findFrameCompressedSize() :
/*! ZSTD_findFrameCompressedSize() : Requires v1.4.0+
* `src` should point to the start of a ZSTD frame or skippable frame.
* `srcSize` must be >= first frame size
* @return : the compressed size of the first frame starting at `src`,
@ -180,8 +179,9 @@ ZSTDLIB_API size_t ZSTD_findFrameCompressedSize(const void* src, size_t srcSize)
ZSTDLIB_API size_t ZSTD_compressBound(size_t srcSize); /*!< maximum compressed size in worst case single-pass scenario */
ZSTDLIB_API unsigned ZSTD_isError(size_t code); /*!< tells if a `size_t` function result is an error code */
ZSTDLIB_API const char* ZSTD_getErrorName(size_t code); /*!< provides readable string from an error code */
ZSTDLIB_API int ZSTD_minCLevel(void); /*!< minimum negative compression level allowed */
ZSTDLIB_API int ZSTD_minCLevel(void); /*!< minimum negative compression level allowed, requires v1.4.0+ */
ZSTDLIB_API int ZSTD_maxCLevel(void); /*!< maximum compression level available */
ZSTDLIB_API int ZSTD_defaultCLevel(void); /*!< default compression level, specified by ZSTD_CLEVEL_DEFAULT, requires v1.5.0+ */
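
As a quick illustration of these entry points, with ZSTD_defaultCLevel() being the v1.5.0 addition:

#include <stdio.h>
#include <zstd.h>

int main(void)
{
    printf("zstd %s: levels %d..%d (default %d)\n",
           ZSTD_versionString(),
           ZSTD_minCLevel(),       /* most negative (fastest) level */
           ZSTD_maxCLevel(),       /* strongest level */
           ZSTD_defaultCLevel());  /* ZSTD_CLEVEL_DEFAULT, v1.5.0+ */
    return 0;
}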
/***************************************
@ -199,7 +199,7 @@ ZSTDLIB_API int ZSTD_maxCLevel(void); /*!< maximum compres
*/
typedef struct ZSTD_CCtx_s ZSTD_CCtx;
ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx(void);
ZSTDLIB_API size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx);
ZSTDLIB_API size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx); /* accept NULL pointer */
/*! ZSTD_compressCCtx() :
* Same as ZSTD_compress(), using an explicit ZSTD_CCtx.
@ -222,7 +222,7 @@ ZSTDLIB_API size_t ZSTD_compressCCtx(ZSTD_CCtx* cctx,
* Use one context per thread for parallel execution. */
typedef struct ZSTD_DCtx_s ZSTD_DCtx;
ZSTDLIB_API ZSTD_DCtx* ZSTD_createDCtx(void);
ZSTDLIB_API size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx);
ZSTDLIB_API size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx); /* accept NULL pointer */
/*! ZSTD_decompressDCtx() :
* Same as ZSTD_decompress(),
@ -234,9 +234,9 @@ ZSTDLIB_API size_t ZSTD_decompressDCtx(ZSTD_DCtx* dctx,
const void* src, size_t srcSize);
/***************************************
* Advanced compression API
***************************************/
/*********************************************
* Advanced compression API (Requires v1.4.0+)
**********************************************/
/* API design :
* Parameters are pushed one by one into an existing context,
@ -266,7 +266,6 @@ typedef enum { ZSTD_fast=1,
Only the order (from fast to strong) is guaranteed */
} ZSTD_strategy;
typedef enum {
/* compression parameters
@ -332,7 +331,6 @@ typedef enum {
* The higher the value of selected strategy, the more complex it is,
* resulting in stronger and slower compression.
* Special: value 0 means "use default strategy". */
/* LDM mode parameters */
ZSTD_c_enableLongDistanceMatching=160, /* Enable long distance matching.
* This parameter is designed to improve compression ratio
@ -389,7 +387,7 @@ typedef enum {
ZSTD_c_jobSize=401, /* Size of a compression job. This value is enforced only when nbWorkers >= 1.
* Each compression job is completed in parallel, so this value can indirectly impact the nb of active threads.
* 0 means default, which is dynamically determined based on compression parameters.
* Job size must be a minimum of overlap size, or 1 MB, whichever is largest.
* Job size must be a minimum of overlap size, or ZSTDMT_JOBSIZE_MIN (= 512 KB), whichever is largest.
* The minimum size is automatically and transparently enforced. */
ZSTD_c_overlapLog=402, /* Control the overlap size, as a fraction of window size.
* The overlap size is an amount of data reloaded from previous job at the beginning of a new job.
@ -419,6 +417,8 @@ typedef enum {
* ZSTD_c_stableOutBuffer
* ZSTD_c_blockDelimiters
* ZSTD_c_validateSequences
* ZSTD_c_splitBlocks
* ZSTD_c_useRowMatchFinder
* Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them.
* note : never ever use experimentalParam? names directly;
* also, the enums values themselves are unstable and can still change.
@ -434,7 +434,10 @@ typedef enum {
ZSTD_c_experimentalParam9=1006,
ZSTD_c_experimentalParam10=1007,
ZSTD_c_experimentalParam11=1008,
ZSTD_c_experimentalParam12=1009
ZSTD_c_experimentalParam12=1009,
ZSTD_c_experimentalParam13=1010,
ZSTD_c_experimentalParam14=1011,
ZSTD_c_experimentalParam15=1012
} ZSTD_cParameter;
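
A short sketch of the push-parameter flow these enums feed into; the level, checksum, and worker values here are arbitrary examples, and error checks are elided:

#include <zstd.h>

size_t compress_advanced(ZSTD_CCtx* cctx, void* dst, size_t dstCap,
                         const void* src, size_t srcSize)
{
    ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters);
    ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, 19);
    ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 1);
    ZSTD_CCtx_setParameter(cctx, ZSTD_c_nbWorkers, 4);  /* needs a ZSTD_MULTITHREAD build */
    return ZSTD_compress2(cctx, dst, dstCap, src, srcSize);
}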
typedef struct {
@ -519,9 +522,9 @@ ZSTDLIB_API size_t ZSTD_compress2( ZSTD_CCtx* cctx,
const void* src, size_t srcSize);
/***************************************
* Advanced decompression API
***************************************/
/***********************************************
* Advanced decompression API (Requires v1.4.0+)
************************************************/
/* The advanced API pushes parameters one by one into an existing DCtx context.
* Parameters are sticky, and remain valid for all following frames
@ -667,7 +670,7 @@ typedef ZSTD_CCtx ZSTD_CStream; /**< CCtx and CStream are now effectively same
/* Continue to distinguish them for compatibility with older versions <= v1.2.0 */
/*===== ZSTD_CStream management functions =====*/
ZSTDLIB_API ZSTD_CStream* ZSTD_createCStream(void);
ZSTDLIB_API size_t ZSTD_freeCStream(ZSTD_CStream* zcs);
ZSTDLIB_API size_t ZSTD_freeCStream(ZSTD_CStream* zcs); /* accept NULL pointer */
/*===== Streaming compression functions =====*/
typedef enum {
@ -683,7 +686,7 @@ typedef enum {
: note : multithreaded compression will block to flush as much output as possible. */
} ZSTD_EndDirective;
/*! ZSTD_compressStream2() :
/*! ZSTD_compressStream2() : Requires v1.4.0+
* Behaves about the same as ZSTD_compressStream, with additional control on end directive.
* - Compression parameters are pushed into CCtx before starting compression, using ZSTD_CCtx_set*()
* - Compression parameters cannot be changed once compression is started (save a list of exceptions in multi-threading mode)
@ -729,11 +732,11 @@ ZSTDLIB_API size_t ZSTD_CStreamOutSize(void); /**< recommended size for output
/* *****************************************************************************
* This following is a legacy streaming API.
* This following is a legacy streaming API, available since v1.0+ .
* It can be replaced by ZSTD_CCtx_reset() and ZSTD_compressStream2().
* It is redundant, but remains fully supported.
* Advanced parameters and dictionary compression can only be used through the
* new API.
* Streaming in combination with advanced parameters and dictionary compression
* can only be used through the new API.
******************************************************************************/
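
For comparison, a sketch of the replacement path named above, compressing a FILE* with ZSTD_CCtx_reset() and ZSTD_compressStream2(); the buffer sizes are arbitrary and error handling is reduced to early returns:

#include <stdio.h>
#include <zstd.h>

static int stream_file(ZSTD_CCtx* cctx, FILE* fin, FILE* fout)
{
    char inBuf[1 << 17], outBuf[1 << 17];
    ZSTD_CCtx_reset(cctx, ZSTD_reset_session_only);
    for (;;) {
        size_t const readSz = fread(inBuf, 1, sizeof(inBuf), fin);
        ZSTD_EndDirective const mode = feof(fin) ? ZSTD_e_end : ZSTD_e_continue;
        ZSTD_inBuffer input = { inBuf, readSz, 0 };
        int finished;
        do {
            ZSTD_outBuffer output = { outBuf, sizeof(outBuf), 0 };
            size_t const remaining = ZSTD_compressStream2(cctx, &output, &input, mode);
            if (ZSTD_isError(remaining)) return -1;
            fwrite(outBuf, 1, output.pos, fout);
            /* with ZSTD_e_end, loop until the frame epilogue is fully flushed */
            finished = (mode == ZSTD_e_end) ? (remaining == 0) : (input.pos == input.size);
        } while (!finished);
        if (mode == ZSTD_e_end) return 0;
    }
}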
/*!
@ -788,7 +791,7 @@ typedef ZSTD_DCtx ZSTD_DStream; /**< DCtx and DStream are now effectively same
/* For compatibility with versions <= v1.2.0, prefer differentiating them. */
/*===== ZSTD_DStream management functions =====*/
ZSTDLIB_API ZSTD_DStream* ZSTD_createDStream(void);
ZSTDLIB_API size_t ZSTD_freeDStream(ZSTD_DStream* zds);
ZSTDLIB_API size_t ZSTD_freeDStream(ZSTD_DStream* zds); /* accept NULL pointer */
/*===== Streaming decompression functions =====*/
@ -811,7 +814,7 @@ ZSTDLIB_API size_t ZSTD_DStreamOutSize(void); /*!< recommended size for output
/*! ZSTD_compress_usingDict() :
* Compression at an explicit compression level using a Dictionary.
* A dictionary can be any arbitrary data segment (also called a prefix),
* or a buffer with specified information (see dictBuilder/zdict.h).
* or a buffer with specified information (see zdict.h).
* Note : This function loads the dictionary, resulting in significant startup delay.
* It's intended for a dictionary used only once.
* Note 2 : When `dict == NULL || dictSize < 8` no dictionary is used. */
@ -854,7 +857,8 @@ ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict(const void* dictBuffer, size_t dictSize
int compressionLevel);
/*! ZSTD_freeCDict() :
* Function frees memory allocated by ZSTD_createCDict(). */
* Function frees memory allocated by ZSTD_createCDict().
* If a NULL pointer is passed, no operation is performed. */
ZSTDLIB_API size_t ZSTD_freeCDict(ZSTD_CDict* CDict);
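
A sketch of the intended usage: digest the dictionary once, then reuse it across many small frames (the CDict is freed immediately here only to keep the example short; real code would keep it alive):

#include <zstd.h>

size_t compress_with_cdict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize,
                           void* dst, size_t dstCap, const void* src, size_t srcSize)
{
    ZSTD_CDict* const cdict = ZSTD_createCDict(dict, dictSize, 3 /* level */);
    size_t const cSize = ZSTD_compress_usingCDict(cctx, dst, dstCap, src, srcSize, cdict);
    ZSTD_freeCDict(cdict);  /* keep the CDict alive across frames in real code */
    return cSize;
}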
/*! ZSTD_compress_usingCDict() :
@ -876,7 +880,8 @@ typedef struct ZSTD_DDict_s ZSTD_DDict;
ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict(const void* dictBuffer, size_t dictSize);
/*! ZSTD_freeDDict() :
* Function frees memory allocated with ZSTD_createDDict() */
* Function frees memory allocated with ZSTD_createDDict()
* If a NULL pointer is passed, no operation is performed. */
ZSTDLIB_API size_t ZSTD_freeDDict(ZSTD_DDict* ddict);
/*! ZSTD_decompress_usingDDict() :
@ -892,19 +897,25 @@ ZSTDLIB_API size_t ZSTD_decompress_usingDDict(ZSTD_DCtx* dctx,
* Dictionary helper functions
*******************************/
/*! ZSTD_getDictID_fromDict() :
/*! ZSTD_getDictID_fromDict() : Requires v1.4.0+
* Provides the dictID stored within dictionary.
* if @return == 0, the dictionary is not conformant with Zstandard specification.
* It can still be loaded, but as a content-only dictionary. */
ZSTDLIB_API unsigned ZSTD_getDictID_fromDict(const void* dict, size_t dictSize);
/*! ZSTD_getDictID_fromDDict() :
/*! ZSTD_getDictID_fromCDict() : Requires v1.5.0+
* Provides the dictID of the dictionary loaded into `cdict`.
* If @return == 0, the dictionary is not conformant to Zstandard specification, or empty.
* Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */
ZSTDLIB_API unsigned ZSTD_getDictID_fromCDict(const ZSTD_CDict* cdict);
/*! ZSTD_getDictID_fromDDict() : Requires v1.4.0+
* Provides the dictID of the dictionary loaded into `ddict`.
* If @return == 0, the dictionary is not conformant to Zstandard specification, or empty.
* Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */
ZSTDLIB_API unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict);
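
A small sketch combining these helpers to test whether a frame references the dictionary at hand; note that a dictID of 0 means "non-conformant or undecodable", not an error:

#include <zstd.h>

int frame_matches_dict(const void* frame, size_t frameSize,
                       const void* dict, size_t dictSize)
{
    unsigned const frameID = ZSTD_getDictID_fromFrame(frame, frameSize);
    unsigned const dictID  = ZSTD_getDictID_fromDict(dict, dictSize);
    return (frameID != 0) && (frameID == dictID);
}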
/*! ZSTD_getDictID_fromFrame() :
/*! ZSTD_getDictID_fromFrame() : Requires v1.4.0+
 * Provides the dictID required to decompress the frame stored within `src`.
 * If @return == 0, the dictID could not be decoded.
 * This could be for one of the following reasons :
@ -918,7 +929,7 @@ ZSTDLIB_API unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize);
/*******************************************************************************
* Advanced dictionary and prefix API
* Advanced dictionary and prefix API (Requires v1.4.0+)
*
* This API allows dictionaries to be used with ZSTD_compress2(),
* ZSTD_compressStream2(), and ZSTD_decompress(). Dictionaries are sticky, and
@ -927,7 +938,7 @@ ZSTDLIB_API unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize);
******************************************************************************/
/*! ZSTD_CCtx_loadDictionary() :
/*! ZSTD_CCtx_loadDictionary() : Requires v1.4.0+
* Create an internal CDict from `dict` buffer.
* Decompression will have to use same dictionary.
* @result : 0, or an error code (which can be tested with ZSTD_isError()).
@ -946,7 +957,7 @@ ZSTDLIB_API unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize);
* to precisely select how dictionary content must be interpreted. */
ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize);
/*! ZSTD_CCtx_refCDict() :
/*! ZSTD_CCtx_refCDict() : Requires v1.4.0+
* Reference a prepared dictionary, to be used for all next compressed frames.
* Note that compression parameters are enforced from within CDict,
* and supersede any compression parameter previously set within CCtx.
@ -960,7 +971,7 @@ ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary(ZSTD_CCtx* cctx, const void* dict, s
* Note 2 : CDict is just referenced, its lifetime must outlive its usage within CCtx. */
ZSTDLIB_API size_t ZSTD_CCtx_refCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict);
/*! ZSTD_CCtx_refPrefix() :
/*! ZSTD_CCtx_refPrefix() : Requires v1.4.0+
* Reference a prefix (single-usage dictionary) for next compressed frame.
* A prefix is **only used once**. Tables are discarded at end of frame (ZSTD_e_end).
* Decompression will need same prefix to properly regenerate data.
@ -981,7 +992,7 @@ ZSTDLIB_API size_t ZSTD_CCtx_refCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict);
ZSTDLIB_API size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx,
const void* prefix, size_t prefixSize);
/*! ZSTD_DCtx_loadDictionary() :
/*! ZSTD_DCtx_loadDictionary() : Requires v1.4.0+
* Create an internal DDict from dict buffer,
* to be used to decompress next frames.
* The dictionary remains valid for all future frames, until explicitly invalidated.
@ -998,7 +1009,7 @@ ZSTDLIB_API size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx,
*/
ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSize);
/*! ZSTD_DCtx_refDDict() :
/*! ZSTD_DCtx_refDDict() : Requires v1.4.0+
* Reference a prepared dictionary, to be used to decompress next frames.
* The dictionary remains active for decompression of future frames using same DCtx.
*
@ -1016,7 +1027,7 @@ ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary(ZSTD_DCtx* dctx, const void* dict, s
*/
ZSTDLIB_API size_t ZSTD_DCtx_refDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict);
/*! ZSTD_DCtx_refPrefix() :
/*! ZSTD_DCtx_refPrefix() : Requires v1.4.0+
* Reference a prefix (single-usage dictionary) to decompress next frame.
* This is the reverse operation of ZSTD_CCtx_refPrefix(),
* and must use the same prefix as the one used during compression.
@ -1037,7 +1048,7 @@ ZSTDLIB_API size_t ZSTD_DCtx_refPrefix(ZSTD_DCtx* dctx,
/* === Memory management === */
/*! ZSTD_sizeof_*() :
/*! ZSTD_sizeof_*() : Requires v1.4.0+
* These functions give the _current_ memory usage of selected object.
* Note that object memory usage can evolve (increase or decrease) over time. */
ZSTDLIB_API size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx);
@ -1062,6 +1073,28 @@ ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
#if defined(ZSTD_STATIC_LINKING_ONLY) && !defined(ZSTD_H_ZSTD_STATIC_LINKING_ONLY)
#define ZSTD_H_ZSTD_STATIC_LINKING_ONLY
/* Deprecation warnings :
* Should these warnings be a problem, it is generally possible to disable them,
* typically with -Wno-deprecated-declarations for gcc or _CRT_SECURE_NO_WARNINGS in Visual.
* Otherwise, it's also possible to define ZSTD_DISABLE_DEPRECATE_WARNINGS.
*/
#ifdef ZSTD_DISABLE_DEPRECATE_WARNINGS
# define ZSTD_DEPRECATED(message) ZSTDLIB_API /* disable deprecation warnings */
#else
# if defined (__cplusplus) && (__cplusplus >= 201402) /* C++14 or greater */
# define ZSTD_DEPRECATED(message) [[deprecated(message)]] ZSTDLIB_API
#  elif (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5))) || defined(__clang__)
# define ZSTD_DEPRECATED(message) ZSTDLIB_API __attribute__((deprecated(message)))
# elif defined(__GNUC__) && (__GNUC__ >= 3)
# define ZSTD_DEPRECATED(message) ZSTDLIB_API __attribute__((deprecated))
# elif defined(_MSC_VER)
# define ZSTD_DEPRECATED(message) ZSTDLIB_API __declspec(deprecated(message))
# else
# pragma message("WARNING: You need to implement ZSTD_DEPRECATED for this compiler")
# define ZSTD_DEPRECATED(message) ZSTDLIB_API
# endif
#endif /* ZSTD_DISABLE_DEPRECATE_WARNINGS */
/****************************************************************************************
* experimental API (static linking only)
****************************************************************************************
@ -1268,6 +1301,11 @@ typedef enum {
ZSTD_lcm_uncompressed = 2 /**< Always emit uncompressed literals. */
} ZSTD_literalCompressionMode_e;
typedef enum {
ZSTD_urm_auto = 0, /* Automatically determine whether or not we use row matchfinder */
ZSTD_urm_disableRowMatchFinder = 1, /* Never use row matchfinder */
ZSTD_urm_enableRowMatchFinder = 2 /* Always use row matchfinder when applicable */
} ZSTD_useRowMatchFinderMode_e;
/***************************************
* Frame size functions
@ -1538,13 +1576,14 @@ ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced(const void* dict, size_t dictS
* Note that the lifetime of such pool must exist while being used.
* ZSTD_CCtx_refThreadPool assigns a thread pool to a context (use NULL argument value
* to use an internal thread pool).
* ZSTD_freeThreadPool frees a thread pool.
* ZSTD_freeThreadPool frees a thread pool, accepts NULL pointer.
*/
typedef struct POOL_ctx_s ZSTD_threadPool;
ZSTDLIB_API ZSTD_threadPool* ZSTD_createThreadPool(size_t numThreads);
ZSTDLIB_API void ZSTD_freeThreadPool (ZSTD_threadPool* pool);
ZSTDLIB_API void ZSTD_freeThreadPool (ZSTD_threadPool* pool); /* accept NULL pointer */
ZSTDLIB_API size_t ZSTD_CCtx_refThreadPool(ZSTD_CCtx* cctx, ZSTD_threadPool* pool);
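
A sketch of sharing one pool across two contexts under the lifetime rule stated above; since this is the experimental API, ZSTD_STATIC_LINKING_ONLY is assumed to be defined:

#include <zstd.h>   /* built with ZSTD_STATIC_LINKING_ONLY defined */

static void share_pool(ZSTD_CCtx* c1, ZSTD_CCtx* c2)
{
    ZSTD_threadPool* const pool = ZSTD_createThreadPool(4 /* threads */);
    ZSTD_CCtx_refThreadPool(c1, pool);
    ZSTD_CCtx_refThreadPool(c2, pool);
    /* ... compress on both contexts; the pool must outlive its users ... */
    ZSTD_CCtx_refThreadPool(c1, NULL);  /* NULL reverts to the internal pool */
    ZSTD_CCtx_refThreadPool(c2, NULL);
    ZSTD_freeThreadPool(pool);
}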
/*
* This API is temporary and is expected to change or disappear in the future!
*/
@ -1561,6 +1600,7 @@ ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict_advanced(
ZSTD_dictContentType_e dictContentType,
ZSTD_customMem customMem);
/***************************************
* Advanced compression functions
***************************************/
@ -1573,12 +1613,6 @@ ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict_advanced(
* note: equivalent to ZSTD_createCDict_advanced(), with dictLoadMethod==ZSTD_dlm_byRef */
ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_byReference(const void* dictBuffer, size_t dictSize, int compressionLevel);
/*! ZSTD_getDictID_fromCDict() :
* Provides the dictID of the dictionary loaded into `cdict`.
* If @return == 0, the dictionary is not conformant to Zstandard specification, or empty.
* Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */
ZSTDLIB_API unsigned ZSTD_getDictID_fromCDict(const ZSTD_CDict* cdict);
/*! ZSTD_getCParams() :
* @return ZSTD_compressionParameters structure for a selected compression level and estimated srcSize.
* `estimatedSrcSize` value is optional, select 0 if not known */
@ -1605,18 +1639,20 @@ ZSTDLIB_API ZSTD_compressionParameters ZSTD_adjustCParams(ZSTD_compressionParame
/*! ZSTD_compress_advanced() :
* Note : this function is now DEPRECATED.
* It can be replaced by ZSTD_compress2(), in combination with ZSTD_CCtx_setParameter() and other parameter setters.
* This prototype will be marked as deprecated and generate compilation warning on reaching v1.5.x */
ZSTDLIB_API size_t ZSTD_compress_advanced(ZSTD_CCtx* cctx,
* This prototype will generate compilation warnings. */
ZSTD_DEPRECATED("use ZSTD_compress2")
size_t ZSTD_compress_advanced(ZSTD_CCtx* cctx,
void* dst, size_t dstCapacity,
const void* src, size_t srcSize,
const void* dict,size_t dictSize,
ZSTD_parameters params);
/*! ZSTD_compress_usingCDict_advanced() :
* Note : this function is now REDUNDANT.
* Note : this function is now DEPRECATED.
* It can be replaced by ZSTD_compress2(), in combination with ZSTD_CCtx_loadDictionary() and other parameter setters.
* This prototype will be marked as deprecated and generate compilation warning in some future version */
ZSTDLIB_API size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx,
* This prototype will generate compilation warnings. */
ZSTD_DEPRECATED("use ZSTD_compress2 with ZSTD_CCtx_loadDictionary")
size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx,
void* dst, size_t dstCapacity,
const void* src, size_t srcSize,
const ZSTD_CDict* cdict,
@ -1678,7 +1714,7 @@ ZSTDLIB_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const void* pre
/* Controls how the literals are compressed (default is auto).
* The value must be of type ZSTD_literalCompressionMode_e.
* See ZSTD_literalCompressionMode_t enum definition for details.
* See ZSTD_literalCompressionMode_e enum definition for details.
*/
#define ZSTD_c_literalCompressionMode ZSTD_c_experimentalParam5
@ -1830,6 +1866,46 @@ ZSTDLIB_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const void* pre
*/
#define ZSTD_c_validateSequences ZSTD_c_experimentalParam12
/* ZSTD_c_splitBlocks
* Default is 0 == disabled. Set to 1 to enable block splitting.
*
* Will attempt to split blocks in order to improve compression ratio at the cost of speed.
*/
#define ZSTD_c_splitBlocks ZSTD_c_experimentalParam13
/* ZSTD_c_useRowMatchFinder
* Default is ZSTD_urm_auto.
* Controlled with ZSTD_useRowMatchFinderMode_e enum.
*
* By default, in ZSTD_urm_auto, when finalizing the compression parameters, the library
* will decide at runtime whether to use the row-based matchfinder based on support for SIMD
* instructions as well as the windowLog.
*
* Set to ZSTD_urm_disableRowMatchFinder to never use row-based matchfinder.
* Set to ZSTD_urm_enableRowMatchFinder to force usage of row-based matchfinder.
*/
#define ZSTD_c_useRowMatchFinder ZSTD_c_experimentalParam14
/* ZSTD_c_deterministicRefPrefix
* Default is 0 == disabled. Set to 1 to enable.
*
* Zstd produces different results for prefix compression when the prefix is
* directly adjacent to the data about to be compressed vs. when it isn't.
* This is because zstd detects that the two buffers are contiguous and it can
* use a more efficient match finding algorithm. However, this produces different
* results than when the two buffers are non-contiguous. This flag forces zstd
* to always load the prefix in non-contiguous mode, even if it happens to be
* adjacent to the data, to guarantee determinism.
*
* If you really care about determinism when using a dictionary or prefix,
 * like when doing delta compression, you should select this option. It comes
 * at a speed penalty of about 2.5% if the dictionary and data happen to be
 * contiguous, and is free if they aren't. We don't expect that
* intentionally making the dictionary and data contiguous will be worth the
* cost to memcpy() the data.
*/
#define ZSTD_c_deterministicRefPrefix ZSTD_c_experimentalParam15
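
A sketch of deterministic delta compression against a base buffer, using this flag together with ZSTD_CCtx_refPrefix(); the parameter is experimental, so ZSTD_STATIC_LINKING_ONLY is assumed:

#define ZSTD_STATIC_LINKING_ONLY   /* experimental parameters */
#include <zstd.h>

size_t delta_compress(ZSTD_CCtx* cctx, void* dst, size_t dstCap,
                      const void* base, size_t baseSize,
                      const void* src, size_t srcSize)
{
    ZSTD_CCtx_reset(cctx, ZSTD_reset_session_only);
    ZSTD_CCtx_setParameter(cctx, ZSTD_c_deterministicRefPrefix, 1);
    ZSTD_CCtx_refPrefix(cctx, base, baseSize);  /* single-use, discarded at frame end */
    return ZSTD_compress2(cctx, dst, dstCap, src, srcSize);
}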
/*! ZSTD_CCtx_getParameter() :
* Get the requested compression parameter value, selected by enum ZSTD_cParameter,
* and store it into int* value.
@ -1850,13 +1926,13 @@ ZSTDLIB_API size_t ZSTD_CCtx_getParameter(const ZSTD_CCtx* cctx, ZSTD_cParameter
* These parameters will be applied to
* all subsequent frames.
* - ZSTD_compressStream2() : Do compression using the CCtx.
* - ZSTD_freeCCtxParams() : Free the memory.
* - ZSTD_freeCCtxParams() : Free the memory, accept NULL pointer.
*
* This can be used with ZSTD_estimateCCtxSize_advanced_usingCCtxParams()
* for static allocation of CCtx for single-threaded compression.
*/
ZSTDLIB_API ZSTD_CCtx_params* ZSTD_createCCtxParams(void);
ZSTDLIB_API size_t ZSTD_freeCCtxParams(ZSTD_CCtx_params* params);
ZSTDLIB_API size_t ZSTD_freeCCtxParams(ZSTD_CCtx_params* params); /* accept NULL pointer */
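
A sketch of that lifecycle; ZSTD_CCtxParams_setParameter() appears in this header, and ZSTD_CCtx_setParametersUsingCCtxParams() is the experimental applier this workflow refers to, so ZSTD_STATIC_LINKING_ONLY is assumed:

#define ZSTD_STATIC_LINKING_ONLY
#include <zstd.h>

static size_t apply_params(ZSTD_CCtx* cctx)
{
    ZSTD_CCtx_params* const params = ZSTD_createCCtxParams();
    ZSTD_CCtxParams_setParameter(params, ZSTD_c_compressionLevel, 6);
    ZSTD_CCtxParams_setParameter(params, ZSTD_c_checksumFlag, 1);
    {   size_t const r = ZSTD_CCtx_setParametersUsingCCtxParams(cctx, params);
        ZSTD_freeCCtxParams(params);  /* accepts NULL too */
        return r;
    }
}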
/*! ZSTD_CCtxParams_reset() :
* Reset params to default values.
@ -1875,7 +1951,7 @@ ZSTDLIB_API size_t ZSTD_CCtxParams_init(ZSTD_CCtx_params* cctxParams, int compre
*/
ZSTDLIB_API size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params* cctxParams, ZSTD_parameters params);
/*! ZSTD_CCtxParams_setParameter() :
/*! ZSTD_CCtxParams_setParameter() : Requires v1.4.0+
* Similar to ZSTD_CCtx_setParameter.
* Set one compression parameter, selected by enum ZSTD_cParameter.
* Parameters must be applied to a ZSTD_CCtx using
@ -2041,11 +2117,13 @@ ZSTDLIB_API size_t ZSTD_DCtx_getParameter(ZSTD_DCtx* dctx, ZSTD_dParameter param
/*! ZSTD_DCtx_setFormat() :
* This function is REDUNDANT. Prefer ZSTD_DCtx_setParameter().
* Instruct the decoder context about what kind of data to decode next.
* This instruction is mandatory to decode data without a fully-formed header,
 * such as ZSTD_f_zstd1_magicless for example.
* @return : 0, or an error code (which can be tested using ZSTD_isError()). */
ZSTDLIB_API size_t ZSTD_DCtx_setFormat(ZSTD_DCtx* dctx, ZSTD_format_e format);
ZSTD_DEPRECATED("use ZSTD_DCtx_setParameter() instead")
size_t ZSTD_DCtx_setFormat(ZSTD_DCtx* dctx, ZSTD_format_e format);
/*! ZSTD_decompressStream_simpleArgs() :
* Same as ZSTD_decompressStream(),
@ -2069,7 +2147,7 @@ ZSTDLIB_API size_t ZSTD_decompressStream_simpleArgs (
/*===== Advanced Streaming compression functions =====*/
/*! ZSTD_initCStream_srcSize() :
* This function is deprecated, and equivalent to:
* This function is DEPRECATED, and equivalent to:
* ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
* ZSTD_CCtx_refCDict(zcs, NULL); // clear the dictionary (if any)
* ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel);
@ -2078,15 +2156,15 @@ ZSTDLIB_API size_t ZSTD_decompressStream_simpleArgs (
* pledgedSrcSize must be correct. If it is not known at init time, use
* ZSTD_CONTENTSIZE_UNKNOWN. Note that, for compatibility with older programs,
* "0" also disables frame content size field. It may be enabled in the future.
* Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
* This prototype will generate compilation warnings.
*/
ZSTDLIB_API size_t
ZSTD_initCStream_srcSize(ZSTD_CStream* zcs,
ZSTD_DEPRECATED("use ZSTD_CCtx_reset, see zstd.h for detailed instructions")
size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs,
int compressionLevel,
unsigned long long pledgedSrcSize);
/*! ZSTD_initCStream_usingDict() :
* This function is deprecated, and is equivalent to:
* This function is DEPRECATED, and is equivalent to:
* ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
* ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel);
* ZSTD_CCtx_loadDictionary(zcs, dict, dictSize);
@ -2095,15 +2173,15 @@ ZSTD_initCStream_srcSize(ZSTD_CStream* zcs,
* dict == NULL or dictSize < 8, in which case no dict is used.
* Note: dict is loaded with ZSTD_dct_auto (treated as a full zstd dictionary if
* it begins with ZSTD_MAGIC_DICTIONARY, else as raw content) and ZSTD_dlm_byCopy.
* Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
* This prototype will generate compilation warnings.
*/
ZSTDLIB_API size_t
ZSTD_initCStream_usingDict(ZSTD_CStream* zcs,
ZSTD_DEPRECATED("use ZSTD_CCtx_reset, see zstd.h for detailed instructions")
size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs,
const void* dict, size_t dictSize,
int compressionLevel);
/*! ZSTD_initCStream_advanced() :
* This function is deprecated, and is approximately equivalent to:
* This function is DEPRECATED, and is approximately equivalent to:
* ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
* // Pseudocode: Set each zstd parameter and leave the rest as-is.
* for ((param, value) : params) {
@ -2115,23 +2193,24 @@ ZSTD_initCStream_usingDict(ZSTD_CStream* zcs,
* dict is loaded with ZSTD_dct_auto and ZSTD_dlm_byCopy.
* pledgedSrcSize must be correct.
* If srcSize is not known at init time, use value ZSTD_CONTENTSIZE_UNKNOWN.
* Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
* This prototype will generate compilation warnings.
*/
ZSTDLIB_API size_t
ZSTD_initCStream_advanced(ZSTD_CStream* zcs,
ZSTD_DEPRECATED("use ZSTD_CCtx_reset, see zstd.h for detailed instructions")
size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs,
const void* dict, size_t dictSize,
ZSTD_parameters params,
unsigned long long pledgedSrcSize);
/*! ZSTD_initCStream_usingCDict() :
* This function is deprecated, and equivalent to:
* This function is DEPRECATED, and equivalent to:
* ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
* ZSTD_CCtx_refCDict(zcs, cdict);
*
*
* note : cdict will just be referenced, and must outlive compression session
* Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
* This prototype will generate compilation warnings.
*/
ZSTDLIB_API size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict);
ZSTD_DEPRECATED("use ZSTD_CCtx_reset and ZSTD_CCtx_refCDict, see zstd.h for detailed instructions")
size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict);
/*! ZSTD_initCStream_usingCDict_advanced() :
* This function is DEPRECATED, and is approximately equivalent to:
@ -2146,18 +2225,21 @@ ZSTDLIB_API size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDi
* same as ZSTD_initCStream_usingCDict(), with control over frame parameters.
* pledgedSrcSize must be correct. If srcSize is not known at init time, use
* value ZSTD_CONTENTSIZE_UNKNOWN.
* Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
* This prototype will generate compilation warnings.
*/
ZSTDLIB_API size_t
ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs,
ZSTD_DEPRECATED("use ZSTD_CCtx_reset and ZSTD_CCtx_refCDict, see zstd.h for detailed instructions")
size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs,
const ZSTD_CDict* cdict,
ZSTD_frameParameters fParams,
unsigned long long pledgedSrcSize);
/*! ZSTD_resetCStream() :
* This function is deprecated, and is equivalent to:
* This function is DEPRECATED, and is equivalent to:
* ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
* ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize);
* Note: ZSTD_resetCStream() interprets pledgedSrcSize == 0 as ZSTD_CONTENTSIZE_UNKNOWN, but
* ZSTD_CCtx_setPledgedSrcSize() does not do the same, so ZSTD_CONTENTSIZE_UNKNOWN must be
* explicitly specified.
*
* start a new frame, using same parameters from previous frame.
* This is typically useful to skip dictionary loading stage, since it will re-use it in-place.
@ -2167,9 +2249,10 @@ ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs,
* For the time being, pledgedSrcSize==0 is interpreted as "srcSize unknown" for compatibility with older programs,
* but it will change to mean "empty" in future version, so use macro ZSTD_CONTENTSIZE_UNKNOWN instead.
* @return : 0, or an error code (which can be tested using ZSTD_isError())
* Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
* This prototype will generate compilation warnings.
*/
ZSTDLIB_API size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize);
ZSTD_DEPRECATED("use ZSTD_CCtx_reset, see zstd.h for detailed instructions")
size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize);
typedef struct {
@ -2256,8 +2339,7 @@ ZSTDLIB_API size_t ZSTD_resetDStream(ZSTD_DStream* zds);
ZSTD_CCtx object can be re-used multiple times within successive compression operations.
Start by initializing a context.
Use ZSTD_compressBegin(), or ZSTD_compressBegin_usingDict() for dictionary compression,
or ZSTD_compressBegin_advanced(), for finer parameter control.
Use ZSTD_compressBegin(), or ZSTD_compressBegin_usingDict() for dictionary compression.
It's also possible to duplicate a reference context which has already been initialized, using ZSTD_copyCCtx()
Then, consume your input using ZSTD_compressContinue().
@ -2282,15 +2364,17 @@ ZSTDLIB_API size_t ZSTD_resetDStream(ZSTD_DStream* zds);
/*===== Buffer-less streaming compression functions =====*/
ZSTDLIB_API size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel);
ZSTDLIB_API size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel);
ZSTDLIB_API size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_parameters params, unsigned long long pledgedSrcSize); /**< pledgedSrcSize : If srcSize is not known at init time, use ZSTD_CONTENTSIZE_UNKNOWN */
ZSTDLIB_API size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict); /**< note: fails if cdict==NULL */
ZSTDLIB_API size_t ZSTD_compressBegin_usingCDict_advanced(ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict, ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize); /* compression parameters are already set within cdict. pledgedSrcSize must be correct. If srcSize is not known, use macro ZSTD_CONTENTSIZE_UNKNOWN */
ZSTDLIB_API size_t ZSTD_copyCCtx(ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx, unsigned long long pledgedSrcSize); /**< note: if pledgedSrcSize is not known, use ZSTD_CONTENTSIZE_UNKNOWN */
ZSTDLIB_API size_t ZSTD_compressContinue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
ZSTDLIB_API size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
/* The ZSTD_compressBegin_advanced() and ZSTD_compressBegin_usingCDict_advanced() are now DEPRECATED and will generate a compiler warning */
ZSTD_DEPRECATED("use advanced API to access custom parameters")
size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_parameters params, unsigned long long pledgedSrcSize); /**< pledgedSrcSize : If srcSize is not known at init time, use ZSTD_CONTENTSIZE_UNKNOWN */
ZSTD_DEPRECATED("use advanced API to access custom parameters")
size_t ZSTD_compressBegin_usingCDict_advanced(ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict, ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize); /* compression parameters are already set within cdict. pledgedSrcSize must be correct. If srcSize is not known, use macro ZSTD_CONTENTSIZE_UNKNOWN */
/**
Buffer-less streaming decompression (synchronous mode)
