Add xz decompression support

This adds xz decompression support from the kernel. Both compressing the barebox binary with xz and decompressing xz files on the commandline are supported. Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>
2014-10-17 15:36:39 +02:00 · 2014-10-17 15:36:39 +02:00 · ccb2816477
parent 2f81d316d2
commit ccb2816477
19 changed files with 3777 additions and 0 deletions
--- a/common/Kconfig
+++ b/common/Kconfig
@ -582,6 +582,7 @@ choice
 	prompt "default compression for in-barebox binaries"
 	default DEFAULT_COMPRESSION_NONE if PBL_IMAGE
 	default DEFAULT_COMPRESSION_LZO if LZO_DECOMPRESS
 	default DEFAULT_COMPRESSION_XZ if XZ_DECOMPRESS
 	default DEFAULT_COMPRESSION_GZIP if ZLIB
 	default DEFAULT_COMPRESSION_LZ4 if LZ4_DECOMPRESS
 	default DEFAULT_COMPRESSION_BZIP2 if BZLIB
@ -606,6 +607,10 @@ config DEFAULT_COMPRESSION_LZ4
 	bool "lz4"
 	depends on LZ4_DECOMPRESS
 config DEFAULT_COMPRESSION_XZ
 	bool "xz"
 	depends on XZ_DECOMPRESS
 config DEFAULT_COMPRESSION_NONE
 	bool "no compression"
--- a/images/Makefile
+++ b/images/Makefile
@ -82,6 +82,7 @@ $(obj)/%.s: $(obj)/% FORCE
 suffix_$(CONFIG_IMAGE_COMPRESSION_GZIP) = gzip
 suffix_$(CONFIG_IMAGE_COMPRESSION_LZO)  = lzo
 suffix_$(CONFIG_IMAGE_COMPRESSION_LZ4)	= lz4
 suffix_$(CONFIG_IMAGE_COMPRESSION_XZKERN) = xzkern
 suffix_$(CONFIG_IMAGE_COMPRESSION_NONE) = shipped
 # barebox.z - compressed barebox binary
--- a/include/linux/xz.h
+++ b/include/linux/xz.h
@ -0,0 +1,271 @@
 /*
 * XZ decompressor
 *
 * Authors: Lasse Collin <lasse.collin@tukaani.org>
 *          Igor Pavlov <http://7-zip.org/>
 *
 * This file has been put into the public domain.
 * You can do whatever you want with this file.
 */
 #ifndef XZ_H
 #define XZ_H
 #ifdef __KERNEL__
 #	include <linux/stddef.h>
 #	include <linux/types.h>
 #else
 #	include <stddef.h>
 #	include <stdint.h>
 #endif
 /* In Linux, this is used to make extern functions static when needed. */
 #ifndef XZ_EXTERN
 #	define XZ_EXTERN extern
 #endif
 #ifndef STATIC
 #define STATIC
 #endif
 /**
 * enum xz_mode - Operation mode
 *
 * @XZ_SINGLE:              Single-call mode. This uses less RAM than
 *                          multi-call modes, because the LZMA2
 *                          dictionary doesn't need to be allocated as
 *                          part of the decoder state. All required data
 *                          structures are allocated at initialization,
 *                          so xz_dec_run() cannot return XZ_MEM_ERROR.
 * @XZ_PREALLOC:            Multi-call mode with preallocated LZMA2
 *                          dictionary buffer. All data structures are
 *                          allocated at initialization, so xz_dec_run()
 *                          cannot return XZ_MEM_ERROR.
 * @XZ_DYNALLOC:            Multi-call mode. The LZMA2 dictionary is
 *                          allocated once the required size has been
 *                          parsed from the stream headers. If the
 *                          allocation fails, xz_dec_run() will return
 *                          XZ_MEM_ERROR.
 *
 * It is possible to enable support only for a subset of the above
 * modes at compile time by defining XZ_DEC_SINGLE, XZ_DEC_PREALLOC,
 * or XZ_DEC_DYNALLOC. The xz_dec kernel module is always compiled
 * with support for all operation modes, but the preboot code may
 * be built with fewer features to minimize code size.
 */
 enum xz_mode {
 	XZ_SINGLE,	/* single-call: whole stream decoded by one xz_dec_run() */
 	XZ_PREALLOC,	/* multi-call: LZMA2 dictionary allocated at init time */
 	XZ_DYNALLOC	/* multi-call: dictionary allocated after header parsing */
 };
 /**
 * enum xz_ret - Return codes
 * @XZ_OK:                  Everything is OK so far. More input or more
 *                          output space is required to continue. This
 *                          return code is possible only in multi-call mode
 *                          (XZ_PREALLOC or XZ_DYNALLOC).
 * @XZ_STREAM_END:          Operation finished successfully.
 * @XZ_UNSUPPORTED_CHECK:   Integrity check type is not supported. Decoding
 *                          is still possible in multi-call mode by simply
 *                          calling xz_dec_run() again.
 *                          Note that this return value is used only if
 *                          XZ_DEC_ANY_CHECK was defined at build time,
 *                          which is not used in the kernel. Unsupported
 *                          check types return XZ_OPTIONS_ERROR if
 *                          XZ_DEC_ANY_CHECK was not defined at build time.
 * @XZ_MEM_ERROR:           Allocating memory failed. This return code is
 *                          possible only if the decoder was initialized
 *                          with XZ_DYNALLOC. The amount of memory that was
 *                          tried to be allocated was no more than the
 *                          dict_max argument given to xz_dec_init().
 * @XZ_MEMLIMIT_ERROR:      A bigger LZMA2 dictionary would be needed than
 *                          allowed by the dict_max argument given to
 *                          xz_dec_init(). This return value is possible
 *                          only in multi-call mode (XZ_PREALLOC or
 *                          XZ_DYNALLOC); the single-call mode (XZ_SINGLE)
 *                          ignores the dict_max argument.
 * @XZ_FORMAT_ERROR:        File format was not recognized (wrong magic
 *                          bytes).
 * @XZ_OPTIONS_ERROR:       This implementation doesn't support the requested
 *                          compression options. In the decoder this means
 *                          that the header CRC32 matches, but the header
 *                          itself specifies something that we don't support.
 * @XZ_DATA_ERROR:          Compressed data is corrupt.
 * @XZ_BUF_ERROR:           Cannot make any progress. Details are slightly
 *                          different between multi-call and single-call
 *                          mode; more information below.
 *
 * In multi-call mode, XZ_BUF_ERROR is returned when two consecutive calls
 * to XZ code cannot consume any input and cannot produce any new output.
 * This happens when there is no new input available, or the output buffer
 * is full while at least one output byte is still pending. Assuming your
 * code is not buggy, you can get this error only when decoding a compressed
 * stream that is truncated or otherwise corrupt.
 *
 * In single-call mode, XZ_BUF_ERROR is returned only when the output buffer
 * is too small or the compressed input is corrupt in a way that makes the
 * decoder produce more output than the caller expected. When it is
 * (relatively) clear that the compressed input is truncated, XZ_DATA_ERROR
 * is used instead of XZ_BUF_ERROR.
 */
 enum xz_ret {
 	XZ_OK,			/* so far OK; more input or output space needed */
 	XZ_STREAM_END,		/* operation finished successfully */
 	XZ_UNSUPPORTED_CHECK,	/* integrity check type is not supported */
 	XZ_MEM_ERROR,		/* memory allocation failed (XZ_DYNALLOC only) */
 	XZ_MEMLIMIT_ERROR,	/* dictionary would exceed dict_max */
 	XZ_FORMAT_ERROR,	/* wrong magic bytes */
 	XZ_OPTIONS_ERROR,	/* valid header, but unsupported options */
 	XZ_DATA_ERROR,		/* compressed data is corrupt */
 	XZ_BUF_ERROR		/* cannot make any progress */
 };
 /**
 * struct xz_buf - Passing input and output buffers to XZ code
 * @in:         Beginning of the input buffer. This may be NULL if and only
 *              if in_pos is equal to in_size.
 * @in_pos:     Current position in the input buffer. This must not exceed
 *              in_size.
 * @in_size:    Size of the input buffer
 * @out:        Beginning of the output buffer. This may be NULL if and only
 *              if out_pos is equal to out_size.
 * @out_pos:    Current position in the output buffer. This must not exceed
 *              out_size.
 * @out_size:   Size of the output buffer
 *
 * Only the contents of the output buffer from out[out_pos] onward, and
 * the variables in_pos and out_pos are modified by the XZ code.
 */
 struct xz_buf {
 	const uint8_t *in;	/* start of the input buffer */
 	size_t in_pos;		/* current read position, <= in_size */
 	size_t in_size;		/* size of the input buffer */
 	uint8_t *out;		/* start of the output buffer */
 	size_t out_pos;		/* current write position, <= out_size */
 	size_t out_size;	/* size of the output buffer */
 };
 /**
 * struct xz_dec - Opaque type to hold the XZ decoder state
 */
 struct xz_dec;
 /**
 * xz_dec_init() - Allocate and initialize a XZ decoder state
 * @mode:       Operation mode
 * @dict_max:   Maximum size of the LZMA2 dictionary (history buffer) for
 *              multi-call decoding. This is ignored in single-call mode
 *              (mode == XZ_SINGLE). LZMA2 dictionary is always 2^n bytes
 *              or 2^n + 2^(n-1) bytes (the latter sizes are less common
 *              in practice), so other values for dict_max don't make sense.
 *              In the kernel, dictionary sizes of 64 KiB, 128 KiB, 256 KiB,
 *              512 KiB, and 1 MiB are probably the only reasonable values,
 *              except for kernel and initramfs images where a bigger
 *              dictionary can be fine and useful.
 *
 * Single-call mode (XZ_SINGLE): xz_dec_run() decodes the whole stream at
 * once. The caller must provide enough output space or the decoding will
 * fail. The output space is used as the dictionary buffer, which is why
 * there is no need to allocate the dictionary as part of the decoder's
 * internal state.
 *
 * Because the output buffer is used as the workspace, streams encoded using
 * a big dictionary are not a problem in single-call mode. It is enough that
 * the output buffer is big enough to hold the actual uncompressed data; it
 * can be smaller than the dictionary size stored in the stream headers.
 *
 * Multi-call mode with preallocated dictionary (XZ_PREALLOC): dict_max bytes
 * of memory is preallocated for the LZMA2 dictionary. This way there is no
 * risk that xz_dec_run() could run out of memory, since xz_dec_run() will
 * never allocate any memory. Instead, if the preallocated dictionary is too
 * small for decoding the given input stream, xz_dec_run() will return
 * XZ_MEMLIMIT_ERROR. Thus, it is important to know what kind of data will be
 * decoded to avoid allocating excessive amount of memory for the dictionary.
 *
 * Multi-call mode with dynamically allocated dictionary (XZ_DYNALLOC):
 * dict_max specifies the maximum allowed dictionary size that xz_dec_run()
 * may allocate once it has parsed the dictionary size from the stream
 * headers. This way excessive allocations can be avoided while still
 * limiting the maximum memory usage to a sane value to prevent running the
 * system out of memory when decompressing streams from untrusted sources.
 *
 * On success, xz_dec_init() returns a pointer to struct xz_dec, which is
 * ready to be used with xz_dec_run(). If memory allocation fails,
 * xz_dec_init() returns NULL.
 */
 XZ_EXTERN struct xz_dec *xz_dec_init(enum xz_mode mode, uint32_t dict_max);
 /**
 * xz_dec_run() - Run the XZ decoder
 * @s:          Decoder state allocated using xz_dec_init()
 * @b:          Input and output buffers
 *
 * The possible return values depend on build options and operation mode.
 * See enum xz_ret for details.
 *
 * Note that if an error occurs in single-call mode (return value is not
 * XZ_STREAM_END), b->in_pos and b->out_pos are not modified and the
 * contents of the output buffer from b->out[b->out_pos] onward are
 * undefined. This is true even after XZ_BUF_ERROR, because with some filter
 * chains, there may be a second pass over the output buffer, and this pass
 * cannot be properly done if the output buffer is truncated. Thus, you
 * cannot give the single-call decoder a buffer that is too small and then
 * expect to get that amount of valid data from the beginning of the stream. You must use
 * the multi-call decoder if you don't want to uncompress the whole stream.
 */
 XZ_EXTERN enum xz_ret xz_dec_run(struct xz_dec *s, struct xz_buf *b);
 /**
 * xz_dec_reset() - Reset an already allocated decoder state
 * @s:          Decoder state allocated using xz_dec_init()
 *
 * This function can be used to reset the multi-call decoder state without
 * freeing and reallocating memory with xz_dec_end() and xz_dec_init().
 *
 * In single-call mode, xz_dec_reset() is always called in the beginning of
 * xz_dec_run(). Thus, explicit call to xz_dec_reset() is useful only in
 * multi-call mode.
 */
 XZ_EXTERN void xz_dec_reset(struct xz_dec *s);
 /**
 * xz_dec_end() - Free the memory allocated for the decoder state
 * @s:          Decoder state allocated using xz_dec_init(). If s is NULL,
 *              this function does nothing.
 */
 XZ_EXTERN void xz_dec_end(struct xz_dec *s);
 /*
 * Standalone build (userspace build or in-kernel build for boot time use)
 * needs a CRC32 implementation. For normal in-kernel use, kernel's own
 * CRC32 module is used instead, and users of this module don't need to
 * care about the functions below.
 */
 #ifndef XZ_INTERNAL_CRC32
 #	define XZ_INTERNAL_CRC32 1
 #endif
 #if XZ_INTERNAL_CRC32
 /*
 * This must be called before any other xz_* function to initialize
 * the CRC32 lookup table.
 */
 XZ_EXTERN void xz_crc32_init(void);
 /*
 * Update CRC32 value using the polynomial from IEEE-802.3. To start a new
 * calculation, the third argument must be zero. To continue the calculation,
 * the previously returned value is passed as the third argument.
 */
 XZ_EXTERN uint32_t xz_crc32(const uint8_t *buf, size_t size, uint32_t crc);
 #endif
 /*
  * Generic decompressor entry point for XZ data, implemented in
  * lib/decompress_unxz.c. fill() and flush() are optional callbacks for
  * refilling the input and draining the output; when both are NULL the
  * whole stream is decoded from @in to @out in one call. Returns 0 on
  * success and -1 on error, in which case error() has been called with
  * a message.
  */
 STATIC int decompress_unxz(unsigned char *in, long in_size,
 		     long (*fill)(void *dest, unsigned long size),
 		     long (*flush)(void *src, unsigned long size),
 		     unsigned char *out, long *in_used,
 		     void (*error)(char *x));
 #endif
--- a/lib/Kconfig
+++ b/lib/Kconfig
@ -18,6 +18,10 @@ config LZ4_DECOMPRESS
 	bool "include lz4 uncompression support"
 	select UNCOMPRESS
 config XZ_DECOMPRESS
 	bool "include xz uncompression support"
 	select UNCOMPRESS
 config GENERIC_FIND_NEXT_BIT
 	def_bool n
--- a/lib/Makefile
+++ b/lib/Makefile
@ -20,6 +20,7 @@ obj-y			+= make_directory.o
 obj-y			+= math.o
 obj-$(CONFIG_BZLIB)	+= decompress_bunzip2.o
 obj-$(CONFIG_ZLIB)	+= decompress_inflate.o zlib_inflate/
 obj-$(CONFIG_XZ_DECOMPRESS) += decompress_unxz.o xz/
 obj-$(CONFIG_CMDLINE_EDITING)	+= readline.o
 obj-$(CONFIG_SIMPLE_READLINE)	+= readline_simple.o
 obj-$(CONFIG_GLOB)		+= fnmatch.o
--- a/lib/decompress_unxz.c
+++ b/lib/decompress_unxz.c
@ -0,0 +1,376 @@
 /*
 * Wrapper for decompressing XZ-compressed kernel, initramfs, and initrd
 *
 * Author: Lasse Collin <lasse.collin@tukaani.org>
 *
 * This file has been put into the public domain.
 * You can do whatever you want with this file.
 */
 /*
 * Important notes about in-place decompression
 *
 * At least on x86, the kernel is decompressed in place: the compressed data
 * is placed to the end of the output buffer, and the decompressor overwrites
 * most of the compressed data. There must be enough safety margin to
 * guarantee that the write position is always behind the read position.
 *
 * The safety margin for XZ with LZMA2 or BCJ+LZMA2 is calculated below.
 * Note that the margin with XZ is bigger than with Deflate (gzip)!
 *
 * The worst case for in-place decompression is that the beginning of
 * the file is compressed extremely well, and the rest of the file is
 * uncompressible. Thus, we must look for worst-case expansion when the
 * compressor is encoding uncompressible data.
 *
 * The structure of the .xz file in case of a compressed kernel is as follows.
 * Sizes (as bytes) of the fields are in parentheses.
 *
 *    Stream Header (12)
 *    Block:
 *      Block Header (8-12)
 *      Compressed Data (N)
 *      Block Padding (0-3)
 *      CRC32 (4)
 *    Index (8-20)
 *    Stream Footer (12)
 *
 * Normally there is exactly one Block, but let's assume that there are
 * 2-4 Blocks just in case. Because Stream Header and also Block Header
 * of the first Block don't make the decompressor produce any uncompressed
 * data, we can ignore them from our calculations. Block Headers of possible
 * additional Blocks have to be taken into account still. With these
 * assumptions, it is safe to assume that the total header overhead is
 * less than 128 bytes.
 *
 * Compressed Data contains LZMA2 or BCJ+LZMA2 encoded data. Since BCJ
 * doesn't change the size of the data, it is enough to calculate the
 * safety margin for LZMA2.
 *
 * LZMA2 stores the data in chunks. Each chunk has a header whose size is
 * a maximum of 6 bytes, but to get round 2^n numbers, let's assume that
 * the maximum chunk header size is 8 bytes. After the chunk header, there
 * may be up to 64 KiB of actual payload in the chunk. Often the payload is
 * quite a bit smaller though; to be safe, let's assume that an average
 * chunk has only 32 KiB of payload.
 *
 * The maximum uncompressed size of the payload is 2 MiB. The minimum
 * uncompressed size of the payload is in practice never less than the
 * payload size itself. The LZMA2 format would allow uncompressed size
 * to be less than the payload size, but no sane compressor creates such
 * files. LZMA2 supports storing uncompressible data in uncompressed form,
 * so there's never a need to create payloads whose uncompressed size is
 * smaller than the compressed size.
 *
 * The assumption, that the uncompressed size of the payload is never
 * smaller than the payload itself, is valid only when talking about
 * the payload as a whole. It is possible that the payload has parts where
 * the decompressor consumes more input than it produces output. Calculating
 * the worst case for this would be tricky. Instead of trying to do that,
 * let's simply make sure that the decompressor never overwrites any bytes
 * of the payload which it is currently reading.
 *
 * Now we have enough information to calculate the safety margin. We need
 *   - 128 bytes for the .xz file format headers;
 *   - 8 bytes per every 32 KiB of uncompressed size (one LZMA2 chunk header
 *     per chunk, each chunk having average payload size of 32 KiB); and
 *   - 64 KiB (biggest possible LZMA2 chunk payload size) to make sure that
 *     the decompressor never overwrites anything from the LZMA2 chunk
 *     payload it is currently reading.
 *
 * We get the following formula:
 *
 *    safety_margin = 128 + uncompressed_size * 8 / 32768 + 65536
 *                  = 128 + (uncompressed_size >> 12) + 65536
 *
 * For comparison, according to arch/x86/boot/compressed/misc.c, the
 * equivalent formula for Deflate is this:
 *
 *    safety_margin = 18 + (uncompressed_size >> 12) + 32768
 *
 * Thus, when updating Deflate-only in-place kernel decompressor to
 * support XZ, the fixed overhead has to be increased from 18+32768 bytes
 * to 128+65536 bytes.
 */
 /*
 * STATIC is defined to "static" if we are being built for kernel
 * decompression (pre-boot code). <linux/decompress/mm.h> will define
 * STATIC to empty if it wasn't already defined. Since we will need to
 * know later if we are being used for kernel decompression, we define
 * XZ_PREBOOT here.
 */
 #ifdef STATIC
 #	define XZ_PREBOOT
 #endif
 #ifdef __KERNEL__
 #	include <linux/decompress/mm.h>
 #endif
 #define XZ_EXTERN STATIC
 #ifndef XZ_PREBOOT
 #	include <malloc.h>
 #	include <linux/xz.h>
 #else
 /*
 * Use the internal CRC32 code instead of kernel's CRC32 module, which
 * is not available in early phase of booting.
 */
 #define XZ_INTERNAL_CRC32 1
 /*
 * For boot time use, we enable only the BCJ filter of the current
 * architecture or none if no BCJ filter is available for the architecture.
 */
 #ifdef CONFIG_X86
 #	define XZ_DEC_X86
 #endif
 #ifdef CONFIG_PPC
 #	define XZ_DEC_POWERPC
 #endif
 #ifdef CONFIG_ARM
 #	define XZ_DEC_ARM
 #endif
 #ifdef CONFIG_IA64
 #	define XZ_DEC_IA64
 #endif
 #ifdef CONFIG_SPARC
 #	define XZ_DEC_SPARC
 #endif
 /*
 * This will get the basic headers so that memeq() and others
 * can be defined.
 */
 #include "xz/xz_private.h"
 /*
 * Replace the normal allocation functions with the versions from
 * <linux/decompress/mm.h>. vfree() needs to support vfree(NULL)
 * when XZ_DYNALLOC is used, but the pre-boot free() doesn't support it.
 * Workaround it here because the other decompressors don't need it.
 */
 #undef kmalloc
 #undef kfree
 #undef vmalloc
 #undef vfree
 /* Map the kernel allocator names onto plain malloc()/free(). */
 #define kmalloc(size, flags) malloc(size)
 #define kfree(ptr) free(ptr)
 #define vmalloc(size) malloc(size)
 /*
  * NULL-tolerant free. The argument is captured once in a local so that
  * expressions with side effects or low-precedence operators are evaluated
  * exactly once and compared as a whole.
  */
 #define vfree(ptr) do { \
 		void *xz_vfree_p = (ptr); \
 		if (xz_vfree_p != NULL) \
 			free(xz_vfree_p); \
 	} while (0)
 /*
 * FIXME: Not all basic memory functions are provided in architecture-specific
 * files (yet). We define our own versions here for now, but this should be
 * only a temporary solution.
 *
 * memeq and memzero are not used much and any remotely sane implementation
 * is fast enough. memcpy/memmove speed matters in multi-call mode, but
 * the kernel image is decompressed in single-call mode, in which only
 * memcpy speed can matter and only if there is a lot of uncompressible data
 * (LZMA2 stores uncompressible chunks in uncompressed form). Thus, the
 * functions below should just be kept small; it's probably not worth
 * optimizing for speed.
 */
 #ifndef memeq
 /*
  * Compare two memory areas byte by byte. Returns true when the first
  * size bytes of both areas are identical (always true for size == 0).
  */
 static bool memeq(const void *a, const void *b, size_t size)
 {
 	const uint8_t *lhs = a;
 	const uint8_t *rhs = b;
 	while (size-- != 0) {
 		if (*lhs++ != *rhs++)
 			return false;
 	}
 	return true;
 }
 #endif
 #ifndef memzero
 /* Set the first size bytes of buf to zero. */
 static void memzero(void *buf, size_t size)
 {
 	uint8_t *bytes = buf;
 	size_t i;
 	for (i = 0; i < size; ++i)
 		bytes[i] = '\0';
 }
 #endif
 /*
 * Since we need memmove anyway, we could use it as memcpy too.
 * Commented out for now to avoid breaking things.
 */
 /*
 #ifndef memcpy
 #	define memcpy memmove
 #endif
 */
 #include "xz/xz_crc32.c"
 #include "xz/xz_dec_stream.c"
 #include "xz/xz_dec_lzma2.c"
 #include "xz/xz_dec_bcj.c"
 #endif /* XZ_PREBOOT */
 /* Size of the input and output buffers in multi-call mode */
 #define XZ_IOBUF_SIZE 4096
 /*
  * This function implements the API defined in <linux/decompress/generic.h>.
  *
  * This wrapper will automatically choose single-call or multi-call mode
  * of the native XZ decoder API. The single-call mode can be used only when
  * both input and output buffers are available as a single chunk, i.e. when
  * fill() and flush() won't be used.
  *
  * @in:       Input buffer. If NULL, a buffer of XZ_IOBUF_SIZE bytes is
  *            allocated here and freed again before returning.
  * @in_size:  Number of bytes initially available in the input buffer.
  * @fill:     Optional callback that refills the input buffer with up to
  *            XZ_IOBUF_SIZE bytes. A negative return value aborts decoding.
  * @flush:    Optional callback that consumes decoded data. If NULL, the
  *            output is written directly to @out with no size limit.
  * @out:      Output buffer, used only when @flush is NULL.
  * @in_used:  If not NULL, receives the number of input bytes consumed.
  * @error:    Called with a message describing the failure.
  *
  * Returns 0 when the stream has been decoded completely and -1 on error.
  */
 STATIC int decompress_unxz(unsigned char *in, long in_size,
 		     long (*fill)(void *dest, unsigned long size),
 		     long (*flush)(void *src, unsigned long size),
 		     unsigned char *out, long *in_used,
 		     void (*error)(char *x))
 {
 	struct xz_buf b;
 	struct xz_dec *s;
 	enum xz_ret ret;
 	bool must_free_in = false;
 #if XZ_INTERNAL_CRC32
 	xz_crc32_init();
 #endif
 	if (in_used != NULL)
 		*in_used = 0;
 	if (fill == NULL && flush == NULL)
 		s = xz_dec_init(XZ_SINGLE, 0);
 	else
 		s = xz_dec_init(XZ_DYNALLOC, (uint32_t)-1);
 	if (s == NULL)
 		goto error_alloc_state;
 	if (flush == NULL) {
 		/* Decode straight into the caller's buffer, size unknown. */
 		b.out = out;
 		b.out_size = (size_t)-1;
 	} else {
 		b.out_size = XZ_IOBUF_SIZE;
 		b.out = malloc(XZ_IOBUF_SIZE);
 		if (b.out == NULL)
 			goto error_alloc_out;
 	}
 	if (in == NULL) {
 		must_free_in = true;
 		in = malloc(XZ_IOBUF_SIZE);
 		if (in == NULL)
 			goto error_alloc_in;
 	}
 	b.in = in;
 	b.in_pos = 0;
 	b.in_size = in_size;
 	b.out_pos = 0;
 	if (fill == NULL && flush == NULL) {
 		ret = xz_dec_run(s, &b);
 	} else {
 		do {
 			if (b.in_pos == b.in_size && fill != NULL) {
 				if (in_used != NULL)
 					*in_used += b.in_pos;
 				b.in_pos = 0;
 				in_size = fill(in, XZ_IOBUF_SIZE);
 				if (in_size < 0) {
 					/*
 					 * This isn't an optimal error code
 					 * but it probably isn't worth making
 					 * a new one either.
 					 */
 					ret = XZ_BUF_ERROR;
 					break;
 				}
 				b.in_size = in_size;
 			}
 			ret = xz_dec_run(s, &b);
 			if (flush != NULL && (b.out_pos == b.out_size
 					|| (ret != XZ_OK && b.out_pos > 0))) {
 				/*
 				 * Setting ret here may hide an error
 				 * returned by xz_dec_run(), but probably
 				 * it's not too bad.
 				 */
 				if (flush(b.out, b.out_pos) != (long)b.out_pos)
 					ret = XZ_BUF_ERROR;
 				b.out_pos = 0;
 			}
 		} while (ret == XZ_OK);
 	}
 	/*
 	 * Release the buffers allocated above. This is done for both modes
 	 * so that an input buffer allocated because in was NULL is not
 	 * leaked when single-call mode was selected.
 	 */
 	if (must_free_in)
 		free(in);
 	if (flush != NULL)
 		free(b.out);
 	if (in_used != NULL)
 		*in_used += b.in_pos;
 	xz_dec_end(s);
 	switch (ret) {
 	case XZ_STREAM_END:
 		return 0;
 	case XZ_MEM_ERROR:
 		/* This can occur only in multi-call mode. */
 		error("XZ decompressor ran out of memory");
 		break;
 	case XZ_FORMAT_ERROR:
 		error("Input is not in the XZ format (wrong magic bytes)");
 		break;
 	case XZ_OPTIONS_ERROR:
 		error("Input was encoded with settings that are not "
 				"supported by this XZ decoder");
 		break;
 	case XZ_DATA_ERROR:
 	case XZ_BUF_ERROR:
 		error("XZ-compressed data is corrupt");
 		break;
 	default:
 		error("Bug in the XZ decompressor");
 		break;
 	}
 	return -1;
 error_alloc_in:
 	if (flush != NULL)
 		free(b.out);
 error_alloc_out:
 	xz_dec_end(s);
 error_alloc_state:
 	error("XZ decompressor ran out of memory");
 	return -1;
 }
 /*
 * This macro is used by architecture-specific files to decompress
 * the kernel image.
 */
 #define decompress decompress_unxz
--- a/lib/uncompress.c
+++ b/lib/uncompress.c
@ -21,6 +21,7 @@
 #include <bunzip2.h>
 #include <gunzip.h>
 #include <lzo.h>
 #include <linux/xz.h>
 #include <linux/decompress/unlz4.h>
 #include <errno.h>
 #include <filetype.h>
@ -117,6 +118,11 @@ int uncompress(unsigned char *inbuf, int len,
 	case filetype_lz4_compressed:
 		compfn = decompress_unlz4;
 		break;
 #endif
 #ifdef CONFIG_XZ_DECOMPRESS
 	case filetype_xz_compressed:
 		compfn = decompress_unxz;
 		break;
 #endif
 	default:
 		err = asprintf("cannot handle filetype %s", file_type_to_string(ft));
--- a/lib/xz/Makefile
+++ b/lib/xz/Makefile
@ -0,0 +1,2 @@
 # Objects of the XZ decoder, built when CONFIG_XZ_DECOMPRESS is enabled.
 obj-$(CONFIG_XZ_DECOMPRESS) += xz_crc32.o xz_dec_bcj.o \
 			       xz_dec_lzma2.o xz_dec_stream.o
--- a/lib/xz/xz_crc32.c
+++ b/lib/xz/xz_crc32.c
@ -0,0 +1,59 @@
 /*
 * CRC32 using the polynomial from IEEE-802.3
 *
 * Authors: Lasse Collin <lasse.collin@tukaani.org>
 *          Igor Pavlov <http://7-zip.org/>
 *
 * This file has been put into the public domain.
 * You can do whatever you want with this file.
 */
 /*
 * This is not the fastest implementation, but it is pretty compact.
 * The fastest versions of xz_crc32() on modern CPUs without hardware
 * accelerated CRC instruction are 3-5 times as fast as this version,
 * but they are bigger and use more memory for the lookup table.
 */
 #include "xz_private.h"
 /*
 * STATIC_RW_DATA is used in the pre-boot environment on some architectures.
 * See <linux/decompress/mm.h> for details.
 */
 #ifndef STATIC_RW_DATA
 #	define STATIC_RW_DATA static
 #endif
 STATIC_RW_DATA uint32_t xz_crc32_table[256];
 /*
  * Fill xz_crc32_table with one precomputed CRC32 remainder per possible
  * byte value. Each entry is obtained by running eight shift/conditional-XOR
  * steps on the byte value.
  */
 XZ_EXTERN void xz_crc32_init(void)
 {
 	const uint32_t poly = 0xEDB88320;
 	uint32_t idx;
 	for (idx = 0; idx < 256; ++idx) {
 		uint32_t rem = idx;
 		unsigned int bit;
 		for (bit = 0; bit < 8; ++bit) {
 			/* XOR in the polynomial only when the low bit is set. */
 			if (rem & 1)
 				rem = (rem >> 1) ^ poly;
 			else
 				rem >>= 1;
 		}
 		xz_crc32_table[idx] = rem;
 	}
 }
 /*
  * Update a CRC32 value with size bytes from buf using the lookup table
  * filled by xz_crc32_init(). The value is inverted on entry and again on
  * return.
  */
 XZ_EXTERN uint32_t xz_crc32(const uint8_t *buf, size_t size, uint32_t crc)
 {
 	size_t i;
 	crc = ~crc;
 	for (i = 0; i < size; ++i)
 		crc = xz_crc32_table[(crc ^ buf[i]) & 0xFF] ^ (crc >> 8);
 	return ~crc;
 }
--- a/lib/xz/xz_dec_bcj.c
+++ b/lib/xz/xz_dec_bcj.c
@ -0,0 +1,574 @@
 /*
 * Branch/Call/Jump (BCJ) filter decoders
 *
 * Authors: Lasse Collin <lasse.collin@tukaani.org>
 *          Igor Pavlov <http://7-zip.org/>
 *
 * This file has been put into the public domain.
 * You can do whatever you want with this file.
 */
 #include "xz_private.h"
 /*
 * The rest of the file is inside this ifdef. It makes things a little more
 * convenient when building without support for any BCJ filters.
 */
 #ifdef XZ_DEC_BCJ
 /*
 * State of one BCJ filter decoder. It is allocated by xz_dec_bcj_create()
 * and (re)initialized for a given filter by xz_dec_bcj_reset().
 */
 struct xz_dec_bcj {
 	/*
 	 * Type of the BCJ filter being used. The values are compared
 	 * directly against the Filter ID passed to xz_dec_bcj_reset().
 	 */
 	enum {
 		BCJ_X86 = 4,        /* x86 or x86-64 */
 		BCJ_POWERPC = 5,    /* Big endian only */
 		BCJ_IA64 = 6,       /* Big or little endian */
 		BCJ_ARM = 7,        /* Little endian only */
 		BCJ_ARMTHUMB = 8,   /* Little endian only */
 		BCJ_SPARC = 9       /* Big or little endian */
 	} type;
 	/*
 	 * Return value of the next filter in the chain. We need to preserve
 	 * this information across calls, because we must not call the next
 	 * filter anymore once it has returned XZ_STREAM_END.
 	 */
 	enum xz_ret ret;
 	/* True if we are operating in single-call mode. */
 	bool single_call;
 	/*
 	 * Absolute position relative to the beginning of the uncompressed
 	 * data (in a single .xz Block). We care only about the lowest 32
 	 * bits so this doesn't need to be uint64_t even with big files.
 	 */
 	uint32_t pos;
 	/* x86 filter state */
 	uint32_t x86_prev_mask;
 	/* Temporary space to hold the variables from struct xz_buf */
 	uint8_t *out;
 	size_t out_pos;
 	size_t out_size;
 	struct {
 		/* Amount of already filtered data in the beginning of buf */
 		size_t filtered;
 		/* Total amount of data currently stored in buf */
 		size_t size;
 		/*
 		 * Buffer to hold a mix of filtered and unfiltered data. This
 		 * needs to be big enough to hold Alignment + 2 * Look-ahead:
 		 *
 		 * Type         Alignment   Look-ahead
 		 * x86              1           4
 		 * PowerPC          4           0
 		 * IA-64           16           0
 		 * ARM              4           0
 		 * ARM-Thumb        2           2
 		 * SPARC            4           0
 		 */
 		uint8_t buf[16];
 	} temp;
 };
 #ifdef XZ_DEC_X86
 /*
 * Tell whether b is 0x00 or 0xFF. The x86 filter applies this test to
 * the most significant byte of the address in an x86 instruction.
 * Returns 1 for those two values and 0 for every other byte.
 */
 static inline int bcj_x86_test_msbyte(uint8_t b)
 {
 	switch (b) {
 	case 0x00:
 	case 0xFF:
 		return 1;
 	default:
 		return 0;
 	}
 }
 /*
 * Apply the x86 filter to buf[0..size). A byte equal to 0xE8 or 0xE9
 * (that is what the 0xFE mask selects) is a candidate whose following
 * 32-bit little endian operand may be rewritten by subtracting
 * s->pos + offset + 5 from it. A candidate byte is examined only when
 * four more bytes follow it, so nothing is done when size <= 4.
 *
 * Returns the number of bytes from the start of buf that were handled.
 * s->x86_prev_mask is read on entry and updated on exit so that the
 * state carries over to the next call.
 *
 * NOTE(review): 0xE8/0xE9 are presumably the CALL/JMP rel32 opcodes;
 * the code itself only shows the byte values.
 */
 static size_t bcj_x86(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
 {
 	static const bool mask_to_allowed_status[8]
 		= { true, true, true, false, true, false, false, false };
 	static const uint8_t mask_to_bit_num[8] = { 0, 1, 2, 2, 3, 3, 3, 3 };
 	size_t i;
 	/* (size_t)-1 makes the first "i - prev_pos" come out as i + 1. */
 	size_t prev_pos = (size_t)-1;
 	uint32_t prev_mask = s->x86_prev_mask;
 	uint32_t src;
 	uint32_t dest;
 	uint32_t j;
 	uint8_t b;
 	if (size <= 4)
 		return 0;
 	/* Keep four bytes after every examined byte available. */
 	size -= 4;
 	for (i = 0; i < size; ++i) {
 		if ((buf[i] & 0xFE) != 0xE8)
 			continue;
 		/* Distance from the previous candidate byte. */
 		prev_pos = i - prev_pos;
 		if (prev_pos > 3) {
 			prev_mask = 0;
 		} else {
 			prev_mask = (prev_mask << (prev_pos - 1)) & 7;
 			if (prev_mask != 0) {
 				b = buf[i + 4 - mask_to_bit_num[prev_mask]];
 				if (!mask_to_allowed_status[prev_mask]
 						|| bcj_x86_test_msbyte(b)) {
 					/* Skip this candidate but remember it. */
 					prev_pos = i;
 					prev_mask = (prev_mask << 1) | 1;
 					continue;
 				}
 			}
 		}
 		prev_pos = i;
 		if (bcj_x86_test_msbyte(buf[i + 4])) {
 			src = get_unaligned_le32(buf + i + 1);
 			while (true) {
 				dest = src - (s->pos + (uint32_t)i + 5);
 				if (prev_mask == 0)
 					break;
 				j = mask_to_bit_num[prev_mask] * 8;
 				b = (uint8_t)(dest >> (24 - j));
 				if (!bcj_x86_test_msbyte(b))
 					break;
 				src = dest ^ (((uint32_t)1 << (32 - j)) - 1);
 			}
 			/* Keep the low 25 bits and sign-extend from bit 24. */
 			dest &= 0x01FFFFFF;
 			dest |= (uint32_t)0 - (dest & 0x01000000);
 			put_unaligned_le32(dest, buf + i + 1);
 			/* The four operand bytes have been consumed. */
 			i += 4;
 		} else {
 			prev_mask = (prev_mask << 1) | 1;
 		}
 	}
 	/* Save the state relative to the end of the processed data. */
 	prev_pos = i - prev_pos;
 	s->x86_prev_mask = prev_pos > 3 ? 0 : prev_mask << (prev_pos - 1);
 	return i;
 }
 #endif
 #ifdef XZ_DEC_POWERPC
 /*
 * Apply the PowerPC filter to buf[0..size) in 4-byte big endian words.
 * Words matching 0x48000001 under the mask 0xFC000003 get
 * s->pos + offset subtracted from the field selected by 0x03FFFFFC.
 * Returns the number of bytes processed (a multiple of four).
 */
 static size_t bcj_powerpc(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
 {
 	size_t off;
 	uint32_t word;
 	for (off = 0; off + 4 <= size; off += 4) {
 		word = get_unaligned_be32(buf + off);
 		if ((word & 0xFC000003) != 0x48000001)
 			continue;
 		word = ((word & 0x03FFFFFC) - (s->pos + (uint32_t)off))
 				& 0x03FFFFFC;
 		put_unaligned_be32(word | 0x48000001, buf + off);
 	}
 	return off;
 }
 #endif
 #ifdef XZ_DEC_IA64
 /*
 * Apply the IA-64 filter to buf[0..size) in 16-byte units. The low five
 * bits of the first byte of each unit index branch_table[], which gives a
 * 3-bit mask telling which of the three instruction slots to inspect.
 * A matching instruction gets s->pos + offset subtracted from its
 * address field. Returns the number of bytes processed (a multiple
 * of 16).
 */
 static size_t bcj_ia64(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
 {
 	static const uint8_t branch_table[32] = {
 		0, 0, 0, 0, 0, 0, 0, 0,
 		0, 0, 0, 0, 0, 0, 0, 0,
 		4, 4, 6, 6, 0, 0, 7, 7,
 		4, 4, 0, 0, 4, 4, 0, 0
 	};
 	/*
 	 * The local variables take a little bit stack space, but it's less
 	 * than what LZMA2 decoder takes, so it doesn't make sense to reduce
 	 * stack usage here without doing that for the LZMA2 decoder too.
 	 */
 	/* Loop counters */
 	size_t i;
 	size_t j;
 	/* Instruction slot (0, 1, or 2) in the 128-bit instruction word */
 	uint32_t slot;
 	/* Bitwise offset of the instruction indicated by slot */
 	uint32_t bit_pos;
 	/* bit_pos split into byte and bit parts */
 	uint32_t byte_pos;
 	uint32_t bit_res;
 	/* Address part of an instruction */
 	uint32_t addr;
 	/* Mask used to detect which instructions to convert */
 	uint32_t mask;
 	/* 41-bit instruction stored somewhere in the lowest 48 bits */
 	uint64_t instr;
 	/* Instruction normalized with bit_res for easier manipulation */
 	uint64_t norm;
 	for (i = 0; i + 16 <= size; i += 16) {
 		mask = branch_table[buf[i] & 0x1F];
 		/* Slots start at bit 5 and are 41 bits apart. */
 		for (slot = 0, bit_pos = 5; slot < 3; ++slot, bit_pos += 41) {
 			if (((mask >> slot) & 1) == 0)
 				continue;
 			byte_pos = bit_pos >> 3;
 			bit_res = bit_pos & 7;
 			/* Load six bytes, least significant byte first. */
 			instr = 0;
 			for (j = 0; j < 6; ++j)
 				instr |= (uint64_t)(buf[i + j + byte_pos])
 						<< (8 * j);
 			norm = instr >> bit_res;
 			if (((norm >> 37) & 0x0F) == 0x05
 					&& ((norm >> 9) & 0x07) == 0) {
 				/* Address: bits 13..32 plus bit 36 as bit 20 */
 				addr = (norm >> 13) & 0x0FFFFF;
 				addr |= ((uint32_t)(norm >> 36) & 1) << 20;
 				addr <<= 4;
 				addr -= s->pos + (uint32_t)i;
 				addr >>= 4;
 				/* Clear the old address bits, insert the new */
 				norm &= ~((uint64_t)0x8FFFFF << 13);
 				norm |= (uint64_t)(addr & 0x0FFFFF) << 13;
 				norm |= (uint64_t)(addr & 0x100000)
 						<< (36 - 20);
 				/* Keep the bit_res bits below the slot. */
 				instr &= (1 << bit_res) - 1;
 				instr |= norm << bit_res;
 				for (j = 0; j < 6; j++)
 					buf[i + j + byte_pos]
 						= (uint8_t)(instr >> (8 * j));
 			}
 		}
 	}
 	return i;
 }
 #endif
 #ifdef XZ_DEC_ARM
 /*
 * Apply the ARM filter to buf[0..size) in 4-byte units. When the fourth
 * byte of a unit is 0xEB, the 24-bit little endian value in the first
 * three bytes is shifted left by two, reduced by s->pos + offset + 8,
 * shifted back and stored. Returns the number of bytes processed.
 */
 static size_t bcj_arm(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
 {
 	size_t off;
 	uint32_t target;
 	for (off = 0; off + 4 <= size; off += 4) {
 		uint8_t *insn = buf + off;
 		if (insn[3] != 0xEB)
 			continue;
 		target = (uint32_t)insn[0]
 				| ((uint32_t)insn[1] << 8)
 				| ((uint32_t)insn[2] << 16);
 		target <<= 2;
 		target -= s->pos + (uint32_t)off + 8;
 		target >>= 2;
 		insn[0] = (uint8_t)target;
 		insn[1] = (uint8_t)(target >> 8);
 		insn[2] = (uint8_t)(target >> 16);
 	}
 	return off;
 }
 #endif
 #ifdef XZ_DEC_ARMTHUMB
 /*
 * Apply the ARM-Thumb filter to buf[0..size). The data is scanned in
 * 2-byte steps for a 4-byte pattern (byte 1 is 0xF0..0xF7, byte 3 is
 * 0xF8..0xFF). The 22-bit value spread over the pattern is shifted left
 * by one, reduced by s->pos + offset + 4, shifted back and stored.
 * A converted pattern is skipped as a whole. Returns the number of bytes
 * processed.
 */
 static size_t bcj_armthumb(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
 {
 	size_t off = 0;
 	uint32_t target;
 	while (off + 4 <= size) {
 		uint8_t *p = buf + off;
 		if ((p[1] & 0xF8) != 0xF0 || (p[3] & 0xF8) != 0xF8) {
 			off += 2;
 			continue;
 		}
 		target = (((uint32_t)p[1] & 0x07) << 19)
 				| ((uint32_t)p[0] << 11)
 				| (((uint32_t)p[3] & 0x07) << 8)
 				| (uint32_t)p[2];
 		target <<= 1;
 		target -= s->pos + (uint32_t)off + 4;
 		target >>= 1;
 		p[1] = (uint8_t)(0xF0 | ((target >> 19) & 0x07));
 		p[0] = (uint8_t)(target >> 11);
 		p[3] = (uint8_t)(0xF8 | ((target >> 8) & 0x07));
 		p[2] = (uint8_t)target;
 		off += 4;
 	}
 	return off;
 }
 #endif
 #ifdef XZ_DEC_SPARC
 /*
 * Apply the SPARC filter to buf[0..size) in 4-byte big endian words.
 * A word whose top ten bits are 0x100 or 0x1FF is shifted left by two,
 * reduced by s->pos + offset, shifted back and re-encoded before being
 * stored. Returns the number of bytes processed (a multiple of four).
 */
 static size_t bcj_sparc(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
 {
 	size_t off;
 	uint32_t word;
 	uint32_t top;
 	for (off = 0; off + 4 <= size; off += 4) {
 		word = get_unaligned_be32(buf + off);
 		top = word >> 22;
 		if (top != 0x100 && top != 0x1FF)
 			continue;
 		word <<= 2;
 		word -= s->pos + (uint32_t)off;
 		word >>= 2;
 		word = ((uint32_t)0x40000000 - (word & 0x400000))
 				| 0x40000000 | (word & 0x3FFFFF);
 		put_unaligned_be32(word, buf + off);
 	}
 	return off;
 }
 #endif
 /*
 * Run the filter selected by s->type on buf[*pos..size) and advance both
 * *pos and s->pos by the number of bytes the filter reports as done.
 *
 * NOTE: The dispatch is a switch statement on purpose. Function pointers
 * could be problematic in the kernel boot code, which must avoid
 * pointers to static data (at least on x86).
 */
 static void bcj_apply(struct xz_dec_bcj *s,
 		      uint8_t *buf, size_t *pos, size_t size)
 {
 	uint8_t *start = buf + *pos;
 	size_t avail = size - *pos;
 	/* Stays zero if no case matches; that also silences warnings. */
 	size_t done = 0;
 	switch (s->type) {
 #ifdef XZ_DEC_X86
 	case BCJ_X86:
 		done = bcj_x86(s, start, avail);
 		break;
 #endif
 #ifdef XZ_DEC_POWERPC
 	case BCJ_POWERPC:
 		done = bcj_powerpc(s, start, avail);
 		break;
 #endif
 #ifdef XZ_DEC_IA64
 	case BCJ_IA64:
 		done = bcj_ia64(s, start, avail);
 		break;
 #endif
 #ifdef XZ_DEC_ARM
 	case BCJ_ARM:
 		done = bcj_arm(s, start, avail);
 		break;
 #endif
 #ifdef XZ_DEC_ARMTHUMB
 	case BCJ_ARMTHUMB:
 		done = bcj_armthumb(s, start, avail);
 		break;
 #endif
 #ifdef XZ_DEC_SPARC
 	case BCJ_SPARC:
 		done = bcj_sparc(s, start, avail);
 		break;
 #endif
 	default:
 		/* Never reached. */
 		break;
 	}
 	*pos += done;
 	s->pos += done;
 }
 /*
 * Copy as much already filtered data from temp as fits into the output
 * buffer, then slide whatever is left in temp (filtered or not) down to
 * the start of temp.buf.
 */
 static void bcj_flush(struct xz_dec_bcj *s, struct xz_buf *b)
 {
 	size_t room = b->out_size - b->out_pos;
 	size_t n = min_t(size_t, s->temp.filtered, room);
 	memcpy(b->out + b->out_pos, s->temp.buf, n);
 	b->out_pos += n;
 	s->temp.filtered -= n;
 	s->temp.size -= n;
 	/* Source and destination may overlap, hence memmove(). */
 	memmove(s->temp.buf, s->temp.buf + n, s->temp.size);
 }
 /*
 * The BCJ filter functions are primitive in the sense that they process
 * the data in chunks of 1-16 bytes. To hide this issue, this function
 * does some buffering.
 *
 * s is the BCJ state, lzma2 is the next decoder in the chain and b is
 * the caller's buffer. Returns XZ_OK while filtered data is still
 * pending in temp, otherwise the return value of the LZMA2 decoder.
 */
 XZ_EXTERN enum xz_ret xz_dec_bcj_run(struct xz_dec_bcj *s,
 				     struct xz_dec_lzma2 *lzma2,
 				     struct xz_buf *b)
 {
 	size_t out_start;
 	/*
 	 * Flush pending already filtered data to the output buffer. Return
 	 * immediately if we couldn't flush everything, or if the next
 	 * filter in the chain had already returned XZ_STREAM_END.
 	 */
 	if (s->temp.filtered > 0) {
 		bcj_flush(s, b);
 		if (s->temp.filtered > 0)
 			return XZ_OK;
 		if (s->ret == XZ_STREAM_END)
 			return XZ_STREAM_END;
 	}
 	/*
 	 * If we have more output space than what is currently pending in
 	 * temp, copy the unfiltered data from temp to the output buffer
 	 * and try to fill the output buffer by decoding more data from the
 	 * next filter in the chain. Apply the BCJ filter on the new data
 	 * in the output buffer. If everything cannot be filtered, copy it
 	 * to temp and rewind the output buffer position accordingly.
 	 *
 	 * This needs to be always run when temp.size == 0 to handle a special
 	 * case where the output buffer is full and the next filter has no
 	 * more output coming but hasn't returned XZ_STREAM_END yet.
 	 */
 	if (s->temp.size < b->out_size - b->out_pos || s->temp.size == 0) {
 		out_start = b->out_pos;
 		memcpy(b->out + b->out_pos, s->temp.buf, s->temp.size);
 		b->out_pos += s->temp.size;
 		s->ret = xz_dec_lzma2_run(lzma2, b);
 		/*
 		 * Errors are returned as is. In single-call mode XZ_OK is
 		 * returned here too, without filtering anything.
 		 */
 		if (s->ret != XZ_STREAM_END
 				&& (s->ret != XZ_OK || s->single_call))
 			return s->ret;
 		/* out_start is advanced past the bytes that got filtered. */
 		bcj_apply(s, b->out, &out_start, b->out_pos);
 		/*
 		 * As an exception, if the next filter returned XZ_STREAM_END,
 		 * we can do that too, since the last few bytes that remain
 		 * unfiltered are meant to remain unfiltered.
 		 */
 		if (s->ret == XZ_STREAM_END)
 			return XZ_STREAM_END;
 		/* Move the unfiltered tail from the output back into temp. */
 		s->temp.size = b->out_pos - out_start;
 		b->out_pos -= s->temp.size;
 		memcpy(s->temp.buf, b->out + b->out_pos, s->temp.size);
 		/*
 		 * If there wasn't enough input to the next filter to fill
 		 * the output buffer with unfiltered data, there's no point
 		 * to try decoding more data to temp.
 		 */
 		if (b->out_pos + s->temp.size < b->out_size)
 			return XZ_OK;
 	}
 	/*
 	 * We have unfiltered data in temp. If the output buffer isn't full
 	 * yet, try to fill the temp buffer by decoding more data from the
 	 * next filter. Apply the BCJ filter on temp. Then we hopefully can
 	 * fill the actual output buffer by copying filtered data from temp.
 	 * A mix of filtered and unfiltered data may be left in temp; it will
 	 * be taken care on the next call to this function.
 	 */
 	if (b->out_pos < b->out_size) {
 		/* Make b->out{,_pos,_size} temporarily point to s->temp. */
 		s->out = b->out;
 		s->out_pos = b->out_pos;
 		s->out_size = b->out_size;
 		b->out = s->temp.buf;
 		b->out_pos = s->temp.size;
 		b->out_size = sizeof(s->temp.buf);
 		s->ret = xz_dec_lzma2_run(lzma2, b);
 		/* Restore the caller's output buffer before any return. */
 		s->temp.size = b->out_pos;
 		b->out = s->out;
 		b->out_pos = s->out_pos;
 		b->out_size = s->out_size;
 		if (s->ret != XZ_OK && s->ret != XZ_STREAM_END)
 			return s->ret;
 		bcj_apply(s, s->temp.buf, &s->temp.filtered, s->temp.size);
 		/*
 		 * If the next filter returned XZ_STREAM_END, we mark that
 		 * everything is filtered, since the last unfiltered bytes
 		 * of the stream are meant to be left as is.
 		 */
 		if (s->ret == XZ_STREAM_END)
 			s->temp.filtered = s->temp.size;
 		bcj_flush(s, b);
 		if (s->temp.filtered > 0)
 			return XZ_OK;
 	}
 	return s->ret;
 }
 /*
 * Allocate a BCJ decoder state and record whether it will be used in
 * single-call mode. Returns NULL if the allocation fails. The other
 * fields are left for xz_dec_bcj_reset() to initialize.
 */
 XZ_EXTERN struct xz_dec_bcj *xz_dec_bcj_create(bool single_call)
 {
 	struct xz_dec_bcj *s;
 	s = kmalloc(sizeof(struct xz_dec_bcj), GFP_KERNEL);
 	if (s == NULL)
 		return NULL;
 	s->single_call = single_call;
 	return s;
 }
 /*
 * Prepare s for decoding with the BCJ filter identified by Filter ID id.
 * Returns XZ_OPTIONS_ERROR, leaving s untouched, if support for that
 * filter was not compiled in; otherwise clears the state and returns
 * XZ_OK.
 */
 XZ_EXTERN enum xz_ret xz_dec_bcj_reset(struct xz_dec_bcj *s, uint8_t id)
 {
 	/* Only the Filter IDs whose decoders are built in are accepted. */
 	switch (id) {
 #ifdef XZ_DEC_X86
 	case BCJ_X86:
 #endif
 #ifdef XZ_DEC_POWERPC
 	case BCJ_POWERPC:
 #endif
 #ifdef XZ_DEC_IA64
 	case BCJ_IA64:
 #endif
 #ifdef XZ_DEC_ARM
 	case BCJ_ARM:
 #endif
 #ifdef XZ_DEC_ARMTHUMB
 	case BCJ_ARMTHUMB:
 #endif
 #ifdef XZ_DEC_SPARC
 	case BCJ_SPARC:
 #endif
 		break;
 	default:
 		/* Unsupported Filter ID */
 		return XZ_OPTIONS_ERROR;
 	}
 	/* Nothing buffered, nothing filtered, position back at zero. */
 	s->temp.size = 0;
 	s->temp.filtered = 0;
 	s->x86_prev_mask = 0;
 	s->pos = 0;
 	s->ret = XZ_OK;
 	s->type = id;
 	return XZ_OK;
 }
 #endif
--- a/lib/xz/xz_dec_lzma2.c
+++ b/lib/xz/xz_dec_lzma2.c
--- a/lib/xz/xz_dec_stream.c
+++ b/lib/xz/xz_dec_stream.c
@ -0,0 +1,821 @@
 /*
 * .xz Stream decoder
 *
 * Author: Lasse Collin <lasse.collin@tukaani.org>
 *
 * This file has been put into the public domain.
 * You can do whatever you want with this file.
 */
 #include "xz_private.h"
 #include "xz_stream.h"
 /*
 * Hash used to validate the Index field. unpadded and uncompressed are
 * running sums of the sizes; crc32 is a CRC32 computed over this whole
 * struct each time a Block or an Index Record has been added.
 */
 struct xz_dec_hash {
 	vli_type unpadded;
 	vli_type uncompressed;
 	uint32_t crc32;
 };
 /*
 * State of the .xz Stream decoder. It is allocated by xz_dec_init(),
 * brought to its initial state by xz_dec_reset() and freed by
 * xz_dec_end().
 */
 struct xz_dec {
 	/* Position in dec_main() */
 	enum {
 		SEQ_STREAM_HEADER,
 		SEQ_BLOCK_START,
 		SEQ_BLOCK_HEADER,
 		SEQ_BLOCK_UNCOMPRESS,
 		SEQ_BLOCK_PADDING,
 		SEQ_BLOCK_CHECK,
 		SEQ_INDEX,
 		SEQ_INDEX_PADDING,
 		SEQ_INDEX_CRC32,
 		SEQ_STREAM_FOOTER
 	} sequence;
 	/* Position in variable-length integers and Check fields */
 	uint32_t pos;
 	/* Variable-length integer decoded by dec_vli() */
 	vli_type vli;
 	/* Saved in_pos and out_pos */
 	size_t in_start;
 	size_t out_start;
 	/* CRC32 value in Block or Index */
 	uint32_t crc32;
 	/* Type of the integrity check calculated from uncompressed data */
 	enum xz_check check_type;
 	/* Operation mode */
 	enum xz_mode mode;
 	/*
 	 * True if the next call to xz_dec_run() is allowed to return
 	 * XZ_BUF_ERROR.
 	 */
 	bool allow_buf_error;
 	/* Information stored in Block Header */
 	struct {
 		/*
 		 * Value stored in the Compressed Size field, or
 		 * VLI_UNKNOWN if Compressed Size is not present.
 		 */
 		vli_type compressed;
 		/*
 		 * Value stored in the Uncompressed Size field, or
 		 * VLI_UNKNOWN if Uncompressed Size is not present.
 		 */
 		vli_type uncompressed;
 		/* Size of the Block Header field */
 		uint32_t size;
 	} block_header;
 	/* Information collected when decoding Blocks */
 	struct {
 		/* Observed compressed size of the current Block */
 		vli_type compressed;
 		/* Observed uncompressed size of the current Block */
 		vli_type uncompressed;
 		/* Number of Blocks decoded so far */
 		vli_type count;
 		/*
 		 * Hash calculated from the Block sizes. This is used to
 		 * validate the Index field.
 		 */
 		struct xz_dec_hash hash;
 	} block;
 	/* Variables needed when verifying the Index field */
 	struct {
 		/* Position in dec_index() */
 		enum {
 			SEQ_INDEX_COUNT,
 			SEQ_INDEX_UNPADDED,
 			SEQ_INDEX_UNCOMPRESSED
 		} sequence;
 		/* Size of the Index in bytes */
 		vli_type size;
 		/* Number of Records (matches block.count in valid files) */
 		vli_type count;
 		/*
 		 * Hash calculated from the Records (matches block.hash in
 		 * valid files).
 		 */
 		struct xz_dec_hash hash;
 	} index;
 	/*
 	 * Temporary buffer needed to hold Stream Header, Block Header,
 	 * and Stream Footer. The Block Header is the biggest (1 KiB)
 	 * so we reserve space according to that. buf[] has to be aligned
 	 * to a multiple of four bytes; the size_t variables before it
 	 * should guarantee this.
 	 */
 	struct {
 		/* Number of bytes already copied into buf by fill_temp() */
 		size_t pos;
 		/* Number of bytes fill_temp() has to collect in total */
 		size_t size;
 		uint8_t buf[1024];
 	} temp;
 	/* LZMA2 decoder state created in xz_dec_init() */
 	struct xz_dec_lzma2 *lzma2;
 #ifdef XZ_DEC_BCJ
 	/* BCJ decoder state created in xz_dec_init() */
 	struct xz_dec_bcj *bcj;
 	/* True if the current Block uses a BCJ filter before LZMA2 */
 	bool bcj_active;
 #endif
 };
 #ifdef XZ_DEC_ANY_CHECK
 /*
 * Sizes of the Check field with different Check IDs. The table is indexed
 * with s->check_type and has exactly 16 entries, so the index must not
 * exceed 15.
 */
 static const uint8_t check_sizes[16] = {
 	0,
 	4, 4, 4,
 	8, 8, 8,
 	16, 16, 16,
 	32, 32, 32,
 	64, 64, 64
 };
 #endif
 /*
 * Copy input from b->in[b->in_pos] onwards into s->temp.buf, continuing
 * at s->temp.pos. The caller must have set s->temp.size to the total
 * number of bytes wanted. Returns true, and rewinds s->temp.pos to zero,
 * once s->temp.size bytes have been collected; returns false if more
 * input is needed.
 */
 static bool fill_temp(struct xz_dec *s, struct xz_buf *b)
 {
 	size_t avail = b->in_size - b->in_pos;
 	size_t missing = s->temp.size - s->temp.pos;
 	size_t n = min_t(size_t, avail, missing);
 	memcpy(s->temp.buf + s->temp.pos, b->in + b->in_pos, n);
 	s->temp.pos += n;
 	b->in_pos += n;
 	if (s->temp.pos != s->temp.size)
 		return false;
 	s->temp.pos = 0;
 	return true;
 }
 /*
 * Decode a variable-length integer (little-endian base-128 encoding)
 * into s->vli, using s->pos as the bit position so that decoding can
 * resume on a later call. Returns XZ_STREAM_END when the integer is
 * complete, XZ_OK when the input ran out first, and XZ_DATA_ERROR for a
 * non-minimal or too long encoding.
 */
 static enum xz_ret dec_vli(struct xz_dec *s, const uint8_t *in,
 			   size_t *in_pos, size_t in_size)
 {
 	uint8_t cur;
 	if (s->pos == 0)
 		s->vli = 0;
 	while (*in_pos < in_size) {
 		cur = in[(*in_pos)++];
 		s->vli |= (vli_type)(cur & 0x7F) << s->pos;
 		if (cur & 0x80) {
 			/* Continuation bit set: more bytes follow. */
 			s->pos += 7;
 			if (s->pos == 7 * VLI_BYTES_MAX)
 				return XZ_DATA_ERROR;
 			continue;
 		}
 		/* Don't allow non-minimal encodings. */
 		if (cur == 0 && s->pos != 0)
 			return XZ_DATA_ERROR;
 		s->pos = 0;
 		return XZ_STREAM_END;
 	}
 	return XZ_OK;
 }
 /*
 * Decode the Compressed Data field from a Block. Update and validate
 * the observed compressed and uncompressed sizes of the Block so that
 * they don't exceed the values possibly stored in the Block Header
 * (validation assumes that no integer overflow occurs, since vli_type
 * is normally uint64_t). Update the CRC32 if presence of the CRC32
 * field was indicated in Stream Header.
 *
 * Once the decoding is finished, validate that the observed sizes match
 * the sizes possibly stored in the Block Header. Update the hash and
 * Block count, which are later used to validate the Index field.
 */
 static enum xz_ret dec_block(struct xz_dec *s, struct xz_buf *b)
 {
 	enum xz_ret ret;
 	/* Remember where this call started to measure its progress. */
 	s->in_start = b->in_pos;
 	s->out_start = b->out_pos;
 	/* Note: the else below pairs with the statement after #endif. */
 #ifdef XZ_DEC_BCJ
 	if (s->bcj_active)
 		ret = xz_dec_bcj_run(s->bcj, s->lzma2, b);
 	else
 #endif
 		ret = xz_dec_lzma2_run(s->lzma2, b);
 	s->block.compressed += b->in_pos - s->in_start;
 	s->block.uncompressed += b->out_pos - s->out_start;
 	/*
 	 * There is no need to separately check for VLI_UNKNOWN, since
 	 * the observed sizes are always smaller than VLI_UNKNOWN.
 	 */
 	if (s->block.compressed > s->block_header.compressed
 			|| s->block.uncompressed
 				> s->block_header.uncompressed)
 		return XZ_DATA_ERROR;
 	/* Only the output produced by this call is added to the CRC32. */
 	if (s->check_type == XZ_CHECK_CRC32)
 		s->crc32 = xz_crc32(b->out + s->out_start,
 				b->out_pos - s->out_start, s->crc32);
 	if (ret == XZ_STREAM_END) {
 		/* The sizes in the Block Header, if present, must match. */
 		if (s->block_header.compressed != VLI_UNKNOWN
 				&& s->block_header.compressed
 					!= s->block.compressed)
 			return XZ_DATA_ERROR;
 		if (s->block_header.uncompressed != VLI_UNKNOWN
 				&& s->block_header.uncompressed
 					!= s->block.uncompressed)
 			return XZ_DATA_ERROR;
 		/* Unpadded size = header + compressed data + Check field */
 		s->block.hash.unpadded += s->block_header.size
 				+ s->block.compressed;
 #ifdef XZ_DEC_ANY_CHECK
 		s->block.hash.unpadded += check_sizes[s->check_type];
 #else
 		if (s->check_type == XZ_CHECK_CRC32)
 			s->block.hash.unpadded += 4;
 #endif
 		s->block.hash.uncompressed += s->block.uncompressed;
 		/* dec_index() hashes the Index Records the same way. */
 		s->block.hash.crc32 = xz_crc32(
 				(const uint8_t *)&s->block.hash,
 				sizeof(s->block.hash), s->block.hash.crc32);
 		++s->block.count;
 	}
 	return ret;
 }
 /*
 * Account for the input consumed since s->in_start: add it to the Index
 * size and feed those bytes into the running CRC32 value.
 */
 static void index_update(struct xz_dec *s, const struct xz_buf *b)
 {
 	const uint8_t *chunk = b->in + s->in_start;
 	size_t len = b->in_pos - s->in_start;
 	s->crc32 = xz_crc32(chunk, len, s->crc32);
 	s->index.size += len;
 }
 /*
 * Decode the Number of Records, Unpadded Size, and Uncompressed Size
 * fields from the Index field. That is, Index Padding and CRC32 are not
 * decoded by this function.
 *
 * This can return XZ_OK (more input needed), XZ_STREAM_END (everything
 * successfully decoded), or XZ_DATA_ERROR (input is corrupt).
 */
 static enum xz_ret dec_index(struct xz_dec *s, struct xz_buf *b)
 {
 	enum xz_ret ret;
 	do {
 		ret = dec_vli(s, b->in, &b->in_pos, b->in_size);
 		if (ret != XZ_STREAM_END) {
 			/*
 			 * Out of input or an error: account for the bytes
 			 * consumed so far before returning.
 			 */
 			index_update(s, b);
 			return ret;
 		}
 		switch (s->index.sequence) {
 		case SEQ_INDEX_COUNT:
 			s->index.count = s->vli;
 			/*
 			 * Validate that the Number of Records field
 			 * indicates the same number of Records as
 			 * there were Blocks in the Stream.
 			 */
 			if (s->index.count != s->block.count)
 				return XZ_DATA_ERROR;
 			s->index.sequence = SEQ_INDEX_UNPADDED;
 			break;
 		case SEQ_INDEX_UNPADDED:
 			s->index.hash.unpadded += s->vli;
 			s->index.sequence = SEQ_INDEX_UNCOMPRESSED;
 			break;
 		case SEQ_INDEX_UNCOMPRESSED:
 			s->index.hash.uncompressed += s->vli;
 			s->index.hash.crc32 = xz_crc32(
 					(const uint8_t *)&s->index.hash,
 					sizeof(s->index.hash),
 					s->index.hash.crc32);
 			/* From here on index.count counts down the Records. */
 			--s->index.count;
 			s->index.sequence = SEQ_INDEX_UNPADDED;
 			break;
 		}
 	} while (s->index.count > 0);
 	return XZ_STREAM_END;
 }
 /*
 * Compare the next four input bytes, least significant byte first,
 * against s->crc32. s->pos holds the bit offset of the byte to compare
 * next and must be zero when validation starts. Returns XZ_OK if the
 * input ran out, XZ_DATA_ERROR on a mismatch, and XZ_STREAM_END (with
 * s->crc32 and s->pos cleared) once all four bytes have matched.
 */
 static enum xz_ret crc32_validate(struct xz_dec *s, struct xz_buf *b)
 {
 	uint8_t expected;
 	for (;;) {
 		if (b->in_pos == b->in_size)
 			return XZ_OK;
 		expected = (s->crc32 >> s->pos) & 0xFF;
 		if (expected != b->in[b->in_pos++])
 			return XZ_DATA_ERROR;
 		s->pos += 8;
 		if (s->pos >= 32)
 			break;
 	}
 	s->crc32 = 0;
 	s->pos = 0;
 	return XZ_STREAM_END;
 }
 #ifdef XZ_DEC_ANY_CHECK
 /*
 * Step over the Check field of an unsupported Check ID without verifying
 * it. s->pos counts the bytes skipped so far. Returns false when the
 * input runs out first, and true (with s->pos cleared) once the whole
 * field has been skipped.
 */
 static bool check_skip(struct xz_dec *s, struct xz_buf *b)
 {
 	const uint32_t total = check_sizes[s->check_type];
 	for (; s->pos < total; ++s->pos, ++b->in_pos)
 		if (b->in_pos == b->in_size)
 			return false;
 	s->pos = 0;
 	return true;
 }
 #endif
 /* Decode the Stream Header field (the first 12 bytes of the .xz Stream). */
 static enum xz_ret dec_stream_header(struct xz_dec *s)
 {
 	if (!memeq(s->temp.buf, HEADER_MAGIC, HEADER_MAGIC_SIZE))
 		return XZ_FORMAT_ERROR;
 	if (xz_crc32(s->temp.buf + HEADER_MAGIC_SIZE, 2, 0)
 			!= get_le32(s->temp.buf + HEADER_MAGIC_SIZE + 2))
 		return XZ_DATA_ERROR;
 	if (s->temp.buf[HEADER_MAGIC_SIZE] != 0)
 		return XZ_OPTIONS_ERROR;
 	/*
 	 * Of integrity checks, we support only none (Check ID = 0) and
 	 * CRC32 (Check ID = 1). However, if XZ_DEC_ANY_CHECK is defined,
 	 * we will accept other check types too, but then the check won't
 	 * be verified and a warning (XZ_UNSUPPORTED_CHECK) will be given.
 	 */
 	s->check_type = s->temp.buf[HEADER_MAGIC_SIZE + 1];
 #ifdef XZ_DEC_ANY_CHECK
 	if (s->check_type > XZ_CHECK_MAX)
 		return XZ_OPTIONS_ERROR;
 	if (s->check_type > XZ_CHECK_CRC32)
 		return XZ_UNSUPPORTED_CHECK;
 #else
 	if (s->check_type > XZ_CHECK_CRC32)
 		return XZ_OPTIONS_ERROR;
 #endif
 	return XZ_OK;
 }
 /*
 * Decode the Stream Footer field (the last 12 bytes of the .xz Stream)
 * from s->temp.buf. Every failed check yields XZ_DATA_ERROR. On success
 * XZ_STREAM_END is returned instead of XZ_OK, which is more convenient
 * for the caller.
 */
 static enum xz_ret dec_stream_footer(struct xz_dec *s)
 {
 	const uint8_t *footer = s->temp.buf;
 	/* Magic bytes at the very end */
 	if (!memeq(footer + 10, FOOTER_MAGIC, FOOTER_MAGIC_SIZE))
 		return XZ_DATA_ERROR;
 	/* CRC32 (bytes 0-3) covers the six bytes that follow it */
 	if (xz_crc32(footer + 4, 6, 0) != get_le32(footer))
 		return XZ_DATA_ERROR;
 	/*
 	 * Backward Size and Stream Flags. The size of the Index CRC32
 	 * field was never added to s->index.size, which is why the
 	 * comparison uses s->index.size / 4 and not
 	 * s->index.size / 4 - 1.
 	 */
 	if ((s->index.size >> 2) != get_le32(footer + 4)
 			|| footer[8] != 0
 			|| footer[9] != s->check_type)
 		return XZ_DATA_ERROR;
 	return XZ_STREAM_END;
 }
 /* Decode the Block Header and initialize the filter chain. */
 /*
 * Parse the Block Header held in s->temp.buf (s->temp.size bytes).
 * Returns XZ_OK on success, XZ_DATA_ERROR if the header is corrupt,
 * XZ_OPTIONS_ERROR if it asks for something unsupported, or whatever
 * error the BCJ or LZMA2 reset function reports.
 */
 static enum xz_ret dec_block_header(struct xz_dec *s)
 {
 	enum xz_ret ret;
 	/*
 	 * Validate the CRC32. We know that the temp buffer is at least
 	 * eight bytes so this is safe.
 	 */
 	s->temp.size -= 4;
 	if (xz_crc32(s->temp.buf, s->temp.size, 0)
 			!= get_le32(s->temp.buf + s->temp.size))
 		return XZ_DATA_ERROR;
 	/* Skip the size byte (buf[0]) and the Block Flags (buf[1]). */
 	s->temp.pos = 2;
 	/*
 	 * Catch unsupported Block Flags. We support only one or two filters
 	 * in the chain, so we catch that with the same test.
 	 */
 #ifdef XZ_DEC_BCJ
 	if (s->temp.buf[1] & 0x3E)
 #else
 	if (s->temp.buf[1] & 0x3F)
 #endif
 		return XZ_OPTIONS_ERROR;
 	/* Compressed Size */
 	if (s->temp.buf[1] & 0x40) {
 		if (dec_vli(s, s->temp.buf, &s->temp.pos, s->temp.size)
 					!= XZ_STREAM_END)
 			return XZ_DATA_ERROR;
 		s->block_header.compressed = s->vli;
 	} else {
 		s->block_header.compressed = VLI_UNKNOWN;
 	}
 	/* Uncompressed Size */
 	if (s->temp.buf[1] & 0x80) {
 		if (dec_vli(s, s->temp.buf, &s->temp.pos, s->temp.size)
 				!= XZ_STREAM_END)
 			return XZ_DATA_ERROR;
 		s->block_header.uncompressed = s->vli;
 	} else {
 		s->block_header.uncompressed = VLI_UNKNOWN;
 	}
 #ifdef XZ_DEC_BCJ
 	/* If there are two filters, the first one must be a BCJ filter. */
 	s->bcj_active = s->temp.buf[1] & 0x01;
 	if (s->bcj_active) {
 		/* Two bytes are read below: Filter ID, Size of Properties. */
 		if (s->temp.size - s->temp.pos < 2)
 			return XZ_OPTIONS_ERROR;
 		ret = xz_dec_bcj_reset(s->bcj, s->temp.buf[s->temp.pos++]);
 		if (ret != XZ_OK)
 			return ret;
 		/*
 		 * We don't support custom start offset,
 		 * so Size of Properties must be zero.
 		 */
 		if (s->temp.buf[s->temp.pos++] != 0x00)
 			return XZ_OPTIONS_ERROR;
 	}
 #endif
 	/* Valid Filter Flags always take at least two bytes. */
 	if (s->temp.size - s->temp.pos < 2)
 		return XZ_DATA_ERROR;
 	/* Filter ID = LZMA2 */
 	if (s->temp.buf[s->temp.pos++] != 0x21)
 		return XZ_OPTIONS_ERROR;
 	/* Size of Properties = 1-byte Filter Properties */
 	if (s->temp.buf[s->temp.pos++] != 0x01)
 		return XZ_OPTIONS_ERROR;
 	/* Filter Properties contains LZMA2 dictionary size. */
 	if (s->temp.size - s->temp.pos < 1)
 		return XZ_DATA_ERROR;
 	ret = xz_dec_lzma2_reset(s->lzma2, s->temp.buf[s->temp.pos++]);
 	if (ret != XZ_OK)
 		return ret;
 	/* The rest must be Header Padding. */
 	while (s->temp.pos < s->temp.size)
 		if (s->temp.buf[s->temp.pos++] != 0x00)
 			return XZ_OPTIONS_ERROR;
 	/* Start the observed-size counters of the new Block from zero. */
 	s->temp.pos = 0;
 	s->block.compressed = 0;
 	s->block.uncompressed = 0;
 	return XZ_OK;
 }
 /*
 * The decoder state machine. s->sequence tells where decoding resumes.
 * Most cases fall through to the next one on purpose, so one call handles
 * as many fields as the available input and output allow. Returns XZ_OK
 * when more input or output space is needed, XZ_STREAM_END (from
 * dec_stream_footer()) when the Stream has been decoded, or an error
 * code from one of the helpers.
 */
 static enum xz_ret dec_main(struct xz_dec *s, struct xz_buf *b)
 {
 	enum xz_ret ret;
 	/*
 	 * Store the start position for the case when we are in the middle
 	 * of the Index field.
 	 */
 	s->in_start = b->in_pos;
 	while (true) {
 		switch (s->sequence) {
 		case SEQ_STREAM_HEADER:
 			/*
 			 * Stream Header is copied to s->temp, and then
 			 * decoded from there. This way if the caller
 			 * gives us only little input at a time, we can
 			 * still keep the Stream Header decoding code
 			 * simple. Similar approach is used in many places
 			 * in this file.
 			 */
 			if (!fill_temp(s, b))
 				return XZ_OK;
 			/*
 			 * If dec_stream_header() returns
 			 * XZ_UNSUPPORTED_CHECK, it is still possible
 			 * to continue decoding if working in multi-call
 			 * mode. Thus, update s->sequence before calling
 			 * dec_stream_header().
 			 */
 			s->sequence = SEQ_BLOCK_START;
 			ret = dec_stream_header(s);
 			if (ret != XZ_OK)
 				return ret;
 			/* Fall through */
 		case SEQ_BLOCK_START:
 			/* We need one byte of input to continue. */
 			if (b->in_pos == b->in_size)
 				return XZ_OK;
 			/* See if this is the beginning of the Index field. */
 			if (b->in[b->in_pos] == 0) {
 				s->in_start = b->in_pos++;
 				s->sequence = SEQ_INDEX;
 				break;
 			}
 			/*
 			 * Calculate the size of the Block Header and
 			 * prepare to decode it.
 			 */
 			s->block_header.size
 				= ((uint32_t)b->in[b->in_pos] + 1) * 4;
 			s->temp.size = s->block_header.size;
 			s->temp.pos = 0;
 			s->sequence = SEQ_BLOCK_HEADER;
 			/* Fall through */
 		case SEQ_BLOCK_HEADER:
 			if (!fill_temp(s, b))
 				return XZ_OK;
 			ret = dec_block_header(s);
 			if (ret != XZ_OK)
 				return ret;
 			s->sequence = SEQ_BLOCK_UNCOMPRESS;
 			/* Fall through */
 		case SEQ_BLOCK_UNCOMPRESS:
 			ret = dec_block(s, b);
 			if (ret != XZ_STREAM_END)
 				return ret;
 			s->sequence = SEQ_BLOCK_PADDING;
 			/* Fall through */
 		case SEQ_BLOCK_PADDING:
 			/*
 			 * Size of Compressed Data + Block Padding
 			 * must be a multiple of four. We don't need
 			 * s->block.compressed for anything else
 			 * anymore, so we use it here to test the size
 			 * of the Block Padding field.
 			 */
 			while (s->block.compressed & 3) {
 				if (b->in_pos == b->in_size)
 					return XZ_OK;
 				if (b->in[b->in_pos++] != 0)
 					return XZ_DATA_ERROR;
 				++s->block.compressed;
 			}
 			s->sequence = SEQ_BLOCK_CHECK;
 			/* Fall through */
 		case SEQ_BLOCK_CHECK:
 			if (s->check_type == XZ_CHECK_CRC32) {
 				ret = crc32_validate(s, b);
 				if (ret != XZ_STREAM_END)
 					return ret;
 			}
 #ifdef XZ_DEC_ANY_CHECK
 			else if (!check_skip(s, b)) {
 				return XZ_OK;
 			}
 #endif
 			/* Go back to look for the next Block or the Index. */
 			s->sequence = SEQ_BLOCK_START;
 			break;
 		case SEQ_INDEX:
 			ret = dec_index(s, b);
 			if (ret != XZ_STREAM_END)
 				return ret;
 			s->sequence = SEQ_INDEX_PADDING;
 			/* Fall through */
 		case SEQ_INDEX_PADDING:
 			/*
 			 * s->index.size doesn't include the bytes consumed
 			 * since s->in_start yet, so they are added here.
 			 */
 			while ((s->index.size + (b->in_pos - s->in_start))
 					& 3) {
 				if (b->in_pos == b->in_size) {
 					index_update(s, b);
 					return XZ_OK;
 				}
 				if (b->in[b->in_pos++] != 0)
 					return XZ_DATA_ERROR;
 			}
 			/* Finish the CRC32 value and Index size. */
 			index_update(s, b);
 			/* Compare the hashes to validate the Index field. */
 			if (!memeq(&s->block.hash, &s->index.hash,
 					sizeof(s->block.hash)))
 				return XZ_DATA_ERROR;
 			s->sequence = SEQ_INDEX_CRC32;
 			/* Fall through */
 		case SEQ_INDEX_CRC32:
 			ret = crc32_validate(s, b);
 			if (ret != XZ_STREAM_END)
 				return ret;
 			s->temp.size = STREAM_HEADER_SIZE;
 			s->sequence = SEQ_STREAM_FOOTER;
 			/* Fall through */
 		case SEQ_STREAM_FOOTER:
 			if (!fill_temp(s, b))
 				return XZ_OK;
 			return dec_stream_footer(s);
 		}
 	}
 	/* Never reached */
 }
 /*
 * xz_dec_run() is a wrapper for dec_main() to handle some special cases in
 * multi-call and single-call decoding.
 *
 * In multi-call mode, we must return XZ_BUF_ERROR when it seems clear that we
 * are not going to make any progress anymore. This is to prevent the caller
 * from calling us infinitely when the input file is truncated or otherwise
 * corrupt. Since zlib-style API allows that the caller fills the input buffer
 * only when the decoder doesn't produce any new output, we have to be careful
 * to avoid returning XZ_BUF_ERROR too easily: XZ_BUF_ERROR is returned only
 * after the second consecutive call to xz_dec_run() that makes no progress.
 *
 * In single-call mode, if we couldn't decode everything and no error
 * occurred, either the input is truncated or the output buffer is too small.
 * Since we know that the last input byte never produces any output, we know
 * that if all the input was consumed and decoding wasn't finished, the file
 * must be corrupt. Otherwise the output buffer has to be too small or the
 * file is corrupt in a way that decoding it produces too big output.
 *
 * If single-call decoding fails, we reset b->in_pos and b->out_pos back to
 * their original values. This is because with some filter chains there won't
 * be any valid uncompressed data in the output buffer unless the decoding
 * actually succeeds (that's the price to pay of using the output buffer as
 * the workspace).
 */
 XZ_EXTERN enum xz_ret xz_dec_run(struct xz_dec *s, struct xz_buf *b)
 {
 	size_t in_before;
 	size_t out_before;
 	bool stalled;
 	enum xz_ret ret;
 	/* Single-call decoding always starts from a freshly reset state. */
 	if (DEC_IS_SINGLE(s->mode))
 		xz_dec_reset(s);
 	in_before = b->in_pos;
 	out_before = b->out_pos;
 	ret = dec_main(s, b);
 	if (DEC_IS_SINGLE(s->mode)) {
 		/*
 		 * XZ_OK means that decoding didn't finish: all input used
 		 * up means corrupt data, otherwise a buffer problem.
 		 */
 		if (ret == XZ_OK) {
 			if (b->in_pos == b->in_size)
 				ret = XZ_DATA_ERROR;
 			else
 				ret = XZ_BUF_ERROR;
 		}
 		/* On failure the buffer positions are rewound. */
 		if (ret != XZ_STREAM_END) {
 			b->in_pos = in_before;
 			b->out_pos = out_before;
 		}
 		return ret;
 	}
 	/* Multi-call mode: did this call make any progress at all? */
 	stalled = ret == XZ_OK && in_before == b->in_pos
 			&& out_before == b->out_pos;
 	if (!stalled) {
 		s->allow_buf_error = false;
 		return ret;
 	}
 	/* XZ_BUF_ERROR only on the second consecutive stalled call. */
 	if (s->allow_buf_error)
 		ret = XZ_BUF_ERROR;
 	s->allow_buf_error = true;
 	return ret;
 }
 /*
 * Allocate a decoder for the given mode, together with its LZMA2 state
 * and, if XZ_DEC_BCJ is defined, its BCJ state. dict_max is passed on to
 * xz_dec_lzma2_create(). Returns the decoder in its reset state, or NULL
 * if any allocation fails; nothing is left allocated in that case.
 */
 XZ_EXTERN struct xz_dec *xz_dec_init(enum xz_mode mode, uint32_t dict_max)
 {
 	struct xz_dec *s = kmalloc(sizeof(*s), GFP_KERNEL);
 	if (s == NULL)
 		return NULL;
 	s->mode = mode;
 #ifdef XZ_DEC_BCJ
 	s->bcj = xz_dec_bcj_create(DEC_IS_SINGLE(mode));
 	if (s->bcj == NULL)
 		goto error_bcj;
 #endif
 	s->lzma2 = xz_dec_lzma2_create(mode, dict_max);
 	if (s->lzma2 == NULL)
 		goto error_lzma2;
 	xz_dec_reset(s);
 	return s;
 	/* Error paths: undo the allocations in reverse order. */
 error_lzma2:
 #ifdef XZ_DEC_BCJ
 	xz_dec_bcj_end(s->bcj);
 error_bcj:
 #endif
 	kfree(s);
 	return NULL;
 }
 /*
 * Put the decoder back to the state where it expects the Stream Header
 * of a new .xz Stream. The mode and the LZMA2/BCJ sub-decoders are not
 * touched here.
 */
 XZ_EXTERN void xz_dec_reset(struct xz_dec *s)
 {
 	/* Forget everything collected about Blocks and the Index. */
 	memzero(&s->index, sizeof(s->index));
 	memzero(&s->block, sizeof(s->block));
 	s->crc32 = 0;
 	s->pos = 0;
 	s->allow_buf_error = false;
 	/* The next thing to collect into temp is the Stream Header. */
 	s->temp.size = STREAM_HEADER_SIZE;
 	s->temp.pos = 0;
 	s->sequence = SEQ_STREAM_HEADER;
 }
 /*
 * Free the decoder together with its LZMA2 state and, if XZ_DEC_BCJ is
 * defined, its BCJ state. Passing NULL is allowed and does nothing.
 */
 XZ_EXTERN void xz_dec_end(struct xz_dec *s)
 {
 	if (s == NULL)
 		return;
 	xz_dec_lzma2_end(s->lzma2);
 #ifdef XZ_DEC_BCJ
 	xz_dec_bcj_end(s->bcj);
 #endif
 	kfree(s);
 }
--- a/lib/xz/xz_lzma2.h
+++ b/lib/xz/xz_lzma2.h
@ -0,0 +1,204 @@
 /*
 * LZMA2 definitions
 *
 * Authors: Lasse Collin <lasse.collin@tukaani.org>
 *          Igor Pavlov <http://7-zip.org/>
 *
 * This file has been put into the public domain.
 * You can do whatever you want with this file.
 */
 #ifndef XZ_LZMA2_H
 #define XZ_LZMA2_H
 /*
 * Range coder constants
 *
 * RC_TOP_VALUE is 2^RC_TOP_BITS (2^24) and RC_BIT_MODEL_TOTAL is
 * 2^RC_BIT_MODEL_TOTAL_BITS (2^11 = 2048); keep each pair in sync.
 */
 #define RC_SHIFT_BITS 8
 #define RC_TOP_BITS 24
 #define RC_TOP_VALUE (1 << RC_TOP_BITS)
 #define RC_BIT_MODEL_TOTAL_BITS 11
 #define RC_BIT_MODEL_TOTAL (1 << RC_BIT_MODEL_TOTAL_BITS)
 #define RC_MOVE_BITS 5
 /*
 * Maximum number of position states. A position state is the lowest pb
 * number of bits of the current uncompressed offset. In some places there
 * are different sets of probabilities for different position states.
 */
 #define POS_STATES_MAX (1 << 4)
 /*
 * Records which LZMA symbols were seen most recently and in which order.
 * This history is used to predict the next symbol.
 *
 * Symbols:
 *  - Literal: One 8-bit byte
 *  - Match: Repeat a chunk of data at some distance
 *  - Long repeat: Multi-byte match at a recently seen distance
 *  - Short repeat: One-byte repeat at a recently seen distance
 *
 * State names have the form STATE_oldest_older_previous. REP stands for
 * either a short or a long repeated match, and NONLIT for any non-literal.
 *
 * The numeric values are significant: the helpers below compare states
 * against LIT_STATES and do arithmetic on them.
 */
 enum lzma_state {
 	STATE_LIT_LIT = 0,
 	STATE_MATCH_LIT_LIT = 1,
 	STATE_REP_LIT_LIT = 2,
 	STATE_SHORTREP_LIT_LIT = 3,
 	STATE_MATCH_LIT = 4,
 	STATE_REP_LIT = 5,
 	STATE_SHORTREP_LIT = 6,
 	STATE_LIT_MATCH = 7,
 	STATE_LIT_LONGREP = 8,
 	STATE_LIT_SHORTREP = 9,
 	STATE_NONLIT_MATCH = 10,
 	STATE_NONLIT_REP = 11
 };
 /* Total number of states, i.e. the number of enumerators in lzma_state */
 #define STATES 12
 /*
 * The lowest 7 states (STATE_LIT_LIT .. STATE_SHORTREP_LIT) indicate that
 * the previous symbol was a literal.
 */
 #define LIT_STATES 7
 /*
 * Update *state after decoding a literal.
 *
 * States above STATE_LIT_SHORTREP drop by 6, the remaining states above
 * STATE_SHORTREP_LIT_LIT drop by 3, and everything else becomes
 * STATE_LIT_LIT.
 */
 static inline void lzma_state_literal(enum lzma_state *state)
 {
 	if (*state > STATE_LIT_SHORTREP)
 		*state -= 6;
 	else if (*state > STATE_SHORTREP_LIT_LIT)
 		*state -= 3;
 	else
 		*state = STATE_LIT_LIT;
 }
 /*
 * Update *state after decoding a match: STATE_LIT_MATCH if the previous
 * symbol was a literal, STATE_NONLIT_MATCH otherwise.
 */
 static inline void lzma_state_match(enum lzma_state *state)
 {
 	if (*state < LIT_STATES)
 		*state = STATE_LIT_MATCH;
 	else
 		*state = STATE_NONLIT_MATCH;
 }
 /*
 * Update *state after decoding a long repeated match: STATE_LIT_LONGREP if
 * the previous symbol was a literal, STATE_NONLIT_REP otherwise.
 */
 static inline void lzma_state_long_rep(enum lzma_state *state)
 {
 	if (*state < LIT_STATES)
 		*state = STATE_LIT_LONGREP;
 	else
 		*state = STATE_NONLIT_REP;
 }
 /*
 * Update *state after decoding a short repeated match: STATE_LIT_SHORTREP
 * if the previous symbol was a literal, STATE_NONLIT_REP otherwise.
 */
 static inline void lzma_state_short_rep(enum lzma_state *state)
 {
 	if (*state < LIT_STATES)
 		*state = STATE_LIT_SHORTREP;
 	else
 		*state = STATE_NONLIT_REP;
 }
 /* Return true if state is one of the LIT_STATES lowest (literal) states. */
 static inline bool lzma_state_is_literal(enum lzma_state state)
 {
 	const bool prev_was_literal = state < LIT_STATES;
 	return prev_was_literal;
 }
 /*
 * Each literal coder is divided into three sections:
 *   - 0x001-0x0FF: Without match byte
 *   - 0x101-0x1FF: With match byte; match bit is 0
 *   - 0x201-0x2FF: With match byte; match bit is 1
 *
 * Match byte is used when the previous LZMA symbol was something else than
 * a literal (that is, it was some kind of match).
 *
 * The three 0x100-sized sections add up to 0x300 (768) entries per coder.
 */
 #define LITERAL_CODER_SIZE 0x300
 /* Maximum number of literal coders (1 << 4 = 16) */
 #define LITERAL_CODERS_MAX (1 << 4)
 /* Minimum length of a match is two bytes. */
 #define MATCH_LEN_MIN 2
 /* Match length is encoded with 4, 5, or 10 bits.
 *
 * Length   Bits
 *  2-9      4 = Choice=0 + 3 bits
 * 10-17     5 = Choice=1 + Choice2=0 + 3 bits
 * 18-273   10 = Choice=1 + Choice2=1 + 8 bits
 */
 #define LEN_LOW_BITS 3
 #define LEN_LOW_SYMBOLS (1 << LEN_LOW_BITS)
 #define LEN_MID_BITS 3
 #define LEN_MID_SYMBOLS (1 << LEN_MID_BITS)
 #define LEN_HIGH_BITS 8
 #define LEN_HIGH_SYMBOLS (1 << LEN_HIGH_BITS)
 /* Sum of the three ranges above: 8 + 8 + 256 = 272 distinct lengths. */
 #define LEN_SYMBOLS (LEN_LOW_SYMBOLS + LEN_MID_SYMBOLS + LEN_HIGH_SYMBOLS)
 /*
 * Maximum length of a match is 273 which is a result of the encoding
 * described above (MATCH_LEN_MIN + LEN_SYMBOLS - 1 = 2 + 272 - 1).
 */
 #define MATCH_LEN_MAX (MATCH_LEN_MIN + LEN_SYMBOLS - 1)
 /*
 * Different sets of probabilities are used for match distances that have
 * very short match length: Lengths of 2, 3, and 4 bytes have a separate
 * set of probabilities for each length. The matches with longer length
 * use a shared set of probabilities.
 */
 #define DIST_STATES 4
 /*
 * Map a match length to the index of the probability array used for
 * decoding the distance slot: lengths below DIST_STATES + MATCH_LEN_MIN
 * map to len - MATCH_LEN_MIN, all longer ones share DIST_STATES - 1.
 */
 static inline uint32_t lzma_get_dist_state(uint32_t len)
 {
 	if (len >= DIST_STATES + MATCH_LEN_MIN)
 		return DIST_STATES - 1;
 	return len - MATCH_LEN_MIN;
 }
 /*
 * The highest two bits of a 32-bit match distance are encoded using six bits.
 * This six-bit value is called a distance slot. This way encoding a 32-bit
 * value takes 6-36 bits, larger values taking more bits.
 */
 #define DIST_SLOT_BITS 6
 #define DIST_SLOTS (1 << DIST_SLOT_BITS)
 /* Match distances up to 127 are fully encoded using probabilities. Since
 * the highest two bits (distance slot) are always encoded using six bits,
 * the distances 0-3 don't need any additional bits to encode, since the
 * distance slot itself is the same as the actual distance. DIST_MODEL_START
 * indicates the first distance slot where at least one additional bit is
 * needed.
 */
 #define DIST_MODEL_START 4
 /*
 * Match distances greater than 127 are encoded in three pieces:
 *   - distance slot: the highest two bits
 *   - direct bits: 2-26 bits below the highest two bits
 *   - alignment bits: four lowest bits
 *
 * Direct bits don't use any probabilities.
 *
 * The distance slot value of 14 is for distances 128-191.
 */
 #define DIST_MODEL_END 14
 /* Distance slots that indicate a distance <= 127. */
 #define FULL_DISTANCES_BITS (DIST_MODEL_END / 2)
 #define FULL_DISTANCES (1 << FULL_DISTANCES_BITS)
 /*
 * For match distances greater than 127, only the highest two bits and the
 * lowest four bits (alignment) are encoded using probabilities.
 */
 #define ALIGN_BITS 4
 #define ALIGN_SIZE (1 << ALIGN_BITS)
 #define ALIGN_MASK (ALIGN_SIZE - 1)
 /* Total number of all probability variables */
 #define PROBS_TOTAL (1846 + LITERAL_CODERS_MAX * LITERAL_CODER_SIZE)
 /*
 * LZMA remembers the four most recent match distances. Reusing these
 * distances tends to take less space than re-encoding the actual
 * distance value.
 */
 #define REPS 4
 #endif
--- a/lib/xz/xz_private.h
+++ b/lib/xz/xz_private.h
@ -0,0 +1,156 @@
 /*
 * Private includes and definitions
 *
 * Author: Lasse Collin <lasse.collin@tukaani.org>
 *
 * This file has been put into the public domain.
 * You can do whatever you want with this file.
 */
 #ifndef XZ_PRIVATE_H
 #define XZ_PRIVATE_H
 #ifdef __KERNEL__
 #	include <linux/xz.h>
 #	include <linux/kernel.h>
 #	include <asm/unaligned.h>
 	/* XZ_PREBOOT may be defined only via decompress_unxz.c. */
 #	ifndef XZ_PREBOOT
 #		include <string.h>
 #		include <malloc.h>
 #		include <asm/byteorder.h>
 #		ifdef CONFIG_XZ_DEC_X86
 #			define XZ_DEC_X86
 #		endif
 #		ifdef CONFIG_XZ_DEC_POWERPC
 #			define XZ_DEC_POWERPC
 #		endif
 #		ifdef CONFIG_XZ_DEC_IA64
 #			define XZ_DEC_IA64
 #		endif
 #		ifdef CONFIG_XZ_DEC_ARM
 #			define XZ_DEC_ARM
 #		endif
 #		ifdef CONFIG_XZ_DEC_ARMTHUMB
 #			define XZ_DEC_ARMTHUMB
 #		endif
 #		ifdef CONFIG_XZ_DEC_SPARC
 #			define XZ_DEC_SPARC
 #		endif
 #		define memeq(a, b, size) (memcmp(a, b, size) == 0)
 #		define memzero(buf, size) memset(buf, 0, size)
 #	endif
 #	define get_le32(p) le32_to_cpup((const uint32_t *)(p))
 #else
 	/*
 	 * For userspace builds, use a separate header to define the required
 	 * macros and functions. This makes it easier to adapt the code into
 	 * different environments and avoids clutter in the Linux kernel tree.
 	 */
 #	include "xz_config.h"
 #endif
 /* If no specific decoding mode is requested, enable support for all modes. */
 #if !defined(XZ_DEC_SINGLE) && !defined(XZ_DEC_PREALLOC) \
 		&& !defined(XZ_DEC_DYNALLOC)
 #	define XZ_DEC_SINGLE
 #	define XZ_DEC_PREALLOC
 #	define XZ_DEC_DYNALLOC
 #endif
 /*
 * The DEC_IS_foo(mode) macros are used in "if" statements. If only some
 * of the supported modes are enabled, these macros will evaluate to true or
 * false at compile time and thus allow the compiler to omit unneeded code.
 */
 #ifdef XZ_DEC_SINGLE
 #	define DEC_IS_SINGLE(mode) ((mode) == XZ_SINGLE)
 #else
 #	define DEC_IS_SINGLE(mode) (false)
 #endif
 #ifdef XZ_DEC_PREALLOC
 #	define DEC_IS_PREALLOC(mode) ((mode) == XZ_PREALLOC)
 #else
 #	define DEC_IS_PREALLOC(mode) (false)
 #endif
 #ifdef XZ_DEC_DYNALLOC
 #	define DEC_IS_DYNALLOC(mode) ((mode) == XZ_DYNALLOC)
 #else
 #	define DEC_IS_DYNALLOC(mode) (false)
 #endif
 /*
 * DEC_IS_MULTI(mode) has three forms: constant true when single-call
 * support is not compiled in, a run-time comparison against XZ_SINGLE when
 * single-call and at least one multi-call mode are compiled in, and
 * constant false when only single-call mode is compiled in.
 */
 #if !defined(XZ_DEC_SINGLE)
 #	define DEC_IS_MULTI(mode) (true)
 #elif defined(XZ_DEC_PREALLOC) || defined(XZ_DEC_DYNALLOC)
 #	define DEC_IS_MULTI(mode) ((mode) != XZ_SINGLE)
 #else
 #	define DEC_IS_MULTI(mode) (false)
 #endif
 /*
 * If any of the BCJ filter decoders are wanted, define XZ_DEC_BCJ.
 * XZ_DEC_BCJ is used to enable generic support for BCJ decoders.
 *
 * Each supported filter (x86, PowerPC, IA-64, ARM, ARM-Thumb, SPARC) is
 * tested exactly once. A XZ_DEC_BCJ definition made before this point is
 * left untouched.
 */
 #ifndef XZ_DEC_BCJ
 #	if defined(XZ_DEC_X86) || defined(XZ_DEC_POWERPC) \
 			|| defined(XZ_DEC_IA64) || defined(XZ_DEC_ARM) \
 			|| defined(XZ_DEC_ARMTHUMB) || defined(XZ_DEC_SPARC)
 #		define XZ_DEC_BCJ
 #	endif
 #endif
 /*
 * Allocate memory for LZMA2 decoder. xz_dec_lzma2_reset() must be used
 * before calling xz_dec_lzma2_run().
 *
 * mode and dict_max are the values passed to xz_dec_init(), which treats
 * a NULL return from this function as failure.
 */
 XZ_EXTERN struct xz_dec_lzma2 *xz_dec_lzma2_create(enum xz_mode mode,
 						   uint32_t dict_max);
 /*
 * Decode the LZMA2 properties (one byte) and reset the decoder. Return
 * XZ_OK on success, XZ_MEMLIMIT_ERROR if the preallocated dictionary is not
 * big enough, and XZ_OPTIONS_ERROR if props indicates something that this
 * decoder doesn't support.
 */
 XZ_EXTERN enum xz_ret xz_dec_lzma2_reset(struct xz_dec_lzma2 *s,
 					 uint8_t props);
 /* Decode raw LZMA2 stream from b->in to b->out. */
 XZ_EXTERN enum xz_ret xz_dec_lzma2_run(struct xz_dec_lzma2 *s,
 				       struct xz_buf *b);
 /* Free the memory allocated for the LZMA2 decoder. */
 XZ_EXTERN void xz_dec_lzma2_end(struct xz_dec_lzma2 *s);
 #ifdef XZ_DEC_BCJ
 /*
 * Allocate memory for BCJ decoders. xz_dec_bcj_reset() must be used before
 * calling xz_dec_bcj_run().
 *
 * xz_dec_init() passes DEC_IS_SINGLE(mode) as single_call and treats a
 * NULL return as failure.
 */
 XZ_EXTERN struct xz_dec_bcj *xz_dec_bcj_create(bool single_call);
 /*
 * Decode the Filter ID of a BCJ filter. This implementation doesn't
 * support custom start offsets, so no decoding of Filter Properties
 * is needed. Returns XZ_OK if the given Filter ID is supported.
 * Otherwise XZ_OPTIONS_ERROR is returned.
 */
 XZ_EXTERN enum xz_ret xz_dec_bcj_reset(struct xz_dec_bcj *s, uint8_t id);
 /*
 * Decode raw BCJ + LZMA2 stream. This must be used only if there actually is
 * a BCJ filter in the chain. If the chain has only LZMA2, xz_dec_lzma2_run()
 * must be called directly.
 */
 XZ_EXTERN enum xz_ret xz_dec_bcj_run(struct xz_dec_bcj *s,
 				     struct xz_dec_lzma2 *lzma2,
 				     struct xz_buf *b);
 /*
 * Free the memory allocated for the BCJ filters. This is a plain macro
 * wrapper around kfree(), not a function.
 */
 #define xz_dec_bcj_end(s) kfree(s)
 #endif
 #endif
--- a/lib/xz/xz_stream.h
+++ b/lib/xz/xz_stream.h
@ -0,0 +1,61 @@
 /*
 * Definitions for handling the .xz file format
 *
 * Author: Lasse Collin <lasse.collin@tukaani.org>
 *
 * This file has been put into the public domain.
 * You can do whatever you want with this file.
 */
 #ifndef XZ_STREAM_H
 #define XZ_STREAM_H
 /*
 * For __KERNEL__ builds that do not use the internal CRC32 implementation
 * (XZ_INTERNAL_CRC32 is zero or undefined), map xz_crc32() onto crc32_le().
 * The CRC value is bit-inverted both before it is passed in and after it
 * comes back.
 * NOTE(review): presumably crc32_le() does not apply the initial and final
 * inversion itself - confirm against its implementation.
 */
 #if defined(__KERNEL__) && !XZ_INTERNAL_CRC32
 #	undef crc32
 #	define xz_crc32(buf, size, crc) \
 		(~crc32_le(~(uint32_t)(crc), buf, size))
 #endif
 /*
 * See the .xz file format specification at
 * http://tukaani.org/xz/xz-file-format.txt
 * to understand the container format.
 */
 #define STREAM_HEADER_SIZE 12
 #define HEADER_MAGIC "\3757zXZ"
 #define HEADER_MAGIC_SIZE 6
 #define FOOTER_MAGIC "YZ"
 #define FOOTER_MAGIC_SIZE 2
 /*
 * Variable-length integer can hold a 63-bit unsigned integer or a special
 * value indicating that the value is unknown.
 *
 * Experimental: vli_type can be defined to uint32_t to save a few bytes
 * in code size (no effect on speed). Doing so limits the uncompressed and
 * compressed size of the file to less than 256 MiB and may also weaken
 * error detection slightly.
 */
 typedef uint64_t vli_type;
 /* Largest valid value: half of the all-ones value (2^63 - 1 for uint64_t). */
 #define VLI_MAX ((vli_type)-1 / 2)
 /* All bits set; greater than VLI_MAX, so never equal to a valid value. */
 #define VLI_UNKNOWN ((vli_type)-1)
 /*
 * Maximum encoded size of a VLI: the bit width of vli_type divided by 7,
 * which is 9 for uint64_t.
 */
 #define VLI_BYTES_MAX (sizeof(vli_type) * 8 / 7)
 /*
 * Integrity Check types
 *
 * Only the IDs named here have an enumerator; the numbering is sparse
 * (0, 1, 4, 10) within the 0..XZ_CHECK_MAX range.
 */
 enum xz_check {
 	XZ_CHECK_NONE = 0,
 	XZ_CHECK_CRC32 = 1,
 	XZ_CHECK_CRC64 = 4,
 	XZ_CHECK_SHA256 = 10
 };
 /* Maximum possible Check ID */
 #define XZ_CHECK_MAX 15
 #endif
--- a/pbl/Kconfig
+++ b/pbl/Kconfig
@ -64,6 +64,9 @@ config IMAGE_COMPRESSION_LZO
 config IMAGE_COMPRESSION_GZIP
 	bool "gzip"
 config IMAGE_COMPRESSION_XZKERN
 	bool "xz"
 config IMAGE_COMPRESSION_NONE
 	bool "none"
--- a/pbl/decomp.c
+++ b/pbl/decomp.c
@ -22,6 +22,10 @@
 #include "../../../lib/decompress_inflate.c"
 #endif
 #ifdef CONFIG_IMAGE_COMPRESSION_XZKERN
 #include "../../../lib/decompress_unxz.c"
 #endif
 #ifdef CONFIG_IMAGE_COMPRESSION_NONE
 STATIC int decompress(u8 *input, int in_len,
 				int (*fill) (void *, unsigned int),
--- a/scripts/Makefile.lib
+++ b/scripts/Makefile.lib
@ -205,6 +205,37 @@ cmd_gzip = (cat $(filter-out FORCE,$^) | gzip -n -f -9 > $@) || \
 %.gz: %
 	$(call if_changed,gzip)
 # XZ
 # ---------------------------------------------------------------------------
 # Use xzkern to compress the kernel image and xzmisc to compress other things.
 #
 # xzkern uses a big LZMA2 dictionary since it doesn't increase memory usage
 # of the kernel decompressor. A BCJ filter is used if it is available for
 # the target architecture. xzkern also appends uncompressed size of the data
 # using size_append. The .xz format has the size information available at
 # the end of the file too, but it's in more complex format and it's good to
 # avoid changing the part of the boot code that reads the uncompressed size.
 # Note that the bytes added by size_append will make the xz tool think that
 # the file is corrupt. This is expected.
 #
 # xzmisc doesn't use size_append, so it can be used to create normal .xz
 # files. xzmisc uses smaller LZMA2 dictionary than xzkern, because a very
 # big dictionary would increase the memory usage too much in the multi-call
 # decompression mode. A BCJ filter isn't used either.
 # cmd_xzkern: feed every prerequisite except FORCE through
 # scripts/xz_wrap.sh, then append the output of size_append for the same
 # files, all written to $@. On failure the partial $@ is removed and the
 # command fails.
 quiet_cmd_xzkern = XZKERN  $@
 cmd_xzkern = (cat $(filter-out FORCE,$^) | \
 	sh $(srctree)/scripts/xz_wrap.sh && \
 	$(call size_append, $(filter-out FORCE,$^))) > $@ || \
 	(rm -f $@ ; false)
 # cmd_xzmisc: compress every prerequisite except FORCE with plain xz
 # (CRC32 check, 1 MiB LZMA2 dictionary) into $@. On failure the partial $@
 # is removed and the command fails.
 quiet_cmd_xzmisc = XZMISC  $@
 cmd_xzmisc = (cat $(filter-out FORCE,$^) | \
 	xz --check=crc32 --lzma2=dict=1MiB) > $@ || \
 	(rm -f $@ ; false)
 # Pattern rule: build <file>.xzkern from <file> by invoking cmd_xzkern
 # through the if_changed helper (defined elsewhere in the build system).
 %.xzkern: %
 	$(call if_changed,xzkern)
 # DTC
 # ---------------------------------------------------------------------------
--- a/scripts/xz_wrap.sh
+++ b/scripts/xz_wrap.sh
@ -0,0 +1,23 @@
 #!/bin/sh
 #
 # This is a wrapper for xz to compress the kernel image using appropriate
 # compression options depending on the architecture.
 #
 # Author: Lasse Collin <lasse.collin@tukaani.org>
 #
 # This file has been put into the public domain.
 # You can do whatever you want with this file.
 #
 BCJ=
 LZMA2OPTS=
 # Choose the BCJ filter (and any extra LZMA2 option) for the architecture
 # named by SRCARCH. Any other value, or none, leaves both variables empty.
 case "$SRCARCH" in
 	arm)
 		BCJ=--arm
 		;;
 	ia64)
 		BCJ=--ia64
 		LZMA2OPTS=pb=4
 		;;
 	powerpc)
 		BCJ=--powerpc
 		;;
 	sparc)
 		BCJ=--sparc
 		;;
 	x86)
 		BCJ=--x86
 		;;
 esac
 # $BCJ is left unquoted: when it is empty it expands to no argument at all.
 exec xz --check=crc32 $BCJ --lzma2=$LZMA2OPTS,dict=32MiB
--- a/lib/xz/Makefile
+++ b/lib/xz/Makefile
@ -0,0 +1,2 @@
 obj-$(CONFIG_XZ_DECOMPRESS) += xz_crc32.o xz_dec_bcj.o
 obj-$(CONFIG_XZ_DECOMPRESS) += xz_dec_lzma2.o xz_dec_stream.o