From 64c70b1cf43de158282bc1675918d503e5b15cc1 Mon Sep 17 00:00:00 2001 From: Richard Purdie Date: Tue, 10 Jul 2007 17:22:24 -0700 Subject: [PATCH] Add LZO1X algorithm to the kernel This is a hybrid version of the patch to add the LZO1X compression algorithm to the kernel. Nitin and myself have merged the best parts of the various patches to form this version which we're both happy with (and are jointly signing off). The performance of this version is equivalent to the original minilzo code it was based on. Bytecode comparisons have also been made on ARM, i386 and x86_64 with favourable results. There are several users of LZO lined up including jffs2, crypto and reiser4 since its much faster than zlib. Signed-off-by: Nitin Gupta Signed-off-by: Richard Purdie Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/lzo.h | 44 +++++++ lib/Kconfig | 6 + lib/Makefile | 2 + lib/lzo/Makefile | 5 + lib/lzo/lzo1x_compress.c | 226 +++++++++++++++++++++++++++++++++ lib/lzo/lzo1x_decompress.c | 254 +++++++++++++++++++++++++++++++++++++ lib/lzo/lzodefs.h | 43 +++++++ 7 files changed, 580 insertions(+) create mode 100644 include/linux/lzo.h create mode 100644 lib/lzo/Makefile create mode 100644 lib/lzo/lzo1x_compress.c create mode 100644 lib/lzo/lzo1x_decompress.c create mode 100644 lib/lzo/lzodefs.h diff --git a/include/linux/lzo.h b/include/linux/lzo.h new file mode 100644 index 000000000000..582d8b711a13 --- /dev/null +++ b/include/linux/lzo.h @@ -0,0 +1,44 @@ +#ifndef __LZO_H__ +#define __LZO_H__ +/* + * LZO Public Kernel Interface + * A mini subset of the LZO real-time data compression library + * + * Copyright (C) 1996-2005 Markus F.X.J. Oberhumer + * + * The full LZO package can be found at: + * http://www.oberhumer.com/opensource/lzo/ + * + * Changed for kernel use by: + * Nitin Gupta + * Richard Purdie + */ + +#define LZO1X_MEM_COMPRESS (16384 * sizeof(unsigned char *)) +#define LZO1X_1_MEM_COMPRESS LZO1X_MEM_COMPRESS + +#define lzo1x_worst_compress(x) (x + (x / 64) + 16 + 3) + +/* This requires 'workmem' of size LZO1X_1_MEM_COMPRESS */ +int lzo1x_1_compress(const unsigned char *src, size_t src_len, + unsigned char *dst, size_t *dst_len, void *wrkmem); + +/* safe decompression with overrun testing */ +int lzo1x_decompress_safe(const unsigned char *src, size_t src_len, + unsigned char *dst, size_t *dst_len); + +/* + * Return values (< 0 = Error) + */ +#define LZO_E_OK 0 +#define LZO_E_ERROR (-1) +#define LZO_E_OUT_OF_MEMORY (-2) +#define LZO_E_NOT_COMPRESSIBLE (-3) +#define LZO_E_INPUT_OVERRUN (-4) +#define LZO_E_OUTPUT_OVERRUN (-5) +#define LZO_E_LOOKBEHIND_OVERRUN (-6) +#define LZO_E_EOF_NOT_FOUND (-7) +#define LZO_E_INPUT_NOT_CONSUMED (-8) +#define LZO_E_NOT_YET_IMPLEMENTED (-9) + +#endif diff --git a/lib/Kconfig b/lib/Kconfig index 2e7ae6b9215b..3eb29d5dc4f5 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -64,6 +64,12 @@ config ZLIB_INFLATE config ZLIB_DEFLATE tristate +config LZO_COMPRESS + tristate + +config LZO_DECOMPRESS + tristate + # # Generic allocator support is selected if needed # diff --git a/lib/Makefile b/lib/Makefile index c8c8e20784ce..d1b366bdf86e 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -49,6 +49,8 @@ obj-$(CONFIG_GENERIC_ALLOCATOR) += genalloc.o obj-$(CONFIG_ZLIB_INFLATE) += zlib_inflate/ obj-$(CONFIG_ZLIB_DEFLATE) += zlib_deflate/ obj-$(CONFIG_REED_SOLOMON) += reed_solomon/ +obj-$(CONFIG_LZO_COMPRESS) += lzo/ +obj-$(CONFIG_LZO_DECOMPRESS) += lzo/ obj-$(CONFIG_TEXTSEARCH) += textsearch.o obj-$(CONFIG_TEXTSEARCH_KMP) += ts_kmp.o diff --git a/lib/lzo/Makefile b/lib/lzo/Makefile new file mode 100644 index 000000000000..e764116ea12d --- /dev/null +++ b/lib/lzo/Makefile @@ -0,0 +1,5 @@ +lzo_compress-objs := lzo1x_compress.o +lzo_decompress-objs := lzo1x_decompress.o + +obj-$(CONFIG_LZO_COMPRESS) += lzo_compress.o +obj-$(CONFIG_LZO_DECOMPRESS) += lzo_decompress.o diff --git a/lib/lzo/lzo1x_compress.c b/lib/lzo/lzo1x_compress.c new file mode 100644 index 000000000000..c935f00073e9 --- /dev/null +++ b/lib/lzo/lzo1x_compress.c @@ -0,0 +1,226 @@ +/* + * LZO1X Compressor from MiniLZO + * + * Copyright (C) 1996-2005 Markus F.X.J. Oberhumer + * + * The full LZO package can be found at: + * http://www.oberhumer.com/opensource/lzo/ + * + * Changed for kernel use by: + * Nitin Gupta + * Richard Purdie + */ + +#include +#include +#include +#include +#include "lzodefs.h" + +static noinline size_t +_lzo1x_1_do_compress(const unsigned char *in, size_t in_len, + unsigned char *out, size_t *out_len, void *wrkmem) +{ + const unsigned char * const in_end = in + in_len; + const unsigned char * const ip_end = in + in_len - M2_MAX_LEN - 5; + const unsigned char ** const dict = wrkmem; + const unsigned char *ip = in, *ii = ip; + const unsigned char *end, *m, *m_pos; + size_t m_off, m_len, dindex; + unsigned char *op = out; + + ip += 4; + + for (;;) { + dindex = ((0x21 * DX3(ip, 5, 5, 6)) >> 5) & D_MASK; + m_pos = dict[dindex]; + + if (m_pos < in) + goto literal; + + if (ip == m_pos || (ip - m_pos) > M4_MAX_OFFSET) + goto literal; + + m_off = ip - m_pos; + if (m_off <= M2_MAX_OFFSET || m_pos[3] == ip[3]) + goto try_match; + + dindex = (dindex & (D_MASK & 0x7ff)) ^ (D_HIGH | 0x1f); + m_pos = dict[dindex]; + + if (m_pos < in) + goto literal; + + if (ip == m_pos || (ip - m_pos) > M4_MAX_OFFSET) + goto literal; + + m_off = ip - m_pos; + if (m_off <= M2_MAX_OFFSET || m_pos[3] == ip[3]) + goto try_match; + + goto literal; + +try_match: + if (get_unaligned((const unsigned short *)m_pos) + == get_unaligned((const unsigned short *)ip)) { + if (likely(m_pos[2] == ip[2])) + goto match; + } + +literal: + dict[dindex] = ip; + ++ip; + if (unlikely(ip >= ip_end)) + break; + continue; + +match: + dict[dindex] = ip; + if (ip != ii) { + size_t t = ip - ii; + + if (t <= 3) { + op[-2] |= t; + } else if (t <= 18) { + *op++ = (t - 3); + } else { + size_t tt = t - 18; + + *op++ = 0; + while (tt > 255) { + tt -= 255; + *op++ = 0; + } + *op++ = tt; + } + do { + *op++ = *ii++; + } while (--t > 0); + } + + ip += 3; + if (m_pos[3] != *ip++ || m_pos[4] != *ip++ + || m_pos[5] != *ip++ || m_pos[6] != *ip++ + || m_pos[7] != *ip++ || m_pos[8] != *ip++) { + --ip; + m_len = ip - ii; + + if (m_off <= M2_MAX_OFFSET) { + m_off -= 1; + *op++ = (((m_len - 1) << 5) + | ((m_off & 7) << 2)); + *op++ = (m_off >> 3); + } else if (m_off <= M3_MAX_OFFSET) { + m_off -= 1; + *op++ = (M3_MARKER | (m_len - 2)); + goto m3_m4_offset; + } else { + m_off -= 0x4000; + + *op++ = (M4_MARKER | ((m_off & 0x4000) >> 11) + | (m_len - 2)); + goto m3_m4_offset; + } + } else { + end = in_end; + m = m_pos + M2_MAX_LEN + 1; + + while (ip < end && *m == *ip) { + m++; + ip++; + } + m_len = ip - ii; + + if (m_off <= M3_MAX_OFFSET) { + m_off -= 1; + if (m_len <= 33) { + *op++ = (M3_MARKER | (m_len - 2)); + } else { + m_len -= 33; + *op++ = M3_MARKER | 0; + goto m3_m4_len; + } + } else { + m_off -= 0x4000; + if (m_len <= M4_MAX_LEN) { + *op++ = (M4_MARKER + | ((m_off & 0x4000) >> 11) + | (m_len - 2)); + } else { + m_len -= M4_MAX_LEN; + *op++ = (M4_MARKER + | ((m_off & 0x4000) >> 11)); +m3_m4_len: + while (m_len > 255) { + m_len -= 255; + *op++ = 0; + } + + *op++ = (m_len); + } + } +m3_m4_offset: + *op++ = ((m_off & 63) << 2); + *op++ = (m_off >> 6); + } + + ii = ip; + if (unlikely(ip >= ip_end)) + break; + } + + *out_len = op - out; + return in_end - ii; +} + +int lzo1x_1_compress(const unsigned char *in, size_t in_len, unsigned char *out, + size_t *out_len, void *wrkmem) +{ + const unsigned char *ii; + unsigned char *op = out; + size_t t; + + if (unlikely(in_len <= M2_MAX_LEN + 5)) { + t = in_len; + } else { + t = _lzo1x_1_do_compress(in, in_len, op, out_len, wrkmem); + op += *out_len; + } + + if (t > 0) { + ii = in + in_len - t; + + if (op == out && t <= 238) { + *op++ = (17 + t); + } else if (t <= 3) { + op[-2] |= t; + } else if (t <= 18) { + *op++ = (t - 3); + } else { + size_t tt = t - 18; + + *op++ = 0; + while (tt > 255) { + tt -= 255; + *op++ = 0; + } + + *op++ = tt; + } + do { + *op++ = *ii++; + } while (--t > 0); + } + + *op++ = M4_MARKER | 1; + *op++ = 0; + *op++ = 0; + + *out_len = op - out; + return LZO_E_OK; +} +EXPORT_SYMBOL_GPL(lzo1x_1_compress); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("LZO1X-1 Compressor"); + diff --git a/lib/lzo/lzo1x_decompress.c b/lib/lzo/lzo1x_decompress.c new file mode 100644 index 000000000000..9dc7056e5520 --- /dev/null +++ b/lib/lzo/lzo1x_decompress.c @@ -0,0 +1,254 @@ +/* + * LZO1X Decompressor from MiniLZO + * + * Copyright (C) 1996-2005 Markus F.X.J. Oberhumer + * + * The full LZO package can be found at: + * http://www.oberhumer.com/opensource/lzo/ + * + * Changed for kernel use by: + * Nitin Gupta + * Richard Purdie + */ + +#include +#include +#include +#include +#include +#include "lzodefs.h" + +#define HAVE_IP(x, ip_end, ip) ((size_t)(ip_end - ip) < (x)) +#define HAVE_OP(x, op_end, op) ((size_t)(op_end - op) < (x)) +#define HAVE_LB(m_pos, out, op) (m_pos < out || m_pos >= op) + +#define COPY4(dst, src) \ + put_unaligned(get_unaligned((const u32 *)(src)), (u32 *)(dst)) + +int lzo1x_decompress_safe(const unsigned char *in, size_t in_len, + unsigned char *out, size_t *out_len) +{ + const unsigned char * const ip_end = in + in_len; + unsigned char * const op_end = out + *out_len; + const unsigned char *ip = in, *m_pos; + unsigned char *op = out; + size_t t; + + *out_len = 0; + + if (*ip > 17) { + t = *ip++ - 17; + if (t < 4) + goto match_next; + if (HAVE_OP(t, op_end, op)) + goto output_overrun; + if (HAVE_IP(t + 1, ip_end, ip)) + goto input_overrun; + do { + *op++ = *ip++; + } while (--t > 0); + goto first_literal_run; + } + + while ((ip < ip_end)) { + t = *ip++; + if (t >= 16) + goto match; + if (t == 0) { + if (HAVE_IP(1, ip_end, ip)) + goto input_overrun; + while (*ip == 0) { + t += 255; + ip++; + if (HAVE_IP(1, ip_end, ip)) + goto input_overrun; + } + t += 15 + *ip++; + } + if (HAVE_OP(t + 3, op_end, op)) + goto output_overrun; + if (HAVE_IP(t + 4, ip_end, ip)) + goto input_overrun; + + COPY4(op, ip); + op += 4; + ip += 4; + if (--t > 0) { + if (t >= 4) { + do { + COPY4(op, ip); + op += 4; + ip += 4; + t -= 4; + } while (t >= 4); + if (t > 0) { + do { + *op++ = *ip++; + } while (--t > 0); + } + } else { + do { + *op++ = *ip++; + } while (--t > 0); + } + } + +first_literal_run: + t = *ip++; + if (t >= 16) + goto match; + m_pos = op - (1 + M2_MAX_OFFSET); + m_pos -= t >> 2; + m_pos -= *ip++ << 2; + + if (HAVE_LB(m_pos, out, op)) + goto lookbehind_overrun; + + if (HAVE_OP(3, op_end, op)) + goto output_overrun; + *op++ = *m_pos++; + *op++ = *m_pos++; + *op++ = *m_pos; + + goto match_done; + + do { +match: + if (t >= 64) { + m_pos = op - 1; + m_pos -= (t >> 2) & 7; + m_pos -= *ip++ << 3; + t = (t >> 5) - 1; + if (HAVE_LB(m_pos, out, op)) + goto lookbehind_overrun; + if (HAVE_OP(t + 3 - 1, op_end, op)) + goto output_overrun; + goto copy_match; + } else if (t >= 32) { + t &= 31; + if (t == 0) { + if (HAVE_IP(1, ip_end, ip)) + goto input_overrun; + while (*ip == 0) { + t += 255; + ip++; + if (HAVE_IP(1, ip_end, ip)) + goto input_overrun; + } + t += 31 + *ip++; + } + m_pos = op - 1; + m_pos -= le16_to_cpu(get_unaligned( + (const unsigned short *)ip)) >> 2; + ip += 2; + } else if (t >= 16) { + m_pos = op; + m_pos -= (t & 8) << 11; + + t &= 7; + if (t == 0) { + if (HAVE_IP(1, ip_end, ip)) + goto input_overrun; + while (*ip == 0) { + t += 255; + ip++; + if (HAVE_IP(1, ip_end, ip)) + goto input_overrun; + } + t += 7 + *ip++; + } + m_pos -= le16_to_cpu(get_unaligned( + (const unsigned short *)ip) >> 2); + ip += 2; + if (m_pos == op) + goto eof_found; + m_pos -= 0x4000; + } else { + m_pos = op - 1; + m_pos -= t >> 2; + m_pos -= *ip++ << 2; + + if (HAVE_LB(m_pos, out, op)) + goto lookbehind_overrun; + if (HAVE_OP(2, op_end, op)) + goto output_overrun; + + *op++ = *m_pos++; + *op++ = *m_pos; + goto match_done; + } + + if (HAVE_LB(m_pos, out, op)) + goto lookbehind_overrun; + if (HAVE_OP(t + 3 - 1, op_end, op)) + goto output_overrun; + + if (t >= 2 * 4 - (3 - 1) && (op - m_pos) >= 4) { + COPY4(op, m_pos); + op += 4; + m_pos += 4; + t -= 4 - (3 - 1); + do { + COPY4(op, m_pos); + op += 4; + m_pos += 4; + t -= 4; + } while (t >= 4); + if (t > 0) + do { + *op++ = *m_pos++; + } while (--t > 0); + } else { +copy_match: + *op++ = *m_pos++; + *op++ = *m_pos++; + do { + *op++ = *m_pos++; + } while (--t > 0); + } +match_done: + t = ip[-2] & 3; + if (t == 0) + break; +match_next: + if (HAVE_OP(t, op_end, op)) + goto output_overrun; + if (HAVE_IP(t + 1, ip_end, ip)) + goto input_overrun; + + *op++ = *ip++; + if (t > 1) { + *op++ = *ip++; + if (t > 2) + *op++ = *ip++; + } + + t = *ip++; + } while (ip < ip_end); + } + + *out_len = op - out; + return LZO_E_EOF_NOT_FOUND; + +eof_found: + *out_len = op - out; + return (ip == ip_end ? LZO_E_OK : + (ip < ip_end ? LZO_E_INPUT_NOT_CONSUMED : LZO_E_INPUT_OVERRUN)); +input_overrun: + *out_len = op - out; + return LZO_E_INPUT_OVERRUN; + +output_overrun: + *out_len = op - out; + return LZO_E_OUTPUT_OVERRUN; + +lookbehind_overrun: + *out_len = op - out; + return LZO_E_LOOKBEHIND_OVERRUN; +} + +EXPORT_SYMBOL_GPL(lzo1x_decompress_safe); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("LZO1X Decompressor"); + diff --git a/lib/lzo/lzodefs.h b/lib/lzo/lzodefs.h new file mode 100644 index 000000000000..b6d482c492ef --- /dev/null +++ b/lib/lzo/lzodefs.h @@ -0,0 +1,43 @@ +/* + * lzodefs.h -- architecture, OS and compiler specific defines + * + * Copyright (C) 1996-2005 Markus F.X.J. Oberhumer + * + * The full LZO package can be found at: + * http://www.oberhumer.com/opensource/lzo/ + * + * Changed for kernel use by: + * Nitin Gupta + * Richard Purdie + */ + +#define LZO_VERSION 0x2020 +#define LZO_VERSION_STRING "2.02" +#define LZO_VERSION_DATE "Oct 17 2005" + +#define M1_MAX_OFFSET 0x0400 +#define M2_MAX_OFFSET 0x0800 +#define M3_MAX_OFFSET 0x4000 +#define M4_MAX_OFFSET 0xbfff + +#define M1_MIN_LEN 2 +#define M1_MAX_LEN 2 +#define M2_MIN_LEN 3 +#define M2_MAX_LEN 8 +#define M3_MIN_LEN 3 +#define M3_MAX_LEN 33 +#define M4_MIN_LEN 3 +#define M4_MAX_LEN 9 + +#define M1_MARKER 0 +#define M2_MARKER 64 +#define M3_MARKER 32 +#define M4_MARKER 16 + +#define D_BITS 14 +#define D_MASK ((1u << D_BITS) - 1) +#define D_HIGH ((D_MASK >> 1) + 1) + +#define DX2(p, s1, s2) (((((size_t)((p)[2]) << (s2)) ^ (p)[1]) \ + << (s1)) ^ (p)[0]) +#define DX3(p, s1, s2, s3) ((DX2((p)+1, s2, s3) << (s1)) ^ (p)[0])