diff options
Diffstat (limited to 'src/libs/3rdparty/libarchive/archive_read_support_format_lha.c')
-rw-r--r-- | src/libs/3rdparty/libarchive/archive_read_support_format_lha.c | 2919 |
1 files changed, 2919 insertions, 0 deletions
diff --git a/src/libs/3rdparty/libarchive/archive_read_support_format_lha.c b/src/libs/3rdparty/libarchive/archive_read_support_format_lha.c new file mode 100644 index 000000000..1c64b2900 --- /dev/null +++ b/src/libs/3rdparty/libarchive/archive_read_support_format_lha.c @@ -0,0 +1,2919 @@ +/*- + * Copyright (c) 2008-2014 Michihiro NAKAJIMA + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "archive_platform.h" + +#ifdef HAVE_ERRNO_H +#include <errno.h> +#endif +#ifdef HAVE_LIMITS_H +#include <limits.h> +#endif +#ifdef HAVE_STDLIB_H +#include <stdlib.h> +#endif +#ifdef HAVE_STRING_H +#include <string.h> +#endif + +#include "archive.h" +#include "archive_entry.h" +#include "archive_entry_locale.h" +#include "archive_private.h" +#include "archive_read_private.h" +#include "archive_endian.h" + + +#define MAXMATCH 256 /* Maximum match length. */ +#define MINMATCH 3 /* Minimum match length. */ +/* + * Literal table format: + * +0 +256 +510 + * +---------------+-------------------------+ + * | literal code | match length | + * | 0 ... 255 | MINMATCH ... MAXMATCH | + * +---------------+-------------------------+ + * <--- LT_BITLEN_SIZE ---> + */ +/* Literal table size. */ +#define LT_BITLEN_SIZE (UCHAR_MAX + 1 + MAXMATCH - MINMATCH + 1) +/* Position table size. + * Note: this used for both position table and pre literal table.*/ +#define PT_BITLEN_SIZE (3 + 16) + +struct lzh_dec { + /* Decoding status. */ + int state; + + /* + * Window to see last 8Ki(lh5),32Ki(lh6),64Ki(lh7) bytes of decoded + * data. + */ + int w_size; + int w_mask; + /* Window buffer, which is a loop buffer. */ + unsigned char *w_buff; + /* The insert position to the window. */ + int w_pos; + /* The position where we can copy decoded code from the window. */ + int copy_pos; + /* The length how many bytes we can copy decoded code from + * the window. */ + int copy_len; + + /* + * Bit stream reader. + */ + struct lzh_br { +#define CACHE_TYPE uint64_t +#define CACHE_BITS (8 * sizeof(CACHE_TYPE)) + /* Cache buffer. */ + CACHE_TYPE cache_buffer; + /* Indicates how many bits avail in cache_buffer. */ + int cache_avail; + } br; + + /* + * Huffman coding. + */ + struct huffman { + int len_size; + int len_avail; + int len_bits; + int freq[17]; + unsigned char *bitlen; + + /* + * Use a index table. It's faster than searching a huffman + * coding tree, which is a binary tree. But a use of a large + * index table causes L1 cache read miss many times. + */ +#define HTBL_BITS 10 + int max_bits; + int shift_bits; + int tbl_bits; + int tree_used; + int tree_avail; + /* Direct access table. */ + uint16_t *tbl; + /* Binary tree table for extra bits over the direct access. */ + struct htree_t { + uint16_t left; + uint16_t right; + } *tree; + } lt, pt; + + int blocks_avail; + int pos_pt_len_size; + int pos_pt_len_bits; + int literal_pt_len_size; + int literal_pt_len_bits; + int reading_position; + int loop; + int error; +}; + +struct lzh_stream { + const unsigned char *next_in; + int avail_in; + int64_t total_in; + const unsigned char *ref_ptr; + int avail_out; + int64_t total_out; + struct lzh_dec *ds; +}; + +struct lha { + /* entry_bytes_remaining is the number of bytes we expect. */ + int64_t entry_offset; + int64_t entry_bytes_remaining; + int64_t entry_unconsumed; + uint16_t entry_crc_calculated; + + size_t header_size; /* header size */ + unsigned char level; /* header level */ + char method[3]; /* compress type */ + int64_t compsize; /* compressed data size */ + int64_t origsize; /* original file size */ + int setflag; +#define BIRTHTIME_IS_SET 1 +#define ATIME_IS_SET 2 +#define UNIX_MODE_IS_SET 4 +#define CRC_IS_SET 8 + time_t birthtime; + long birthtime_tv_nsec; + time_t mtime; + long mtime_tv_nsec; + time_t atime; + long atime_tv_nsec; + mode_t mode; + int64_t uid; + int64_t gid; + struct archive_string uname; + struct archive_string gname; + uint16_t header_crc; + uint16_t crc; + /* dirname and filename could be in different codepages */ + struct archive_string_conv *sconv_dir; + struct archive_string_conv *sconv_fname; + struct archive_string_conv *opt_sconv; + + struct archive_string dirname; + struct archive_string filename; + struct archive_wstring ws; + + unsigned char dos_attr; + + /* Flag to mark progress that an archive was read their first header.*/ + char found_first_header; + /* Flag to mark that indicates an empty directory. */ + char directory; + + /* Flags to mark progress of decompression. */ + char decompress_init; + char end_of_entry; + char end_of_entry_cleanup; + char entry_is_compressed; + + char format_name[64]; + + struct lzh_stream strm; +}; + +/* + * LHA header common member offset. + */ +#define H_METHOD_OFFSET 2 /* Compress type. */ +#define H_ATTR_OFFSET 19 /* DOS attribute. */ +#define H_LEVEL_OFFSET 20 /* Header Level. */ +#define H_SIZE 22 /* Minimum header size. */ + +static int archive_read_format_lha_bid(struct archive_read *, int); +static int archive_read_format_lha_options(struct archive_read *, + const char *, const char *); +static int archive_read_format_lha_read_header(struct archive_read *, + struct archive_entry *); +static int archive_read_format_lha_read_data(struct archive_read *, + const void **, size_t *, int64_t *); +static int archive_read_format_lha_read_data_skip(struct archive_read *); +static int archive_read_format_lha_cleanup(struct archive_read *); + +static void lha_replace_path_separator(struct lha *, + struct archive_entry *); +static int lha_read_file_header_0(struct archive_read *, struct lha *); +static int lha_read_file_header_1(struct archive_read *, struct lha *); +static int lha_read_file_header_2(struct archive_read *, struct lha *); +static int lha_read_file_header_3(struct archive_read *, struct lha *); +static int lha_read_file_extended_header(struct archive_read *, + struct lha *, uint16_t *, int, size_t, size_t *); +static size_t lha_check_header_format(const void *); +static int lha_skip_sfx(struct archive_read *); +static time_t lha_dos_time(const unsigned char *); +static time_t lha_win_time(uint64_t, long *); +static unsigned char lha_calcsum(unsigned char, const void *, + int, size_t); +static int lha_parse_linkname(struct archive_wstring *, + struct archive_wstring *); +static int lha_read_data_none(struct archive_read *, const void **, + size_t *, int64_t *); +static int lha_read_data_lzh(struct archive_read *, const void **, + size_t *, int64_t *); +static void lha_crc16_init(void); +static uint16_t lha_crc16(uint16_t, const void *, size_t); +static int lzh_decode_init(struct lzh_stream *, const char *); +static void lzh_decode_free(struct lzh_stream *); +static int lzh_decode(struct lzh_stream *, int); +static int lzh_br_fillup(struct lzh_stream *, struct lzh_br *); +static int lzh_huffman_init(struct huffman *, size_t, int); +static void lzh_huffman_free(struct huffman *); +static int lzh_read_pt_bitlen(struct lzh_stream *, int start, int end); +static int lzh_make_fake_table(struct huffman *, uint16_t); +static int lzh_make_huffman_table(struct huffman *); +static inline int lzh_decode_huffman(struct huffman *, unsigned); +static int lzh_decode_huffman_tree(struct huffman *, unsigned, int); + + +int +archive_read_support_format_lha(struct archive *_a) +{ + struct archive_read *a = (struct archive_read *)_a; + struct lha *lha; + int r; + + archive_check_magic(_a, ARCHIVE_READ_MAGIC, + ARCHIVE_STATE_NEW, "archive_read_support_format_lha"); + + lha = (struct lha *)calloc(1, sizeof(*lha)); + if (lha == NULL) { + archive_set_error(&a->archive, ENOMEM, + "Can't allocate lha data"); + return (ARCHIVE_FATAL); + } + archive_string_init(&lha->ws); + + r = __archive_read_register_format(a, + lha, + "lha", + archive_read_format_lha_bid, + archive_read_format_lha_options, + archive_read_format_lha_read_header, + archive_read_format_lha_read_data, + archive_read_format_lha_read_data_skip, + NULL, + archive_read_format_lha_cleanup, + NULL, + NULL); + + if (r != ARCHIVE_OK) + free(lha); + return (ARCHIVE_OK); +} + +static size_t +lha_check_header_format(const void *h) +{ + const unsigned char *p = h; + size_t next_skip_bytes; + + switch (p[H_METHOD_OFFSET+3]) { + /* + * "-lh0-" ... "-lh7-" "-lhd-" + * "-lzs-" "-lz5-" + */ + case '0': case '1': case '2': case '3': + case '4': case '5': case '6': case '7': + case 'd': + case 's': + next_skip_bytes = 4; + + /* b0 == 0 means the end of an LHa archive file. */ + if (p[0] == 0) + break; + if (p[H_METHOD_OFFSET] != '-' || p[H_METHOD_OFFSET+1] != 'l' + || p[H_METHOD_OFFSET+4] != '-') + break; + + if (p[H_METHOD_OFFSET+2] == 'h') { + /* "-lh?-" */ + if (p[H_METHOD_OFFSET+3] == 's') + break; + if (p[H_LEVEL_OFFSET] == 0) + return (0); + if (p[H_LEVEL_OFFSET] <= 3 && p[H_ATTR_OFFSET] == 0x20) + return (0); + } + if (p[H_METHOD_OFFSET+2] == 'z') { + /* LArc extensions: -lzs-,-lz4- and -lz5- */ + if (p[H_LEVEL_OFFSET] != 0) + break; + if (p[H_METHOD_OFFSET+3] == 's' + || p[H_METHOD_OFFSET+3] == '4' + || p[H_METHOD_OFFSET+3] == '5') + return (0); + } + break; + case 'h': next_skip_bytes = 1; break; + case 'z': next_skip_bytes = 1; break; + case 'l': next_skip_bytes = 2; break; + case '-': next_skip_bytes = 3; break; + default : next_skip_bytes = 4; break; + } + + return (next_skip_bytes); +} + +static int +archive_read_format_lha_bid(struct archive_read *a, int best_bid) +{ + const char *p; + const void *buff; + ssize_t bytes_avail, offset, window; + size_t next; + + /* If there's already a better bid than we can ever + make, don't bother testing. */ + if (best_bid > 30) + return (-1); + + if ((p = __archive_read_ahead(a, H_SIZE, NULL)) == NULL) + return (-1); + + if (lha_check_header_format(p) == 0) + return (30); + + if (p[0] == 'M' && p[1] == 'Z') { + /* PE file */ + offset = 0; + window = 4096; + while (offset < (1024 * 20)) { + buff = __archive_read_ahead(a, offset + window, + &bytes_avail); + if (buff == NULL) { + /* Remaining bytes are less than window. */ + window >>= 1; + if (window < (H_SIZE + 3)) + return (0); + continue; + } + p = (const char *)buff + offset; + while (p + H_SIZE < (const char *)buff + bytes_avail) { + if ((next = lha_check_header_format(p)) == 0) + return (30); + p += next; + } + offset = p - (const char *)buff; + } + } + return (0); +} + +static int +archive_read_format_lha_options(struct archive_read *a, + const char *key, const char *val) +{ + struct lha *lha; + int ret = ARCHIVE_FAILED; + + lha = (struct lha *)(a->format->data); + if (strcmp(key, "hdrcharset") == 0) { + if (val == NULL || val[0] == 0) + archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, + "lha: hdrcharset option needs a character-set name"); + else { + lha->opt_sconv = + archive_string_conversion_from_charset( + &a->archive, val, 0); + if (lha->opt_sconv != NULL) + ret = ARCHIVE_OK; + else + ret = ARCHIVE_FATAL; + } + return (ret); + } + + /* Note: The "warn" return is just to inform the options + * supervisor that we didn't handle it. It will generate + * a suitable error if no one used this option. */ + return (ARCHIVE_WARN); +} + +static int +lha_skip_sfx(struct archive_read *a) +{ + const void *h; + const char *p, *q; + size_t next, skip; + ssize_t bytes, window; + + window = 4096; + for (;;) { + h = __archive_read_ahead(a, window, &bytes); + if (h == NULL) { + /* Remaining bytes are less than window. */ + window >>= 1; + if (window < (H_SIZE + 3)) + goto fatal; + continue; + } + if (bytes < H_SIZE) + goto fatal; + p = h; + q = p + bytes; + + /* + * Scan ahead until we find something that looks + * like the lha header. + */ + while (p + H_SIZE < q) { + if ((next = lha_check_header_format(p)) == 0) { + skip = p - (const char *)h; + __archive_read_consume(a, skip); + return (ARCHIVE_OK); + } + p += next; + } + skip = p - (const char *)h; + __archive_read_consume(a, skip); + } +fatal: + archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, + "Couldn't find out LHa header"); + return (ARCHIVE_FATAL); +} + +static int +truncated_error(struct archive_read *a) +{ + archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, + "Truncated LHa header"); + return (ARCHIVE_FATAL); +} + +static int +archive_read_format_lha_read_header(struct archive_read *a, + struct archive_entry *entry) +{ + struct archive_wstring linkname; + struct archive_wstring pathname; + struct lha *lha; + const unsigned char *p; + const char *signature; + int err; + struct archive_mstring conv_buffer; + const wchar_t *conv_buffer_p; + + lha_crc16_init(); + + a->archive.archive_format = ARCHIVE_FORMAT_LHA; + if (a->archive.archive_format_name == NULL) + a->archive.archive_format_name = "lha"; + + lha = (struct lha *)(a->format->data); + lha->decompress_init = 0; + lha->end_of_entry = 0; + lha->end_of_entry_cleanup = 0; + lha->entry_unconsumed = 0; + + if ((p = __archive_read_ahead(a, H_SIZE, NULL)) == NULL) { + /* + * LHa archiver added 0 to the tail of its archive file as + * the mark of the end of the archive. + */ + signature = __archive_read_ahead(a, sizeof(signature[0]), NULL); + if (signature == NULL || signature[0] == 0) + return (ARCHIVE_EOF); + return (truncated_error(a)); + } + + signature = (const char *)p; + if (lha->found_first_header == 0 && + signature[0] == 'M' && signature[1] == 'Z') { + /* This is an executable? Must be self-extracting... */ + err = lha_skip_sfx(a); + if (err < ARCHIVE_WARN) + return (err); + + if ((p = __archive_read_ahead(a, sizeof(*p), NULL)) == NULL) + return (truncated_error(a)); + signature = (const char *)p; + } + /* signature[0] == 0 means the end of an LHa archive file. */ + if (signature[0] == 0) + return (ARCHIVE_EOF); + + /* + * Check the header format and method type. + */ + if (lha_check_header_format(p) != 0) { + archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, + "Bad LHa file"); + return (ARCHIVE_FATAL); + } + + /* We've found the first header. */ + lha->found_first_header = 1; + /* Set a default value and common data */ + lha->header_size = 0; + lha->level = p[H_LEVEL_OFFSET]; + lha->method[0] = p[H_METHOD_OFFSET+1]; + lha->method[1] = p[H_METHOD_OFFSET+2]; + lha->method[2] = p[H_METHOD_OFFSET+3]; + if (memcmp(lha->method, "lhd", 3) == 0) + lha->directory = 1; + else + lha->directory = 0; + if (memcmp(lha->method, "lh0", 3) == 0 || + memcmp(lha->method, "lz4", 3) == 0) + lha->entry_is_compressed = 0; + else + lha->entry_is_compressed = 1; + + lha->compsize = 0; + lha->origsize = 0; + lha->setflag = 0; + lha->birthtime = 0; + lha->birthtime_tv_nsec = 0; + lha->mtime = 0; + lha->mtime_tv_nsec = 0; + lha->atime = 0; + lha->atime_tv_nsec = 0; + lha->mode = (lha->directory)? 0777 : 0666; + lha->uid = 0; + lha->gid = 0; + archive_string_empty(&lha->dirname); + archive_string_empty(&lha->filename); + lha->dos_attr = 0; + if (lha->opt_sconv != NULL) { + lha->sconv_dir = lha->opt_sconv; + lha->sconv_fname = lha->opt_sconv; + } else { + lha->sconv_dir = NULL; + lha->sconv_fname = NULL; + } + + switch (p[H_LEVEL_OFFSET]) { + case 0: + err = lha_read_file_header_0(a, lha); + break; + case 1: + err = lha_read_file_header_1(a, lha); + break; + case 2: + err = lha_read_file_header_2(a, lha); + break; + case 3: + err = lha_read_file_header_3(a, lha); + break; + default: + archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, + "Unsupported LHa header level %d", p[H_LEVEL_OFFSET]); + err = ARCHIVE_FATAL; + break; + } + if (err < ARCHIVE_WARN) + return (err); + + + if (!lha->directory && archive_strlen(&lha->filename) == 0) + /* The filename has not been set */ + return (truncated_error(a)); + + /* + * Make a pathname from a dirname and a filename, after converting to Unicode. + * This is because codepages might differ between dirname and filename. + */ + archive_string_init(&pathname); + archive_string_init(&linkname); + archive_string_init(&conv_buffer.aes_mbs); + archive_string_init(&conv_buffer.aes_mbs_in_locale); + archive_string_init(&conv_buffer.aes_utf8); + archive_string_init(&conv_buffer.aes_wcs); + if (0 != archive_mstring_copy_mbs_len_l(&conv_buffer, lha->dirname.s, lha->dirname.length, lha->sconv_dir)) { + archive_set_error(&a->archive, + ARCHIVE_ERRNO_FILE_FORMAT, + "Pathname cannot be converted " + "from %s to Unicode.", + archive_string_conversion_charset_name(lha->sconv_dir)); + err = ARCHIVE_FATAL; + } else if (0 != archive_mstring_get_wcs(&a->archive, &conv_buffer, &conv_buffer_p)) + err = ARCHIVE_FATAL; + if (err == ARCHIVE_FATAL) { + archive_mstring_clean(&conv_buffer); + archive_wstring_free(&pathname); + archive_wstring_free(&linkname); + return (err); + } + archive_wstring_copy(&pathname, &conv_buffer.aes_wcs); + + archive_string_empty(&conv_buffer.aes_mbs); + archive_string_empty(&conv_buffer.aes_mbs_in_locale); + archive_string_empty(&conv_buffer.aes_utf8); + archive_wstring_empty(&conv_buffer.aes_wcs); + if (0 != archive_mstring_copy_mbs_len_l(&conv_buffer, lha->filename.s, lha->filename.length, lha->sconv_fname)) { + archive_set_error(&a->archive, + ARCHIVE_ERRNO_FILE_FORMAT, + "Pathname cannot be converted " + "from %s to Unicode.", + archive_string_conversion_charset_name(lha->sconv_fname)); + err = ARCHIVE_FATAL; + } + else if (0 != archive_mstring_get_wcs(&a->archive, &conv_buffer, &conv_buffer_p)) + err = ARCHIVE_FATAL; + if (err == ARCHIVE_FATAL) { + archive_mstring_clean(&conv_buffer); + archive_wstring_free(&pathname); + archive_wstring_free(&linkname); + return (err); + } + archive_wstring_concat(&pathname, &conv_buffer.aes_wcs); + archive_mstring_clean(&conv_buffer); + + if ((lha->mode & AE_IFMT) == AE_IFLNK) { + /* + * Extract the symlink-name if it's included in the pathname. + */ + if (!lha_parse_linkname(&linkname, &pathname)) { + /* We couldn't get the symlink-name. */ + archive_set_error(&a->archive, + ARCHIVE_ERRNO_FILE_FORMAT, + "Unknown symlink-name"); + archive_wstring_free(&pathname); + archive_wstring_free(&linkname); + return (ARCHIVE_FAILED); + } + } else { + /* + * Make sure a file-type is set. + * The mode has been overridden if it is in the extended data. + */ + lha->mode = (lha->mode & ~AE_IFMT) | + ((lha->directory)? AE_IFDIR: AE_IFREG); + } + if ((lha->setflag & UNIX_MODE_IS_SET) == 0 && + (lha->dos_attr & 1) != 0) + lha->mode &= ~(0222);/* read only. */ + + /* + * Set basic file parameters. + */ + archive_entry_copy_pathname_w(entry, pathname.s); + archive_wstring_free(&pathname); + if (archive_strlen(&linkname) > 0) { + archive_entry_copy_symlink_w(entry, linkname.s); + } else + archive_entry_set_symlink(entry, NULL); + archive_wstring_free(&linkname); + /* + * When a header level is 0, there is a possibility that + * a pathname and a symlink has '\' character, a directory + * separator in DOS/Windows. So we should convert it to '/'. + */ + if (p[H_LEVEL_OFFSET] == 0) + lha_replace_path_separator(lha, entry); + + archive_entry_set_mode(entry, lha->mode); + archive_entry_set_uid(entry, lha->uid); + archive_entry_set_gid(entry, lha->gid); + if (archive_strlen(&lha->uname) > 0) + archive_entry_set_uname(entry, lha->uname.s); + if (archive_strlen(&lha->gname) > 0) + archive_entry_set_gname(entry, lha->gname.s); + if (lha->setflag & BIRTHTIME_IS_SET) { + archive_entry_set_birthtime(entry, lha->birthtime, + lha->birthtime_tv_nsec); + archive_entry_set_ctime(entry, lha->birthtime, + lha->birthtime_tv_nsec); + } else { + archive_entry_unset_birthtime(entry); + archive_entry_unset_ctime(entry); + } + archive_entry_set_mtime(entry, lha->mtime, lha->mtime_tv_nsec); + if (lha->setflag & ATIME_IS_SET) + archive_entry_set_atime(entry, lha->atime, + lha->atime_tv_nsec); + else + archive_entry_unset_atime(entry); + if (lha->directory || archive_entry_symlink(entry) != NULL) + archive_entry_unset_size(entry); + else + archive_entry_set_size(entry, lha->origsize); + + /* + * Prepare variables used to read a file content. + */ + lha->entry_bytes_remaining = lha->compsize; + if (lha->entry_bytes_remaining < 0) { + archive_set_error(&a->archive, + ARCHIVE_ERRNO_FILE_FORMAT, + "Invalid LHa entry size"); + return (ARCHIVE_FATAL); + } + lha->entry_offset = 0; + lha->entry_crc_calculated = 0; + + /* + * This file does not have a content. + */ + if (lha->directory || lha->compsize == 0) + lha->end_of_entry = 1; + + snprintf(lha->format_name, sizeof(lha->format_name), "lha -%c%c%c-", + lha->method[0], lha->method[1], lha->method[2]); + a->archive.archive_format_name = lha->format_name; + + return (err); +} + +/* + * Replace a DOS path separator '\' by a character '/'. + * Some multi-byte character set have a character '\' in its second byte. + */ +static void +lha_replace_path_separator(struct lha *lha, struct archive_entry *entry) +{ + const wchar_t *wp; + size_t i; + + if ((wp = archive_entry_pathname_w(entry)) != NULL) { + archive_wstrcpy(&(lha->ws), wp); + for (i = 0; i < archive_strlen(&(lha->ws)); i++) { + if (lha->ws.s[i] == L'\\') + lha->ws.s[i] = L'/'; + } + archive_entry_copy_pathname_w(entry, lha->ws.s); + } + + if ((wp = archive_entry_symlink_w(entry)) != NULL) { + archive_wstrcpy(&(lha->ws), wp); + for (i = 0; i < archive_strlen(&(lha->ws)); i++) { + if (lha->ws.s[i] == L'\\') + lha->ws.s[i] = L'/'; + } + archive_entry_copy_symlink_w(entry, lha->ws.s); + } +} + +/* + * Header 0 format + * + * +0 +1 +2 +7 +11 + * +---------------+----------+----------------+-------------------+ + * |header size(*1)|header sum|compression type|compressed size(*2)| + * +---------------+----------+----------------+-------------------+ + * <---------------------(*1)----------* + * + * +11 +15 +17 +19 +20 +21 + * +-----------------+---------+---------+--------------+----------------+ + * |uncompressed size|time(DOS)|date(DOS)|attribute(DOS)|header level(=0)| + * +-----------------+---------+---------+--------------+----------------+ + * *--------------------------------(*1)---------------------------------* + * + * +21 +22 +22+(*3) +22+(*3)+2 +22+(*3)+2+(*4) + * +---------------+---------+----------+----------------+------------------+ + * |name length(*3)|file name|file CRC16|extra header(*4)| compressed data | + * +---------------+---------+----------+----------------+------------------+ + * <--(*3)-> <------(*2)------> + * *----------------------(*1)--------------------------> + * + */ +#define H0_HEADER_SIZE_OFFSET 0 +#define H0_HEADER_SUM_OFFSET 1 +#define H0_COMP_SIZE_OFFSET 7 +#define H0_ORIG_SIZE_OFFSET 11 +#define H0_DOS_TIME_OFFSET 15 +#define H0_NAME_LEN_OFFSET 21 +#define H0_FILE_NAME_OFFSET 22 +#define H0_FIXED_SIZE 24 +static int +lha_read_file_header_0(struct archive_read *a, struct lha *lha) +{ + const unsigned char *p; + int extdsize, namelen; + unsigned char headersum, sum_calculated; + + if ((p = __archive_read_ahead(a, H0_FIXED_SIZE, NULL)) == NULL) + return (truncated_error(a)); + lha->header_size = p[H0_HEADER_SIZE_OFFSET] + 2; + headersum = p[H0_HEADER_SUM_OFFSET]; + lha->compsize = archive_le32dec(p + H0_COMP_SIZE_OFFSET); + lha->origsize = archive_le32dec(p + H0_ORIG_SIZE_OFFSET); + lha->mtime = lha_dos_time(p + H0_DOS_TIME_OFFSET); + namelen = p[H0_NAME_LEN_OFFSET]; + extdsize = (int)lha->header_size - H0_FIXED_SIZE - namelen; + if ((namelen > 221 || extdsize < 0) && extdsize != -2) { + archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, + "Invalid LHa header"); + return (ARCHIVE_FATAL); + } + if ((p = __archive_read_ahead(a, lha->header_size, NULL)) == NULL) + return (truncated_error(a)); + + archive_strncpy(&lha->filename, p + H0_FILE_NAME_OFFSET, namelen); + /* When extdsize == -2, A CRC16 value is not present in the header. */ + if (extdsize >= 0) { + lha->crc = archive_le16dec(p + H0_FILE_NAME_OFFSET + namelen); + lha->setflag |= CRC_IS_SET; + } + sum_calculated = lha_calcsum(0, p, 2, lha->header_size - 2); + + /* Read an extended header */ + if (extdsize > 0) { + /* This extended data is set by 'LHa for UNIX' only. + * Maybe fixed size. + */ + p += H0_FILE_NAME_OFFSET + namelen + 2; + if (p[0] == 'U' && extdsize == 12) { + /* p[1] is a minor version. */ + lha->mtime = archive_le32dec(&p[2]); + lha->mode = archive_le16dec(&p[6]); + lha->uid = archive_le16dec(&p[8]); + lha->gid = archive_le16dec(&p[10]); + lha->setflag |= UNIX_MODE_IS_SET; + } + } + __archive_read_consume(a, lha->header_size); + + if (sum_calculated != headersum) { + archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, + "LHa header sum error"); + return (ARCHIVE_FATAL); + } + + return (ARCHIVE_OK); +} + +/* + * Header 1 format + * + * +0 +1 +2 +7 +11 + * +---------------+----------+----------------+-------------+ + * |header size(*1)|header sum|compression type|skip size(*2)| + * +---------------+----------+----------------+-------------+ + * <---------------(*1)----------* + * + * +11 +15 +17 +19 +20 +21 + * +-----------------+---------+---------+--------------+----------------+ + * |uncompressed size|time(DOS)|date(DOS)|attribute(DOS)|header level(=1)| + * +-----------------+---------+---------+--------------+----------------+ + * *-------------------------------(*1)----------------------------------* + * + * +21 +22 +22+(*3) +22+(*3)+2 +22+(*3)+3 +22+(*3)+3+(*4) + * +---------------+---------+----------+-----------+-----------+ + * |name length(*3)|file name|file CRC16| creator |padding(*4)| + * +---------------+---------+----------+-----------+-----------+ + * <--(*3)-> + * *----------------------------(*1)----------------------------* + * + * +22+(*3)+3+(*4) +22+(*3)+3+(*4)+2 +22+(*3)+3+(*4)+2+(*5) + * +----------------+---------------------+------------------------+ + * |next header size| extended header(*5) | compressed data | + * +----------------+---------------------+------------------------+ + * *------(*1)-----> <--------------------(*2)--------------------> + */ +#define H1_HEADER_SIZE_OFFSET 0 +#define H1_HEADER_SUM_OFFSET 1 +#define H1_COMP_SIZE_OFFSET 7 +#define H1_ORIG_SIZE_OFFSET 11 +#define H1_DOS_TIME_OFFSET 15 +#define H1_NAME_LEN_OFFSET 21 +#define H1_FILE_NAME_OFFSET 22 +#define H1_FIXED_SIZE 27 +static int +lha_read_file_header_1(struct archive_read *a, struct lha *lha) +{ + const unsigned char *p; + size_t extdsize; + int i, err, err2; + int namelen, padding; + unsigned char headersum, sum_calculated; + + err = ARCHIVE_OK; + + if ((p = __archive_read_ahead(a, H1_FIXED_SIZE, NULL)) == NULL) + return (truncated_error(a)); + + lha->header_size = p[H1_HEADER_SIZE_OFFSET] + 2; + headersum = p[H1_HEADER_SUM_OFFSET]; + /* Note: An extended header size is included in a compsize. */ + lha->compsize = archive_le32dec(p + H1_COMP_SIZE_OFFSET); + lha->origsize = archive_le32dec(p + H1_ORIG_SIZE_OFFSET); + lha->mtime = lha_dos_time(p + H1_DOS_TIME_OFFSET); + namelen = p[H1_NAME_LEN_OFFSET]; + /* Calculate a padding size. The result will be normally 0 only(?) */ + padding = ((int)lha->header_size) - H1_FIXED_SIZE - namelen; + + if (namelen > 230 || padding < 0) + goto invalid; + + if ((p = __archive_read_ahead(a, lha->header_size, NULL)) == NULL) + return (truncated_error(a)); + + for (i = 0; i < namelen; i++) { + if (p[i + H1_FILE_NAME_OFFSET] == 0xff) + goto invalid;/* Invalid filename. */ + } + archive_strncpy(&lha->filename, p + H1_FILE_NAME_OFFSET, namelen); + lha->crc = archive_le16dec(p + H1_FILE_NAME_OFFSET + namelen); + lha->setflag |= CRC_IS_SET; + + sum_calculated = lha_calcsum(0, p, 2, lha->header_size - 2); + /* Consume used bytes but not include `next header size' data + * since it will be consumed in lha_read_file_extended_header(). */ + __archive_read_consume(a, lha->header_size - 2); + + /* Read extended headers */ + err2 = lha_read_file_extended_header(a, lha, NULL, 2, + (size_t)(lha->compsize + 2), &extdsize); + if (err2 < ARCHIVE_WARN) + return (err2); + if (err2 < err) + err = err2; + /* Get a real compressed file size. */ + lha->compsize -= extdsize - 2; + + if (lha->compsize < 0) + goto invalid; /* Invalid compressed file size */ + + if (sum_calculated != headersum) { + archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, + "LHa header sum error"); + return (ARCHIVE_FATAL); + } + return (err); +invalid: + archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, + "Invalid LHa header"); + return (ARCHIVE_FATAL); +} + +/* + * Header 2 format + * + * +0 +2 +7 +11 +15 + * +---------------+----------------+-------------------+-----------------+ + * |header size(*1)|compression type|compressed size(*2)|uncompressed size| + * +---------------+----------------+-------------------+-----------------+ + * <--------------------------------(*1)---------------------------------* + * + * +15 +19 +20 +21 +23 +24 + * +-----------------+------------+----------------+----------+-----------+ + * |data/time(time_t)| 0x20 fixed |header level(=2)|file CRC16| creator | + * +-----------------+------------+----------------+----------+-----------+ + * *---------------------------------(*1)---------------------------------* + * + * +24 +26 +26+(*3) +26+(*3)+(*4) + * +----------------+-------------------+-------------+-------------------+ + * |next header size|extended header(*3)| padding(*4) | compressed data | + * +----------------+-------------------+-------------+-------------------+ + * *--------------------------(*1)-------------------> <------(*2)-------> + * + */ +#define H2_HEADER_SIZE_OFFSET 0 +#define H2_COMP_SIZE_OFFSET 7 +#define H2_ORIG_SIZE_OFFSET 11 +#define H2_TIME_OFFSET 15 +#define H2_CRC_OFFSET 21 +#define H2_FIXED_SIZE 24 +static int +lha_read_file_header_2(struct archive_read *a, struct lha *lha) +{ + const unsigned char *p; + size_t extdsize; + int err, padding; + uint16_t header_crc; + + if ((p = __archive_read_ahead(a, H2_FIXED_SIZE, NULL)) == NULL) + return (truncated_error(a)); + + lha->header_size =archive_le16dec(p + H2_HEADER_SIZE_OFFSET); + lha->compsize = archive_le32dec(p + H2_COMP_SIZE_OFFSET); + lha->origsize = archive_le32dec(p + H2_ORIG_SIZE_OFFSET); + lha->mtime = archive_le32dec(p + H2_TIME_OFFSET); + lha->crc = archive_le16dec(p + H2_CRC_OFFSET); + lha->setflag |= CRC_IS_SET; + + if (lha->header_size < H2_FIXED_SIZE) { + archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, + "Invalid LHa header size"); + return (ARCHIVE_FATAL); + } + + header_crc = lha_crc16(0, p, H2_FIXED_SIZE); + __archive_read_consume(a, H2_FIXED_SIZE); + + /* Read extended headers */ + err = lha_read_file_extended_header(a, lha, &header_crc, 2, + lha->header_size - H2_FIXED_SIZE, &extdsize); + if (err < ARCHIVE_WARN) + return (err); + + /* Calculate a padding size. The result will be normally 0 or 1. */ + padding = (int)lha->header_size - (int)(H2_FIXED_SIZE + extdsize); + if (padding > 0) { + if ((p = __archive_read_ahead(a, padding, NULL)) == NULL) + return (truncated_error(a)); + header_crc = lha_crc16(header_crc, p, padding); + __archive_read_consume(a, padding); + } + + if (header_crc != lha->header_crc) { +#ifndef DONT_FAIL_ON_CRC_ERROR + archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, + "LHa header CRC error"); + return (ARCHIVE_FATAL); +#endif + } + return (err); +} + +/* + * Header 3 format + * + * +0 +2 +7 +11 +15 + * +------------+----------------+-------------------+-----------------+ + * | 0x04 fixed |compression type|compressed size(*2)|uncompressed size| + * +------------+----------------+-------------------+-----------------+ + * <-------------------------------(*1)-------------------------------* + * + * +15 +19 +20 +21 +23 +24 + * +-----------------+------------+----------------+----------+-----------+ + * |date/time(time_t)| 0x20 fixed |header level(=3)|file CRC16| creator | + * +-----------------+------------+----------------+----------+-----------+ + * *--------------------------------(*1)----------------------------------* + * + * +24 +28 +32 +32+(*3) + * +---------------+----------------+-------------------+-----------------+ + * |header size(*1)|next header size|extended header(*3)| compressed data | + * +---------------+----------------+-------------------+-----------------+ + * *------------------------(*1)-----------------------> <------(*2)-----> + * + */ +#define H3_FIELD_LEN_OFFSET 0 +#define H3_COMP_SIZE_OFFSET 7 +#define H3_ORIG_SIZE_OFFSET 11 +#define H3_TIME_OFFSET 15 +#define H3_CRC_OFFSET 21 +#define H3_HEADER_SIZE_OFFSET 24 +#define H3_FIXED_SIZE 28 +static int +lha_read_file_header_3(struct archive_read *a, struct lha *lha) +{ + const unsigned char *p; + size_t extdsize; + int err; + uint16_t header_crc; + + if ((p = __archive_read_ahead(a, H3_FIXED_SIZE, NULL)) == NULL) + return (truncated_error(a)); + + if (archive_le16dec(p + H3_FIELD_LEN_OFFSET) != 4) + goto invalid; + lha->header_size =archive_le32dec(p + H3_HEADER_SIZE_OFFSET); + lha->compsize = archive_le32dec(p + H3_COMP_SIZE_OFFSET); + lha->origsize = archive_le32dec(p + H3_ORIG_SIZE_OFFSET); + lha->mtime = archive_le32dec(p + H3_TIME_OFFSET); + lha->crc = archive_le16dec(p + H3_CRC_OFFSET); + lha->setflag |= CRC_IS_SET; + + if (lha->header_size < H3_FIXED_SIZE + 4) + goto invalid; + header_crc = lha_crc16(0, p, H3_FIXED_SIZE); + __archive_read_consume(a, H3_FIXED_SIZE); + + /* Read extended headers */ + err = lha_read_file_extended_header(a, lha, &header_crc, 4, + lha->header_size - H3_FIXED_SIZE, &extdsize); + if (err < ARCHIVE_WARN) + return (err); + + if (header_crc != lha->header_crc) { +#ifndef DONT_FAIL_ON_CRC_ERROR + archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, + "LHa header CRC error"); + return (ARCHIVE_FATAL); +#endif + } + return (err); +invalid: + archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, + "Invalid LHa header"); + return (ARCHIVE_FATAL); +} + +/* + * Extended header format + * + * +0 +2 +3 -- used in header 1 and 2 + * +0 +4 +5 -- used in header 3 + * +--------------+---------+-------------------+--------------+-- + * |ex-header size|header id| data |ex-header size| ....... + * +--------------+---------+-------------------+--------------+-- + * <-------------( ex-header size)------------> <-- next extended header --* + * + * If the ex-header size is zero, it is the make of the end of extended + * headers. + * + */ +static int +lha_read_file_extended_header(struct archive_read *a, struct lha *lha, + uint16_t *crc, int sizefield_length, size_t limitsize, size_t *total_size) +{ + const void *h; + const unsigned char *extdheader; + size_t extdsize; + size_t datasize; + unsigned int i; + unsigned char extdtype; + +#define EXT_HEADER_CRC 0x00 /* Header CRC and information*/ +#define EXT_FILENAME 0x01 /* Filename */ +#define EXT_DIRECTORY 0x02 /* Directory name */ +#define EXT_DOS_ATTR 0x40 /* MS-DOS attribute */ +#define EXT_TIMESTAMP 0x41 /* Windows time stamp */ +#define EXT_FILESIZE 0x42 /* Large file size */ +#define EXT_TIMEZONE 0x43 /* Time zone */ +#define EXT_UTF16_FILENAME 0x44 /* UTF-16 filename */ +#define EXT_UTF16_DIRECTORY 0x45 /* UTF-16 directory name */ +#define EXT_CODEPAGE 0x46 /* Codepage */ +#define EXT_UNIX_MODE 0x50 /* File permission */ +#define EXT_UNIX_GID_UID 0x51 /* gid,uid */ +#define EXT_UNIX_GNAME 0x52 /* Group name */ +#define EXT_UNIX_UNAME 0x53 /* User name */ +#define EXT_UNIX_MTIME 0x54 /* Modified time */ +#define EXT_OS2_NEW_ATTR 0x7f /* new attribute(OS/2 only) */ +#define EXT_NEW_ATTR 0xff /* new attribute */ + + *total_size = sizefield_length; + + for (;;) { + /* Read an extended header size. */ + if ((h = + __archive_read_ahead(a, sizefield_length, NULL)) == NULL) + return (truncated_error(a)); + /* Check if the size is the zero indicates the end of the + * extended header. */ + if (sizefield_length == sizeof(uint16_t)) + extdsize = archive_le16dec(h); + else + extdsize = archive_le32dec(h); + if (extdsize == 0) { + /* End of extended header */ + if (crc != NULL) + *crc = lha_crc16(*crc, h, sizefield_length); + __archive_read_consume(a, sizefield_length); + return (ARCHIVE_OK); + } + + /* Sanity check to the extended header size. */ + if (((uint64_t)*total_size + extdsize) > + (uint64_t)limitsize || + extdsize <= (size_t)sizefield_length) + goto invalid; + + /* Read the extended header. */ + if ((h = __archive_read_ahead(a, extdsize, NULL)) == NULL) + return (truncated_error(a)); + *total_size += extdsize; + + extdheader = (const unsigned char *)h; + /* Get the extended header type. */ + extdtype = extdheader[sizefield_length]; + /* Calculate an extended data size. */ + datasize = extdsize - (1 + sizefield_length); + /* Skip an extended header size field and type field. */ + extdheader += sizefield_length + 1; + + if (crc != NULL && extdtype != EXT_HEADER_CRC) + *crc = lha_crc16(*crc, h, extdsize); + switch (extdtype) { + case EXT_HEADER_CRC: + /* We only use a header CRC. Following data will not + * be used. */ + if (datasize >= 2) { + lha->header_crc = archive_le16dec(extdheader); + if (crc != NULL) { + static const char zeros[2] = {0, 0}; + *crc = lha_crc16(*crc, h, + extdsize - datasize); + /* CRC value itself as zero */ + *crc = lha_crc16(*crc, zeros, 2); + *crc = lha_crc16(*crc, + extdheader+2, datasize - 2); + } + } + break; + case EXT_FILENAME: + if (datasize == 0) { + /* maybe directory header */ + archive_string_empty(&lha->filename); + break; + } + if (extdheader[0] == '\0') + goto invalid; + archive_strncpy(&lha->filename, + (const char *)extdheader, datasize); + break; + case EXT_UTF16_FILENAME: + if (datasize == 0) { + /* maybe directory header */ + archive_string_empty(&lha->filename); + break; + } else if (datasize & 1) { + /* UTF-16 characters take always 2 or 4 bytes */ + goto invalid; + } + if (extdheader[0] == '\0') + goto invalid; + archive_string_empty(&lha->filename); + archive_array_append(&lha->filename, + (const char *)extdheader, datasize); + /* Setup a string conversion for a filename. */ + lha->sconv_fname = + archive_string_conversion_from_charset(&a->archive, + "UTF-16LE", 1); + if (lha->sconv_fname == NULL) + return (ARCHIVE_FATAL); + break; + case EXT_DIRECTORY: + if (datasize == 0 || extdheader[0] == '\0') + /* no directory name data. exit this case. */ + goto invalid; + + archive_strncpy(&lha->dirname, + (const char *)extdheader, datasize); + /* + * Convert directory delimiter from 0xFF + * to '/' for local system. + */ + for (i = 0; i < lha->dirname.length; i++) { + if ((unsigned char)lha->dirname.s[i] == 0xFF) + lha->dirname.s[i] = '/'; + } + /* Is last character directory separator? */ + if (lha->dirname.s[lha->dirname.length-1] != '/') + /* invalid directory data */ + goto invalid; + break; + case EXT_UTF16_DIRECTORY: + /* UTF-16 characters take always 2 or 4 bytes */ + if (datasize == 0 || (datasize & 1) || + extdheader[0] == '\0') { + /* no directory name data. exit this case. */ + goto invalid; + } + + archive_string_empty(&lha->dirname); + archive_array_append(&lha->dirname, + (const char *)extdheader, datasize); + lha->sconv_dir = + archive_string_conversion_from_charset(&a->archive, + "UTF-16LE", 1); + if (lha->sconv_dir == NULL) + return (ARCHIVE_FATAL); + else { + /* + * Convert directory delimiter from 0xFFFF + * to '/' for local system. + */ + uint16_t dirSep; + uint16_t d = 1; + if (archive_be16dec(&d) == 1) + dirSep = 0x2F00; + else + dirSep = 0x002F; + + /* UTF-16LE character */ + uint16_t *utf16name = + (uint16_t *)lha->dirname.s; + for (i = 0; i < lha->dirname.length / 2; i++) { + if (utf16name[i] == 0xFFFF) { + utf16name[i] = dirSep; + } + } + /* Is last character directory separator? */ + if (utf16name[lha->dirname.length / 2 - 1] != + dirSep) { + /* invalid directory data */ + goto invalid; + } + } + break; + case EXT_DOS_ATTR: + if (datasize == 2) + lha->dos_attr = (unsigned char) + (archive_le16dec(extdheader) & 0xff); + break; + case EXT_TIMESTAMP: + if (datasize == (sizeof(uint64_t) * 3)) { + lha->birthtime = lha_win_time( + archive_le64dec(extdheader), + &lha->birthtime_tv_nsec); + extdheader += sizeof(uint64_t); + lha->mtime = lha_win_time( + archive_le64dec(extdheader), + &lha->mtime_tv_nsec); + extdheader += sizeof(uint64_t); + lha->atime = lha_win_time( + archive_le64dec(extdheader), + &lha->atime_tv_nsec); + lha->setflag |= BIRTHTIME_IS_SET | + ATIME_IS_SET; + } + break; + case EXT_FILESIZE: + if (datasize == sizeof(uint64_t) * 2) { + lha->compsize = archive_le64dec(extdheader); + extdheader += sizeof(uint64_t); + lha->origsize = archive_le64dec(extdheader); + } + break; + case EXT_CODEPAGE: + /* Get an archived filename charset from codepage. + * This overwrites the charset specified by + * hdrcharset option. */ + if (datasize == sizeof(uint32_t)) { + struct archive_string cp; + const char *charset; + + archive_string_init(&cp); + switch (archive_le32dec(extdheader)) { + case 65001: /* UTF-8 */ + charset = "UTF-8"; + break; + default: + archive_string_sprintf(&cp, "CP%d", + (int)archive_le32dec(extdheader)); + charset = cp.s; + break; + } + lha->sconv_dir = + archive_string_conversion_from_charset( + &(a->archive), charset, 1); + lha->sconv_fname = + archive_string_conversion_from_charset( + &(a->archive), charset, 1); + archive_string_free(&cp); + if (lha->sconv_dir == NULL) + return (ARCHIVE_FATAL); + if (lha->sconv_fname == NULL) + return (ARCHIVE_FATAL); + } + break; + case EXT_UNIX_MODE: + if (datasize == sizeof(uint16_t)) { + lha->mode = archive_le16dec(extdheader); + lha->setflag |= UNIX_MODE_IS_SET; + } + break; + case EXT_UNIX_GID_UID: + if (datasize == (sizeof(uint16_t) * 2)) { + lha->gid = archive_le16dec(extdheader); + lha->uid = archive_le16dec(extdheader+2); + } + break; + case EXT_UNIX_GNAME: + if (datasize > 0) + archive_strncpy(&lha->gname, + (const char *)extdheader, datasize); + break; + case EXT_UNIX_UNAME: + if (datasize > 0) + archive_strncpy(&lha->uname, + (const char *)extdheader, datasize); + break; + case EXT_UNIX_MTIME: + if (datasize == sizeof(uint32_t)) + lha->mtime = archive_le32dec(extdheader); + break; + case EXT_OS2_NEW_ATTR: + /* This extended header is OS/2 depend. */ + if (datasize == 16) { + lha->dos_attr = (unsigned char) + (archive_le16dec(extdheader) & 0xff); + lha->mode = archive_le16dec(extdheader+2); + lha->gid = archive_le16dec(extdheader+4); + lha->uid = archive_le16dec(extdheader+6); + lha->birthtime = archive_le32dec(extdheader+8); + lha->atime = archive_le32dec(extdheader+12); + lha->setflag |= UNIX_MODE_IS_SET + | BIRTHTIME_IS_SET | ATIME_IS_SET; + } + break; + case EXT_NEW_ATTR: + if (datasize == 20) { + lha->mode = (mode_t)archive_le32dec(extdheader); + lha->gid = archive_le32dec(extdheader+4); + lha->uid = archive_le32dec(extdheader+8); + lha->birthtime = archive_le32dec(extdheader+12); + lha->atime = archive_le32dec(extdheader+16); + lha->setflag |= UNIX_MODE_IS_SET + | BIRTHTIME_IS_SET | ATIME_IS_SET; + } + break; + case EXT_TIMEZONE: /* Not supported */ + break; + default: + break; + } + + __archive_read_consume(a, extdsize); + } +invalid: + archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, + "Invalid extended LHa header"); + return (ARCHIVE_FATAL); +} + +static int +lha_end_of_entry(struct archive_read *a) +{ + struct lha *lha = (struct lha *)(a->format->data); + int r = ARCHIVE_EOF; + + if (!lha->end_of_entry_cleanup) { + if ((lha->setflag & CRC_IS_SET) && + lha->crc != lha->entry_crc_calculated) { + archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, + "LHa data CRC error"); + r = ARCHIVE_WARN; + } + + /* End-of-entry cleanup done. */ + lha->end_of_entry_cleanup = 1; + } + return (r); +} + +static int +archive_read_format_lha_read_data(struct archive_read *a, + const void **buff, size_t *size, int64_t *offset) +{ + struct lha *lha = (struct lha *)(a->format->data); + int r; + + if (lha->entry_unconsumed) { + /* Consume as much as the decompressor actually used. */ + __archive_read_consume(a, lha->entry_unconsumed); + lha->entry_unconsumed = 0; + } + if (lha->end_of_entry) { + *offset = lha->entry_offset; + *size = 0; + *buff = NULL; + return (lha_end_of_entry(a)); + } + + if (lha->entry_is_compressed) + r = lha_read_data_lzh(a, buff, size, offset); + else + /* No compression. */ + r = lha_read_data_none(a, buff, size, offset); + return (r); +} + +/* + * Read a file content in no compression. + * + * Returns ARCHIVE_OK if successful, ARCHIVE_FATAL otherwise, sets + * lha->end_of_entry if it consumes all of the data. + */ +static int +lha_read_data_none(struct archive_read *a, const void **buff, + size_t *size, int64_t *offset) +{ + struct lha *lha = (struct lha *)(a->format->data); + ssize_t bytes_avail; + + if (lha->entry_bytes_remaining == 0) { + *buff = NULL; + *size = 0; + *offset = lha->entry_offset; + lha->end_of_entry = 1; + return (ARCHIVE_OK); + } + /* + * Note: '1' here is a performance optimization. + * Recall that the decompression layer returns a count of + * available bytes; asking for more than that forces the + * decompressor to combine reads by copying data. + */ + *buff = __archive_read_ahead(a, 1, &bytes_avail); + if (bytes_avail <= 0) { + archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, + "Truncated LHa file data"); + return (ARCHIVE_FATAL); + } + if (bytes_avail > lha->entry_bytes_remaining) + bytes_avail = (ssize_t)lha->entry_bytes_remaining; + lha->entry_crc_calculated = + lha_crc16(lha->entry_crc_calculated, *buff, bytes_avail); + *size = bytes_avail; + *offset = lha->entry_offset; + lha->entry_offset += bytes_avail; + lha->entry_bytes_remaining -= bytes_avail; + if (lha->entry_bytes_remaining == 0) + lha->end_of_entry = 1; + lha->entry_unconsumed = bytes_avail; + return (ARCHIVE_OK); +} + +/* + * Read a file content in LZHUFF encoding. + * + * Returns ARCHIVE_OK if successful, returns ARCHIVE_WARN if compression is + * unsupported, ARCHIVE_FATAL otherwise, sets lha->end_of_entry if it consumes + * all of the data. + */ +static int +lha_read_data_lzh(struct archive_read *a, const void **buff, + size_t *size, int64_t *offset) +{ + struct lha *lha = (struct lha *)(a->format->data); + ssize_t bytes_avail; + int r; + + /* If we haven't yet read any data, initialize the decompressor. */ + if (!lha->decompress_init) { + r = lzh_decode_init(&(lha->strm), lha->method); + switch (r) { + case ARCHIVE_OK: + break; + case ARCHIVE_FAILED: + /* Unsupported compression. */ + *buff = NULL; + *size = 0; + *offset = 0; + archive_set_error(&a->archive, + ARCHIVE_ERRNO_FILE_FORMAT, + "Unsupported lzh compression method -%c%c%c-", + lha->method[0], lha->method[1], lha->method[2]); + /* We know compressed size; just skip it. */ + archive_read_format_lha_read_data_skip(a); + return (ARCHIVE_WARN); + default: + archive_set_error(&a->archive, ENOMEM, + "Couldn't allocate memory " + "for lzh decompression"); + return (ARCHIVE_FATAL); + } + /* We've initialized decompression for this stream. */ + lha->decompress_init = 1; + lha->strm.avail_out = 0; + lha->strm.total_out = 0; + } + + /* + * Note: '1' here is a performance optimization. + * Recall that the decompression layer returns a count of + * available bytes; asking for more than that forces the + * decompressor to combine reads by copying data. + */ + lha->strm.next_in = __archive_read_ahead(a, 1, &bytes_avail); + if (bytes_avail <= 0) { + archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, + "Truncated LHa file body"); + return (ARCHIVE_FATAL); + } + if (bytes_avail > lha->entry_bytes_remaining) + bytes_avail = (ssize_t)lha->entry_bytes_remaining; + + lha->strm.avail_in = (int)bytes_avail; + lha->strm.total_in = 0; + lha->strm.avail_out = 0; + + r = lzh_decode(&(lha->strm), bytes_avail == lha->entry_bytes_remaining); + switch (r) { + case ARCHIVE_OK: + break; + case ARCHIVE_EOF: + lha->end_of_entry = 1; + break; + default: + archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, + "Bad lzh data"); + return (ARCHIVE_FAILED); + } + lha->entry_unconsumed = lha->strm.total_in; + lha->entry_bytes_remaining -= lha->strm.total_in; + + if (lha->strm.avail_out) { + *offset = lha->entry_offset; + *size = lha->strm.avail_out; + *buff = lha->strm.ref_ptr; + lha->entry_crc_calculated = + lha_crc16(lha->entry_crc_calculated, *buff, *size); + lha->entry_offset += *size; + } else { + *offset = lha->entry_offset; + *size = 0; + *buff = NULL; + if (lha->end_of_entry) + return (lha_end_of_entry(a)); + } + return (ARCHIVE_OK); +} + +/* + * Skip a file content. + */ +static int +archive_read_format_lha_read_data_skip(struct archive_read *a) +{ + struct lha *lha; + int64_t bytes_skipped; + + lha = (struct lha *)(a->format->data); + + if (lha->entry_unconsumed) { + /* Consume as much as the decompressor actually used. */ + __archive_read_consume(a, lha->entry_unconsumed); + lha->entry_unconsumed = 0; + } + + /* if we've already read to end of data, we're done. */ + if (lha->end_of_entry_cleanup) + return (ARCHIVE_OK); + + /* + * If the length is at the beginning, we can skip the + * compressed data much more quickly. + */ + bytes_skipped = __archive_read_consume(a, lha->entry_bytes_remaining); + if (bytes_skipped < 0) + return (ARCHIVE_FATAL); + + /* This entry is finished and done. */ + lha->end_of_entry_cleanup = lha->end_of_entry = 1; + return (ARCHIVE_OK); +} + +static int +archive_read_format_lha_cleanup(struct archive_read *a) +{ + struct lha *lha = (struct lha *)(a->format->data); + + lzh_decode_free(&(lha->strm)); + archive_string_free(&(lha->dirname)); + archive_string_free(&(lha->filename)); + archive_string_free(&(lha->uname)); + archive_string_free(&(lha->gname)); + archive_wstring_free(&(lha->ws)); + free(lha); + (a->format->data) = NULL; + return (ARCHIVE_OK); +} + +/* + * 'LHa for UNIX' utility has archived a symbolic-link name after + * a pathname with '|' character. + * This function extracts the symbolic-link name from the pathname. + * + * example. + * 1. a symbolic-name is 'aaa/bb/cc' + * 2. a filename is 'xxx/bbb' + * then a archived pathname is 'xxx/bbb|aaa/bb/cc' + */ +static int +lha_parse_linkname(struct archive_wstring *linkname, + struct archive_wstring *pathname) +{ + wchar_t * linkptr; + size_t symlen; + + linkptr = wcschr(pathname->s, L'|'); + if (linkptr != NULL) { + symlen = wcslen(linkptr + 1); + archive_wstrncpy(linkname, linkptr+1, symlen); + + *linkptr = 0; + pathname->length = wcslen(pathname->s); + + return (1); + } + return (0); +} + +/* Convert an MSDOS-style date/time into Unix-style time. */ +static time_t +lha_dos_time(const unsigned char *p) +{ + int msTime, msDate; + struct tm ts; + + msTime = archive_le16dec(p); + msDate = archive_le16dec(p+2); + + memset(&ts, 0, sizeof(ts)); + ts.tm_year = ((msDate >> 9) & 0x7f) + 80; /* Years since 1900. */ + ts.tm_mon = ((msDate >> 5) & 0x0f) - 1; /* Month number. */ + ts.tm_mday = msDate & 0x1f; /* Day of month. */ + ts.tm_hour = (msTime >> 11) & 0x1f; + ts.tm_min = (msTime >> 5) & 0x3f; + ts.tm_sec = (msTime << 1) & 0x3e; + ts.tm_isdst = -1; + return (mktime(&ts)); +} + +/* Convert an MS-Windows-style date/time into Unix-style time. */ +static time_t +lha_win_time(uint64_t wintime, long *ns) +{ +#define EPOC_TIME ARCHIVE_LITERAL_ULL(116444736000000000) + + if (wintime >= EPOC_TIME) { + wintime -= EPOC_TIME; /* 1970-01-01 00:00:00 (UTC) */ + if (ns != NULL) + *ns = (long)(wintime % 10000000) * 100; + return (wintime / 10000000); + } else { + if (ns != NULL) + *ns = 0; + return (0); + } +} + +static unsigned char +lha_calcsum(unsigned char sum, const void *pp, int offset, size_t size) +{ + unsigned char const *p = (unsigned char const *)pp; + + p += offset; + for (;size > 0; --size) + sum += *p++; + return (sum); +} + +static uint16_t crc16tbl[2][256]; +static void +lha_crc16_init(void) +{ + unsigned int i; + static int crc16init = 0; + + if (crc16init) + return; + crc16init = 1; + + for (i = 0; i < 256; i++) { + unsigned int j; + uint16_t crc = (uint16_t)i; + for (j = 8; j; j--) + crc = (crc >> 1) ^ ((crc & 1) * 0xA001); + crc16tbl[0][i] = crc; + } + + for (i = 0; i < 256; i++) { + crc16tbl[1][i] = (crc16tbl[0][i] >> 8) + ^ crc16tbl[0][crc16tbl[0][i] & 0xff]; + } +} + +static uint16_t +lha_crc16(uint16_t crc, const void *pp, size_t len) +{ + const unsigned char *p = (const unsigned char *)pp; + const uint16_t *buff; + const union { + uint32_t i; + char c[4]; + } u = { 0x01020304 }; + + if (len == 0) + return crc; + + /* Process unaligned address. */ + if (((uintptr_t)p) & (uintptr_t)0x1) { + crc = (crc >> 8) ^ crc16tbl[0][(crc ^ *p++) & 0xff]; + len--; + } + buff = (const uint16_t *)p; + /* + * Modern C compiler such as GCC does not unroll automatically yet + * without unrolling pragma, and Clang is so. So we should + * unroll this loop for its performance. + */ + for (;len >= 8; len -= 8) { + /* This if statement expects compiler optimization will + * remove the statement which will not be executed. */ +#undef bswap16 +#ifndef __has_builtin +#define __has_builtin(x) 0 +#endif +#if defined(_MSC_VER) && _MSC_VER >= 1400 /* Visual Studio */ +# define bswap16(x) _byteswap_ushort(x) +#elif defined(__GNUC__) && ((__GNUC__ == 4 && __GNUC_MINOR__ >= 8) || __GNUC__ > 4) +/* GCC 4.8 and later has __builtin_bswap16() */ +# define bswap16(x) __builtin_bswap16(x) +#elif defined(__clang__) && __has_builtin(__builtin_bswap16) +/* Newer clang versions have __builtin_bswap16() */ +# define bswap16(x) __builtin_bswap16(x) +#else +# define bswap16(x) ((((x) >> 8) & 0xff) | ((x) << 8)) +#endif +#define CRC16W do { \ + if(u.c[0] == 1) { /* Big endian */ \ + crc ^= bswap16(*buff); buff++; \ + } else \ + crc ^= *buff++; \ + crc = crc16tbl[1][crc & 0xff] ^ crc16tbl[0][crc >> 8];\ +} while (0) + CRC16W; + CRC16W; + CRC16W; + CRC16W; +#undef CRC16W +#undef bswap16 + } + + p = (const unsigned char *)buff; + for (;len; len--) { + crc = (crc >> 8) ^ crc16tbl[0][(crc ^ *p++) & 0xff]; + } + return crc; +} + +/* + * Initialize LZHUF decoder. + * + * Returns ARCHIVE_OK if initialization was successful. + * Returns ARCHIVE_FAILED if method is unsupported. + * Returns ARCHIVE_FATAL if initialization failed; memory allocation + * error occurred. + */ +static int +lzh_decode_init(struct lzh_stream *strm, const char *method) +{ + struct lzh_dec *ds; + int w_bits, w_size; + + if (strm->ds == NULL) { + strm->ds = calloc(1, sizeof(*strm->ds)); + if (strm->ds == NULL) + return (ARCHIVE_FATAL); + } + ds = strm->ds; + ds->error = ARCHIVE_FAILED; + if (method == NULL || method[0] != 'l' || method[1] != 'h') + return (ARCHIVE_FAILED); + switch (method[2]) { + case '5': + w_bits = 13;/* 8KiB for window */ + break; + case '6': + w_bits = 15;/* 32KiB for window */ + break; + case '7': + w_bits = 16;/* 64KiB for window */ + break; + default: + return (ARCHIVE_FAILED);/* Not supported. */ + } + ds->error = ARCHIVE_FATAL; + /* Expand a window size up to 128 KiB for decompressing process + * performance whatever its original window size is. */ + ds->w_size = 1U << 17; + ds->w_mask = ds->w_size -1; + if (ds->w_buff == NULL) { + ds->w_buff = malloc(ds->w_size); + if (ds->w_buff == NULL) + return (ARCHIVE_FATAL); + } + w_size = 1U << w_bits; + memset(ds->w_buff + ds->w_size - w_size, 0x20, w_size); + ds->w_pos = 0; + ds->state = 0; + ds->pos_pt_len_size = w_bits + 1; + ds->pos_pt_len_bits = (w_bits == 15 || w_bits == 16)? 5: 4; + ds->literal_pt_len_size = PT_BITLEN_SIZE; + ds->literal_pt_len_bits = 5; + ds->br.cache_buffer = 0; + ds->br.cache_avail = 0; + + if (lzh_huffman_init(&(ds->lt), LT_BITLEN_SIZE, 16) + != ARCHIVE_OK) + return (ARCHIVE_FATAL); + ds->lt.len_bits = 9; + if (lzh_huffman_init(&(ds->pt), PT_BITLEN_SIZE, 16) + != ARCHIVE_OK) + return (ARCHIVE_FATAL); + ds->error = 0; + + return (ARCHIVE_OK); +} + +/* + * Release LZHUF decoder. + */ +static void +lzh_decode_free(struct lzh_stream *strm) +{ + + if (strm->ds == NULL) + return; + free(strm->ds->w_buff); + lzh_huffman_free(&(strm->ds->lt)); + lzh_huffman_free(&(strm->ds->pt)); + free(strm->ds); + strm->ds = NULL; +} + +/* + * Bit stream reader. + */ +/* Check that the cache buffer has enough bits. */ +#define lzh_br_has(br, n) ((br)->cache_avail >= n) +/* Get compressed data by bit. */ +#define lzh_br_bits(br, n) \ + (((uint16_t)((br)->cache_buffer >> \ + ((br)->cache_avail - (n)))) & cache_masks[n]) +#define lzh_br_bits_forced(br, n) \ + (((uint16_t)((br)->cache_buffer << \ + ((n) - (br)->cache_avail))) & cache_masks[n]) +/* Read ahead to make sure the cache buffer has enough compressed data we + * will use. + * True : completed, there is enough data in the cache buffer. + * False : we met that strm->next_in is empty, we have to get following + * bytes. */ +#define lzh_br_read_ahead_0(strm, br, n) \ + (lzh_br_has(br, (n)) || lzh_br_fillup(strm, br)) +/* True : the cache buffer has some bits as much as we need. + * False : there are no enough bits in the cache buffer to be used, + * we have to get following bytes if we could. */ +#define lzh_br_read_ahead(strm, br, n) \ + (lzh_br_read_ahead_0((strm), (br), (n)) || lzh_br_has((br), (n))) + +/* Notify how many bits we consumed. */ +#define lzh_br_consume(br, n) ((br)->cache_avail -= (n)) +#define lzh_br_unconsume(br, n) ((br)->cache_avail += (n)) + +static const uint16_t cache_masks[] = { + 0x0000, 0x0001, 0x0003, 0x0007, + 0x000F, 0x001F, 0x003F, 0x007F, + 0x00FF, 0x01FF, 0x03FF, 0x07FF, + 0x0FFF, 0x1FFF, 0x3FFF, 0x7FFF, + 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF +}; + +/* + * Shift away used bits in the cache data and fill it up with following bits. + * Call this when cache buffer does not have enough bits you need. + * + * Returns 1 if the cache buffer is full. + * Returns 0 if the cache buffer is not full; input buffer is empty. + */ +static int +lzh_br_fillup(struct lzh_stream *strm, struct lzh_br *br) +{ + int n = CACHE_BITS - br->cache_avail; + + for (;;) { + const int x = n >> 3; + if (strm->avail_in >= x) { + switch (x) { + case 8: + br->cache_buffer = + ((uint64_t)strm->next_in[0]) << 56 | + ((uint64_t)strm->next_in[1]) << 48 | + ((uint64_t)strm->next_in[2]) << 40 | + ((uint64_t)strm->next_in[3]) << 32 | + ((uint32_t)strm->next_in[4]) << 24 | + ((uint32_t)strm->next_in[5]) << 16 | + ((uint32_t)strm->next_in[6]) << 8 | + (uint32_t)strm->next_in[7]; + strm->next_in += 8; + strm->avail_in -= 8; + br->cache_avail += 8 * 8; + return (1); + case 7: + br->cache_buffer = + (br->cache_buffer << 56) | + ((uint64_t)strm->next_in[0]) << 48 | + ((uint64_t)strm->next_in[1]) << 40 | + ((uint64_t)strm->next_in[2]) << 32 | + ((uint64_t)strm->next_in[3]) << 24 | + ((uint64_t)strm->next_in[4]) << 16 | + ((uint64_t)strm->next_in[5]) << 8 | + (uint64_t)strm->next_in[6]; + strm->next_in += 7; + strm->avail_in -= 7; + br->cache_avail += 7 * 8; + return (1); + case 6: + br->cache_buffer = + (br->cache_buffer << 48) | + ((uint64_t)strm->next_in[0]) << 40 | + ((uint64_t)strm->next_in[1]) << 32 | + ((uint64_t)strm->next_in[2]) << 24 | + ((uint64_t)strm->next_in[3]) << 16 | + ((uint64_t)strm->next_in[4]) << 8 | + (uint64_t)strm->next_in[5]; + strm->next_in += 6; + strm->avail_in -= 6; + br->cache_avail += 6 * 8; + return (1); + case 0: + /* We have enough compressed data in + * the cache buffer.*/ + return (1); + default: + break; + } + } + if (strm->avail_in == 0) { + /* There is not enough compressed data to fill up the + * cache buffer. */ + return (0); + } + br->cache_buffer = + (br->cache_buffer << 8) | *strm->next_in++; + strm->avail_in--; + br->cache_avail += 8; + n -= 8; + } +} + +/* + * Decode LZHUF. + * + * 1. Returns ARCHIVE_OK if output buffer or input buffer are empty. + * Please set available buffer and call this function again. + * 2. Returns ARCHIVE_EOF if decompression has been completed. + * 3. Returns ARCHIVE_FAILED if an error occurred; compressed data + * is broken or you do not set 'last' flag properly. + * 4. 'last' flag is very important, you must set 1 to the flag if there + * is no input data. The lha compressed data format does not provide how + * to know the compressed data is really finished. + * Note: lha command utility check if the total size of output bytes is + * reached the uncompressed size recorded in its header. it does not mind + * that the decoding process is properly finished. + * GNU ZIP can decompress another compressed file made by SCO LZH compress. + * it handles EOF as null to fill read buffer with zero until the decoding + * process meet 2 bytes of zeros at reading a size of a next chunk, so the + * zeros are treated as the mark of the end of the data although the zeros + * is dummy, not the file data. + */ +static int lzh_read_blocks(struct lzh_stream *, int); +static int lzh_decode_blocks(struct lzh_stream *, int); +#define ST_RD_BLOCK 0 +#define ST_RD_PT_1 1 +#define ST_RD_PT_2 2 +#define ST_RD_PT_3 3 +#define ST_RD_PT_4 4 +#define ST_RD_LITERAL_1 5 +#define ST_RD_LITERAL_2 6 +#define ST_RD_LITERAL_3 7 +#define ST_RD_POS_DATA_1 8 +#define ST_GET_LITERAL 9 +#define ST_GET_POS_1 10 +#define ST_GET_POS_2 11 +#define ST_COPY_DATA 12 + +static int +lzh_decode(struct lzh_stream *strm, int last) +{ + struct lzh_dec *ds = strm->ds; + int avail_in; + int r; + + if (ds->error) + return (ds->error); + + avail_in = strm->avail_in; + do { + if (ds->state < ST_GET_LITERAL) + r = lzh_read_blocks(strm, last); + else + r = lzh_decode_blocks(strm, last); + } while (r == 100); + strm->total_in += avail_in - strm->avail_in; + return (r); +} + +static void +lzh_emit_window(struct lzh_stream *strm, size_t s) +{ + strm->ref_ptr = strm->ds->w_buff; + strm->avail_out = (int)s; + strm->total_out += s; +} + +static int +lzh_read_blocks(struct lzh_stream *strm, int last) +{ + struct lzh_dec *ds = strm->ds; + struct lzh_br *br = &(ds->br); + int c = 0, i; + unsigned rbits; + + for (;;) { + switch (ds->state) { + case ST_RD_BLOCK: + /* + * Read a block number indicates how many blocks + * we will handle. The block is composed of a + * literal and a match, sometimes a literal only + * in particular, there are no reference data at + * the beginning of the decompression. + */ + if (!lzh_br_read_ahead_0(strm, br, 16)) { + if (!last) + /* We need following data. */ + return (ARCHIVE_OK); + if (lzh_br_has(br, 8)) { + /* + * It seems there are extra bits. + * 1. Compressed data is broken. + * 2. `last' flag does not properly + * set. + */ + goto failed; + } + if (ds->w_pos > 0) { + lzh_emit_window(strm, ds->w_pos); + ds->w_pos = 0; + return (ARCHIVE_OK); + } + /* End of compressed data; we have completely + * handled all compressed data. */ + return (ARCHIVE_EOF); + } + ds->blocks_avail = lzh_br_bits(br, 16); + if (ds->blocks_avail == 0) + goto failed; + lzh_br_consume(br, 16); + /* + * Read a literal table compressed in huffman + * coding. + */ + ds->pt.len_size = ds->literal_pt_len_size; + ds->pt.len_bits = ds->literal_pt_len_bits; + ds->reading_position = 0; + /* FALL THROUGH */ + case ST_RD_PT_1: + /* Note: ST_RD_PT_1, ST_RD_PT_2 and ST_RD_PT_4 are + * used in reading both a literal table and a + * position table. */ + if (!lzh_br_read_ahead(strm, br, ds->pt.len_bits)) { + if (last) + goto failed;/* Truncated data. */ + ds->state = ST_RD_PT_1; + return (ARCHIVE_OK); + } + ds->pt.len_avail = lzh_br_bits(br, ds->pt.len_bits); + lzh_br_consume(br, ds->pt.len_bits); + /* FALL THROUGH */ + case ST_RD_PT_2: + if (ds->pt.len_avail == 0) { + /* There is no bitlen. */ + if (!lzh_br_read_ahead(strm, br, + ds->pt.len_bits)) { + if (last) + goto failed;/* Truncated data.*/ + ds->state = ST_RD_PT_2; + return (ARCHIVE_OK); + } + if (!lzh_make_fake_table(&(ds->pt), + lzh_br_bits(br, ds->pt.len_bits))) + goto failed;/* Invalid data. */ + lzh_br_consume(br, ds->pt.len_bits); + if (ds->reading_position) + ds->state = ST_GET_LITERAL; + else + ds->state = ST_RD_LITERAL_1; + break; + } else if (ds->pt.len_avail > ds->pt.len_size) + goto failed;/* Invalid data. */ + ds->loop = 0; + memset(ds->pt.freq, 0, sizeof(ds->pt.freq)); + if (ds->pt.len_avail < 3 || + ds->pt.len_size == ds->pos_pt_len_size) { + ds->state = ST_RD_PT_4; + break; + } + /* FALL THROUGH */ + case ST_RD_PT_3: + ds->loop = lzh_read_pt_bitlen(strm, ds->loop, 3); + if (ds->loop < 3) { + if (ds->loop < 0 || last) + goto failed;/* Invalid data. */ + /* Not completed, get following data. */ + ds->state = ST_RD_PT_3; + return (ARCHIVE_OK); + } + /* There are some null in bitlen of the literal. */ + if (!lzh_br_read_ahead(strm, br, 2)) { + if (last) + goto failed;/* Truncated data. */ + ds->state = ST_RD_PT_3; + return (ARCHIVE_OK); + } + c = lzh_br_bits(br, 2); + lzh_br_consume(br, 2); + if (c > ds->pt.len_avail - 3) + goto failed;/* Invalid data. */ + for (i = 3; c-- > 0 ;) + ds->pt.bitlen[i++] = 0; + ds->loop = i; + /* FALL THROUGH */ + case ST_RD_PT_4: + ds->loop = lzh_read_pt_bitlen(strm, ds->loop, + ds->pt.len_avail); + if (ds->loop < ds->pt.len_avail) { + if (ds->loop < 0 || last) + goto failed;/* Invalid data. */ + /* Not completed, get following data. */ + ds->state = ST_RD_PT_4; + return (ARCHIVE_OK); + } + if (!lzh_make_huffman_table(&(ds->pt))) + goto failed;/* Invalid data */ + if (ds->reading_position) { + ds->state = ST_GET_LITERAL; + break; + } + /* FALL THROUGH */ + case ST_RD_LITERAL_1: + if (!lzh_br_read_ahead(strm, br, ds->lt.len_bits)) { + if (last) + goto failed;/* Truncated data. */ + ds->state = ST_RD_LITERAL_1; + return (ARCHIVE_OK); + } + ds->lt.len_avail = lzh_br_bits(br, ds->lt.len_bits); + lzh_br_consume(br, ds->lt.len_bits); + /* FALL THROUGH */ + case ST_RD_LITERAL_2: + if (ds->lt.len_avail == 0) { + /* There is no bitlen. */ + if (!lzh_br_read_ahead(strm, br, + ds->lt.len_bits)) { + if (last) + goto failed;/* Truncated data.*/ + ds->state = ST_RD_LITERAL_2; + return (ARCHIVE_OK); + } + if (!lzh_make_fake_table(&(ds->lt), + lzh_br_bits(br, ds->lt.len_bits))) + goto failed;/* Invalid data */ + lzh_br_consume(br, ds->lt.len_bits); + ds->state = ST_RD_POS_DATA_1; + break; + } else if (ds->lt.len_avail > ds->lt.len_size) + goto failed;/* Invalid data */ + ds->loop = 0; + memset(ds->lt.freq, 0, sizeof(ds->lt.freq)); + /* FALL THROUGH */ + case ST_RD_LITERAL_3: + i = ds->loop; + while (i < ds->lt.len_avail) { + if (!lzh_br_read_ahead(strm, br, + ds->pt.max_bits)) { + if (last) + goto failed;/* Truncated data.*/ + ds->loop = i; + ds->state = ST_RD_LITERAL_3; + return (ARCHIVE_OK); + } + rbits = lzh_br_bits(br, ds->pt.max_bits); + c = lzh_decode_huffman(&(ds->pt), rbits); + if (c > 2) { + /* Note: 'c' will never be more than + * eighteen since it's limited by + * PT_BITLEN_SIZE, which is being set + * to ds->pt.len_size through + * ds->literal_pt_len_size. */ + lzh_br_consume(br, ds->pt.bitlen[c]); + c -= 2; + ds->lt.freq[c]++; + ds->lt.bitlen[i++] = c; + } else if (c == 0) { + lzh_br_consume(br, ds->pt.bitlen[c]); + ds->lt.bitlen[i++] = 0; + } else { + /* c == 1 or c == 2 */ + int n = (c == 1)?4:9; + if (!lzh_br_read_ahead(strm, br, + ds->pt.bitlen[c] + n)) { + if (last) /* Truncated data. */ + goto failed; + ds->loop = i; + ds->state = ST_RD_LITERAL_3; + return (ARCHIVE_OK); + } + lzh_br_consume(br, ds->pt.bitlen[c]); + c = lzh_br_bits(br, n); + lzh_br_consume(br, n); + c += (n == 4)?3:20; + if (i + c > ds->lt.len_avail) + goto failed;/* Invalid data */ + memset(&(ds->lt.bitlen[i]), 0, c); + i += c; + } + } + if (i > ds->lt.len_avail || + !lzh_make_huffman_table(&(ds->lt))) + goto failed;/* Invalid data */ + /* FALL THROUGH */ + case ST_RD_POS_DATA_1: + /* + * Read a position table compressed in huffman + * coding. + */ + ds->pt.len_size = ds->pos_pt_len_size; + ds->pt.len_bits = ds->pos_pt_len_bits; + ds->reading_position = 1; + ds->state = ST_RD_PT_1; + break; + case ST_GET_LITERAL: + return (100); + } + } +failed: + return (ds->error = ARCHIVE_FAILED); +} + +static int +lzh_decode_blocks(struct lzh_stream *strm, int last) +{ + struct lzh_dec *ds = strm->ds; + struct lzh_br bre = ds->br; + struct huffman *lt = &(ds->lt); + struct huffman *pt = &(ds->pt); + unsigned char *w_buff = ds->w_buff; + unsigned char *lt_bitlen = lt->bitlen; + unsigned char *pt_bitlen = pt->bitlen; + int blocks_avail = ds->blocks_avail, c = 0; + int copy_len = ds->copy_len, copy_pos = ds->copy_pos; + int w_pos = ds->w_pos, w_mask = ds->w_mask, w_size = ds->w_size; + int lt_max_bits = lt->max_bits, pt_max_bits = pt->max_bits; + int state = ds->state; + + for (;;) { + switch (state) { + case ST_GET_LITERAL: + for (;;) { + if (blocks_avail == 0) { + /* We have decoded all blocks. + * Let's handle next blocks. */ + ds->state = ST_RD_BLOCK; + ds->br = bre; + ds->blocks_avail = 0; + ds->w_pos = w_pos; + ds->copy_pos = 0; + return (100); + } + + /* lzh_br_read_ahead() always try to fill the + * cache buffer up. In specific situation we + * are close to the end of the data, the cache + * buffer will not be full and thus we have to + * determine if the cache buffer has some bits + * as much as we need after lzh_br_read_ahead() + * failed. */ + if (!lzh_br_read_ahead(strm, &bre, + lt_max_bits)) { + if (!last) + goto next_data; + /* Remaining bits are less than + * maximum bits(lt.max_bits) but maybe + * it still remains as much as we need, + * so we should try to use it with + * dummy bits. */ + c = lzh_decode_huffman(lt, + lzh_br_bits_forced(&bre, + lt_max_bits)); + lzh_br_consume(&bre, lt_bitlen[c]); + if (!lzh_br_has(&bre, 0)) + goto failed;/* Over read. */ + } else { + c = lzh_decode_huffman(lt, + lzh_br_bits(&bre, lt_max_bits)); + lzh_br_consume(&bre, lt_bitlen[c]); + } + blocks_avail--; + if (c > UCHAR_MAX) + /* Current block is a match data. */ + break; + /* + * 'c' is exactly a literal code. + */ + /* Save a decoded code to reference it + * afterward. */ + w_buff[w_pos] = c; + if (++w_pos >= w_size) { + w_pos = 0; + lzh_emit_window(strm, w_size); + goto next_data; + } + } + /* 'c' is the length of a match pattern we have + * already extracted, which has be stored in + * window(ds->w_buff). */ + copy_len = c - (UCHAR_MAX + 1) + MINMATCH; + /* FALL THROUGH */ + case ST_GET_POS_1: + /* + * Get a reference position. + */ + if (!lzh_br_read_ahead(strm, &bre, pt_max_bits)) { + if (!last) { + state = ST_GET_POS_1; + ds->copy_len = copy_len; + goto next_data; + } + copy_pos = lzh_decode_huffman(pt, + lzh_br_bits_forced(&bre, pt_max_bits)); + lzh_br_consume(&bre, pt_bitlen[copy_pos]); + if (!lzh_br_has(&bre, 0)) + goto failed;/* Over read. */ + } else { + copy_pos = lzh_decode_huffman(pt, + lzh_br_bits(&bre, pt_max_bits)); + lzh_br_consume(&bre, pt_bitlen[copy_pos]); + } + /* FALL THROUGH */ + case ST_GET_POS_2: + if (copy_pos > 1) { + /* We need an additional adjustment number to + * the position. */ + int p = copy_pos - 1; + if (!lzh_br_read_ahead(strm, &bre, p)) { + if (last) + goto failed;/* Truncated data.*/ + state = ST_GET_POS_2; + ds->copy_len = copy_len; + ds->copy_pos = copy_pos; + goto next_data; + } + copy_pos = (1 << p) + lzh_br_bits(&bre, p); + lzh_br_consume(&bre, p); + } + /* The position is actually a distance from the last + * code we had extracted and thus we have to convert + * it to a position of the window. */ + copy_pos = (w_pos - copy_pos - 1) & w_mask; + /* FALL THROUGH */ + case ST_COPY_DATA: + /* + * Copy `copy_len' bytes as extracted data from + * the window into the output buffer. + */ + for (;;) { + int l; + + l = copy_len; + if (copy_pos > w_pos) { + if (l > w_size - copy_pos) + l = w_size - copy_pos; + } else { + if (l > w_size - w_pos) + l = w_size - w_pos; + } + if ((copy_pos + l < w_pos) + || (w_pos + l < copy_pos)) { + /* No overlap. */ + memcpy(w_buff + w_pos, + w_buff + copy_pos, l); + } else { + const unsigned char *s; + unsigned char *d; + int li; + + d = w_buff + w_pos; + s = w_buff + copy_pos; + for (li = 0; li < l-1;) { + d[li] = s[li];li++; + d[li] = s[li];li++; + } + if (li < l) + d[li] = s[li]; + } + w_pos += l; + if (w_pos == w_size) { + w_pos = 0; + lzh_emit_window(strm, w_size); + if (copy_len <= l) + state = ST_GET_LITERAL; + else { + state = ST_COPY_DATA; + ds->copy_len = copy_len - l; + ds->copy_pos = + (copy_pos + l) & w_mask; + } + goto next_data; + } + if (copy_len <= l) + /* A copy of current pattern ended. */ + break; + copy_len -= l; + copy_pos = (copy_pos + l) & w_mask; + } + state = ST_GET_LITERAL; + break; + } + } +failed: + return (ds->error = ARCHIVE_FAILED); +next_data: + ds->br = bre; + ds->blocks_avail = blocks_avail; + ds->state = state; + ds->w_pos = w_pos; + return (ARCHIVE_OK); +} + +static int +lzh_huffman_init(struct huffman *hf, size_t len_size, int tbl_bits) +{ + int bits; + + if (hf->bitlen == NULL) { + hf->bitlen = malloc(len_size * sizeof(hf->bitlen[0])); + if (hf->bitlen == NULL) + return (ARCHIVE_FATAL); + } + if (hf->tbl == NULL) { + if (tbl_bits < HTBL_BITS) + bits = tbl_bits; + else + bits = HTBL_BITS; + hf->tbl = malloc(((size_t)1 << bits) * sizeof(hf->tbl[0])); + if (hf->tbl == NULL) + return (ARCHIVE_FATAL); + } + if (hf->tree == NULL && tbl_bits > HTBL_BITS) { + hf->tree_avail = 1 << (tbl_bits - HTBL_BITS + 4); + hf->tree = malloc(hf->tree_avail * sizeof(hf->tree[0])); + if (hf->tree == NULL) + return (ARCHIVE_FATAL); + } + hf->len_size = (int)len_size; + hf->tbl_bits = tbl_bits; + return (ARCHIVE_OK); +} + +static void +lzh_huffman_free(struct huffman *hf) +{ + free(hf->bitlen); + free(hf->tbl); + free(hf->tree); +} + +static const char bitlen_tbl[0x400] = { + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 16, 0 +}; +static int +lzh_read_pt_bitlen(struct lzh_stream *strm, int start, int end) +{ + struct lzh_dec *ds = strm->ds; + struct lzh_br *br = &(ds->br); + int c, i; + + for (i = start; i < end; ) { + /* + * bit pattern the number we need + * 000 -> 0 + * 001 -> 1 + * 010 -> 2 + * ... + * 110 -> 6 + * 1110 -> 7 + * 11110 -> 8 + * ... + * 1111111111110 -> 16 + */ + if (!lzh_br_read_ahead(strm, br, 3)) + return (i); + if ((c = lzh_br_bits(br, 3)) == 7) { + if (!lzh_br_read_ahead(strm, br, 13)) + return (i); + c = bitlen_tbl[lzh_br_bits(br, 13) & 0x3FF]; + if (c) + lzh_br_consume(br, c - 3); + else + return (-1);/* Invalid data. */ + } else + lzh_br_consume(br, 3); + ds->pt.bitlen[i++] = c; + ds->pt.freq[c]++; + } + return (i); +} + +static int +lzh_make_fake_table(struct huffman *hf, uint16_t c) +{ + if (c >= hf->len_size) + return (0); + hf->tbl[0] = c; + hf->max_bits = 0; + hf->shift_bits = 0; + hf->bitlen[hf->tbl[0]] = 0; + return (1); +} + +/* + * Make a huffman coding table. + */ +static int +lzh_make_huffman_table(struct huffman *hf) +{ + uint16_t *tbl; + const unsigned char *bitlen; + int bitptn[17], weight[17]; + int i, maxbits = 0, ptn, tbl_size, w; + int diffbits, len_avail; + + /* + * Initialize bit patterns. + */ + ptn = 0; + for (i = 1, w = 1 << 15; i <= 16; i++, w >>= 1) { + bitptn[i] = ptn; + weight[i] = w; + if (hf->freq[i]) { + ptn += hf->freq[i] * w; + maxbits = i; + } + } + if (ptn != 0x10000 || maxbits > hf->tbl_bits) + return (0);/* Invalid */ + + hf->max_bits = maxbits; + + /* + * Cut out extra bits which we won't house in the table. + * This preparation reduces the same calculation in the for-loop + * making the table. + */ + if (maxbits < 16) { + int ebits = 16 - maxbits; + for (i = 1; i <= maxbits; i++) { + bitptn[i] >>= ebits; + weight[i] >>= ebits; + } + } + if (maxbits > HTBL_BITS) { + unsigned htbl_max; + uint16_t *p; + + diffbits = maxbits - HTBL_BITS; + for (i = 1; i <= HTBL_BITS; i++) { + bitptn[i] >>= diffbits; + weight[i] >>= diffbits; + } + htbl_max = bitptn[HTBL_BITS] + + weight[HTBL_BITS] * hf->freq[HTBL_BITS]; + p = &(hf->tbl[htbl_max]); + while (p < &hf->tbl[1U<<HTBL_BITS]) + *p++ = 0; + } else + diffbits = 0; + hf->shift_bits = diffbits; + + /* + * Make the table. + */ + tbl_size = 1 << HTBL_BITS; + tbl = hf->tbl; + bitlen = hf->bitlen; + len_avail = hf->len_avail; + hf->tree_used = 0; + for (i = 0; i < len_avail; i++) { + uint16_t *p; + int len, cnt; + uint16_t bit; + int extlen; + struct htree_t *ht; + + if (bitlen[i] == 0) + continue; + /* Get a bit pattern */ + len = bitlen[i]; + ptn = bitptn[len]; + cnt = weight[len]; + if (len <= HTBL_BITS) { + /* Calculate next bit pattern */ + if ((bitptn[len] = ptn + cnt) > tbl_size) + return (0);/* Invalid */ + /* Update the table */ + p = &(tbl[ptn]); + if (cnt > 7) { + uint16_t *pc; + + cnt -= 8; + pc = &p[cnt]; + pc[0] = (uint16_t)i; + pc[1] = (uint16_t)i; + pc[2] = (uint16_t)i; + pc[3] = (uint16_t)i; + pc[4] = (uint16_t)i; + pc[5] = (uint16_t)i; + pc[6] = (uint16_t)i; + pc[7] = (uint16_t)i; + if (cnt > 7) { + cnt -= 8; + memcpy(&p[cnt], pc, + 8 * sizeof(uint16_t)); + pc = &p[cnt]; + while (cnt > 15) { + cnt -= 16; + memcpy(&p[cnt], pc, + 16 * sizeof(uint16_t)); + } + } + if (cnt) + memcpy(p, pc, cnt * sizeof(uint16_t)); + } else { + while (cnt > 1) { + p[--cnt] = (uint16_t)i; + p[--cnt] = (uint16_t)i; + } + if (cnt) + p[--cnt] = (uint16_t)i; + } + continue; + } + + /* + * A bit length is too big to be housed to a direct table, + * so we use a tree model for its extra bits. + */ + bitptn[len] = ptn + cnt; + bit = 1U << (diffbits -1); + extlen = len - HTBL_BITS; + + p = &(tbl[ptn >> diffbits]); + if (*p == 0) { + *p = len_avail + hf->tree_used; + ht = &(hf->tree[hf->tree_used++]); + if (hf->tree_used > hf->tree_avail) + return (0);/* Invalid */ + ht->left = 0; + ht->right = 0; + } else { + if (*p < len_avail || + *p >= (len_avail + hf->tree_used)) + return (0);/* Invalid */ + ht = &(hf->tree[*p - len_avail]); + } + while (--extlen > 0) { + if (ptn & bit) { + if (ht->left < len_avail) { + ht->left = len_avail + hf->tree_used; + ht = &(hf->tree[hf->tree_used++]); + if (hf->tree_used > hf->tree_avail) + return (0);/* Invalid */ + ht->left = 0; + ht->right = 0; + } else { + ht = &(hf->tree[ht->left - len_avail]); + } + } else { + if (ht->right < len_avail) { + ht->right = len_avail + hf->tree_used; + ht = &(hf->tree[hf->tree_used++]); + if (hf->tree_used > hf->tree_avail) + return (0);/* Invalid */ + ht->left = 0; + ht->right = 0; + } else { + ht = &(hf->tree[ht->right - len_avail]); + } + } + bit >>= 1; + } + if (ptn & bit) { + if (ht->left != 0) + return (0);/* Invalid */ + ht->left = (uint16_t)i; + } else { + if (ht->right != 0) + return (0);/* Invalid */ + ht->right = (uint16_t)i; + } + } + return (1); +} + +static int +lzh_decode_huffman_tree(struct huffman *hf, unsigned rbits, int c) +{ + struct htree_t *ht; + int extlen; + + ht = hf->tree; + extlen = hf->shift_bits; + while (c >= hf->len_avail) { + c -= hf->len_avail; + if (extlen-- <= 0 || c >= hf->tree_used) + return (0); + if (rbits & (1U << extlen)) + c = ht[c].left; + else + c = ht[c].right; + } + return (c); +} + +static inline int +lzh_decode_huffman(struct huffman *hf, unsigned rbits) +{ + int c; + /* + * At first search an index table for a bit pattern. + * If it fails, search a huffman tree for. + */ + c = hf->tbl[rbits >> hf->shift_bits]; + if (c < hf->len_avail || hf->len_avail == 0) + return (c); + /* This bit pattern needs to be found out at a huffman tree. */ + return (lzh_decode_huffman_tree(hf, rbits, c)); +} + |