diff --git a/config b/config index c5214b4..c2ac772 100644 --- a/config +++ b/config @@ -4,3 +4,20 @@ NGX_ADDON_SRCS="$NGX_ADDON_SRCS $ngx_addon_dir/ngx_http_zip_module.c" NGX_ADDON_SRCS="$NGX_ADDON_SRCS $ngx_addon_dir/ngx_http_zip_parsers.c" NGX_ADDON_SRCS="$NGX_ADDON_SRCS $ngx_addon_dir/ngx_http_zip_file.c" NGX_ADDON_SRCS="$NGX_ADDON_SRCS $ngx_addon_dir/ngx_http_zip_headers.c" + +ngx_feature="iconv_open()" +ngx_feature_name="NGX_ZIP_HAVE_ICONV" +ngx_feature_run=no +ngx_feature_incs="#include <iconv.h>" +ngx_feature_path= +case "$NGX_PLATFORM" in + Linux:*) + ngx_feature_libs= + ;; + + *) + ngx_feature_libs="-liconv" + ;; +esac +ngx_feature_test="iconv_open(\"IBM-850\", \"ISO8859-1\");" +. auto/feature diff --git a/ngx_http_zip_file.c b/ngx_http_zip_file.c index 689d827..dbf1da7 100644 --- a/ngx_http_zip_file.c +++ b/ngx_http_zip_file.c @@ -3,6 +3,11 @@ #include "ngx_http_zip_file.h" #include "ngx_http_zip_file_format.h" +#ifdef NGX_ZIP_HAVE_ICONV +#include <iconv.h> +#endif + +static ngx_str_t ngx_http_zip_header_charset_name = ngx_string("upstream_http_x_archive_charset"); #define NGX_MAX_UINT16_VALUE 0xffff @@ -26,6 +31,13 @@ static ngx_zip_extra_field_central_t ngx_zip_extra_field_central_template = { 0, /* modification time */ }; +static ngx_zip_extra_field_unicode_path_t ngx_zip_extra_field_unicode_path_template = { + 0x7075, /* Info-ZIP Unicode Path tag */ + 0, + 1, /* version of this extra field, currently 1 (c) */ + 0, /* crc-32 */ +}; + static ngx_zip_extra_field_zip64_sizes_only_t ngx_zip_extra_field_zip64_sizes_only_template = { 0x0001, //tag for zip64 extra field sizeof(ngx_zip_extra_field_zip64_sizes_only_t) - 4, @@ -155,6 +167,9 @@ ngx_http_zip_truncate_buffer(ngx_buf_t *b, } } +#ifndef ICONV_CSNMAXLEN +#define ICONV_CSNMAXLEN 64 +#endif // make our proposed ZIP-file chunk map ngx_int_t @@ -164,6 +179,34 @@ ngx_http_zip_generate_pieces(ngx_http_request_t *r, ngx_http_zip_ctx_t *ctx) off_t offset = 0; ngx_http_zip_file_t *file; ngx_http_zip_piece_t *header_piece, 
*file_piece, *trailer_piece, *cd_piece; + ngx_http_variable_value_t *vv; + + if ((vv = ngx_palloc(r->pool, sizeof(ngx_http_variable_value_t))) == NULL) + return NGX_ERROR; + + ctx->unicode_path = 0; + +#ifdef NGX_ZIP_HAVE_ICONV + iconv_t *iconv_cd = NULL; + + if (ngx_http_upstream_header_variable(r, vv, (uintptr_t)(&ngx_http_zip_header_charset_name)) == NGX_OK + && !vv->not_found && ngx_strncmp(vv->data, "utf8", sizeof("utf8") - 1) != 0) { + char encoding[ICONV_CSNMAXLEN]; + snprintf(encoding, sizeof(encoding), "%s//TRANSLIT//IGNORE", vv->data); + + iconv_cd = iconv_open((const char *)encoding, "utf-8"); + if (iconv_cd == (iconv_t)(-1)) { + ngx_log_error(NGX_LOG_WARN, r->connection->log, errno, + "mod_zip: iconv_open('%s', 'utf-8') failed", + vv->data); + iconv_cd = NULL; + } + } + + if (iconv_cd) { + ctx->unicode_path = 1; + } +#endif // pieces: for each file: header, data, footer (if needed) -> 2 or 3 per file // plus file footer (CD + [zip64 end + zip64 locator +] end of cd) in one chunk @@ -177,15 +220,48 @@ ngx_http_zip_generate_pieces(ngx_http_request_t *r, ngx_http_zip_ctx_t *ctx) file = &((ngx_http_zip_file_t *)ctx->files.elts)[i]; file->offset = offset; - if(offset >= NGX_MAX_UINT32_VALUE) +#ifdef NGX_ZIP_HAVE_ICONV + if (ctx->unicode_path) { + size_t inlen = file->filename.len, outlen, outleft; + size_t res; + u_char *p, *in; + + //inbuf + file->filename_utf8.data = ngx_pnalloc(r->pool, file->filename.len + 1); + ngx_memcpy(file->filename_utf8.data, file->filename.data, file->filename.len); + file->filename_utf8.len = file->filename.len; + file->filename_utf8.data[file->filename.len] = '\0'; + + //outbuf + outlen = outleft = inlen * sizeof(int) + 15; + file->filename.data = ngx_pnalloc(r->pool, outlen + 1); + + in = file->filename_utf8.data; + p = file->filename.data; + + //reset state + iconv(iconv_cd, NULL, NULL, NULL, NULL); + + //convert the string + res = iconv(iconv_cd, (char **)&in, &inlen, (char **)&p, &outleft); + //XXX if (res == (size_t)-1) { ? 
} + + file->filename.len = outlen - outleft; + + file->filename_utf8_crc32 = ngx_crc32_long(file->filename_utf8.data, file->filename_utf8.len); + } +#endif + + if(offset >= (off_t) NGX_MAX_UINT32_VALUE) ctx->zip64_used = file->need_zip64_offset = 1; - if(file->size >= NGX_MAX_UINT32_VALUE) + if(file->size >= (off_t) NGX_MAX_UINT32_VALUE) ctx->zip64_used = file->need_zip64 = 1; ctx->cd_size += sizeof(ngx_zip_central_directory_file_header_t) + file->filename.len + sizeof(ngx_zip_extra_field_central_t) + (file->need_zip64_offset ? (file->need_zip64 ? sizeof(ngx_zip_extra_field_zip64_sizes_offset_t) : sizeof(ngx_zip_extra_field_zip64_offset_only_t)) : - (file->need_zip64 ? sizeof(ngx_zip_extra_field_zip64_sizes_only_t) : 0) + (file->need_zip64 ? sizeof(ngx_zip_extra_field_zip64_sizes_only_t) : 0) + + (ctx->unicode_path ? (sizeof(ngx_zip_extra_field_unicode_path_t) + file->filename_utf8.len): 0) ); header_piece = &ctx->pieces[piece_i++]; @@ -193,7 +269,8 @@ ngx_http_zip_generate_pieces(ngx_http_request_t *r, ngx_http_zip_ctx_t *ctx) header_piece->file = file; header_piece->range.start = offset; header_piece->range.end = offset += sizeof(ngx_zip_local_file_header_t) - + file->filename.len + sizeof(ngx_zip_extra_field_local_t) + (file->need_zip64? sizeof(ngx_zip_extra_field_zip64_sizes_only_t):0); + + file->filename.len + sizeof(ngx_zip_extra_field_local_t) + (file->need_zip64? sizeof(ngx_zip_extra_field_zip64_sizes_only_t):0) + + (ctx->unicode_path ? 
(sizeof(ngx_zip_extra_field_unicode_path_t) + file->filename_utf8.len): 0); file_piece = &ctx->pieces[piece_i++]; file_piece->type = zip_file_piece; @@ -212,7 +289,13 @@ ngx_http_zip_generate_pieces(ngx_http_request_t *r, ngx_http_zip_ctx_t *ctx) } } - ctx->zip64_used |= offset >= NGX_MAX_UINT32_VALUE || ctx->files.nelts >= NGX_MAX_UINT16_VALUE; +#ifdef NGX_ZIP_HAVE_ICONV + if (ctx->unicode_path) { + iconv_close(iconv_cd); + } +#endif + + ctx->zip64_used |= offset >= (off_t) NGX_MAX_UINT32_VALUE || ctx->files.nelts >= NGX_MAX_UINT16_VALUE; ctx->cd_size += sizeof(ngx_zip_end_of_central_directory_record_t); if (ctx->zip64_used) @@ -243,9 +326,11 @@ ngx_http_zip_file_header_chain_link(ngx_http_request_t *r, ngx_http_zip_ctx_t *c ngx_zip_extra_field_local_t extra_field_local; ngx_zip_extra_field_zip64_sizes_only_t extra_field_zip64; ngx_zip_local_file_header_t local_file_header; + ngx_zip_extra_field_unicode_path_t extra_field_unicode_path; size_t len = sizeof(ngx_zip_local_file_header_t) + file->filename.len - + sizeof(ngx_zip_extra_field_local_t) + (file->need_zip64? sizeof(ngx_zip_extra_field_zip64_sizes_only_t):0); + + sizeof(ngx_zip_extra_field_local_t) + (file->need_zip64? sizeof(ngx_zip_extra_field_zip64_sizes_only_t):0 + + (ctx->unicode_path ? 
(sizeof(ngx_zip_extra_field_unicode_path_t) + file->filename_utf8.len): 0)); if ((link = ngx_alloc_chain_link(r->pool)) == NULL || (b = ngx_calloc_buf(r->pool)) == NULL || (b->pos = ngx_pcalloc(r->pool, len)) == NULL) @@ -266,6 +351,9 @@ ngx_http_zip_file_header_chain_link(ngx_http_request_t *r, ngx_http_zip_ctx_t *c local_file_header = ngx_zip_local_file_header_template; local_file_header.mtime = file->dos_time; local_file_header.filename_len = file->filename.len; + if (ctx->unicode_path) { + local_file_header.flags &= ~zip_utf8_flag; + } if (file->need_zip64) { local_file_header.version = zip_version_zip64; local_file_header.extra_field_len = sizeof(ngx_zip_extra_field_zip64_sizes_only_t) + sizeof(ngx_zip_extra_field_local_t); @@ -276,6 +364,14 @@ ngx_http_zip_file_header_chain_link(ngx_http_request_t *r, ngx_http_zip_ctx_t *c local_file_header.uncompressed_size = file->size; } + if (ctx->unicode_path) { + extra_field_unicode_path = ngx_zip_extra_field_unicode_path_template; + extra_field_unicode_path.crc32 = file->filename_utf8_crc32; + extra_field_unicode_path.size = sizeof(ngx_zip_extra_field_unicode_path_t) + file->filename_utf8.len; + + local_file_header.extra_field_len += sizeof(ngx_zip_extra_field_unicode_path_t) + file->filename_utf8.len; + } + if (!file->missing_crc32) { local_file_header.flags &= ~zip_missing_crc32_flag; local_file_header.crc32 = file->crc32; @@ -289,10 +385,19 @@ ngx_http_zip_file_header_chain_link(ngx_http_request_t *r, ngx_http_zip_ctx_t *c ngx_memcpy(b->pos + sizeof(ngx_zip_local_file_header_t), file->filename.data, file->filename.len); ngx_memcpy(b->pos + sizeof(ngx_zip_local_file_header_t) + file->filename.len, &extra_field_local, sizeof(ngx_zip_extra_field_local_t)); - if (file->need_zip64) + if (file->need_zip64) { ngx_memcpy(b->pos + sizeof(ngx_zip_local_file_header_t) + file->filename.len + sizeof(ngx_zip_extra_field_local_t), &extra_field_zip64, sizeof(ngx_zip_extra_field_zip64_sizes_only_t)); + if (ctx->unicode_path) { + 
ngx_memcpy(b->pos + sizeof(ngx_zip_local_file_header_t) + file->filename.len + sizeof(ngx_zip_extra_field_local_t) + sizeof(ngx_zip_extra_field_zip64_sizes_only_t), &extra_field_unicode_path, sizeof(ngx_zip_extra_field_unicode_path_t)); + ngx_memcpy(b->pos + sizeof(ngx_zip_local_file_header_t) + file->filename.len + sizeof(ngx_zip_extra_field_local_t) + sizeof(ngx_zip_extra_field_zip64_sizes_only_t) + sizeof(ngx_zip_extra_field_unicode_path_t), file->filename_utf8.data, file->filename_utf8.len); + } + } else if (ctx->unicode_path) { + ngx_memcpy(b->pos + sizeof(ngx_zip_local_file_header_t) + file->filename.len + sizeof(ngx_zip_extra_field_local_t), &extra_field_unicode_path, sizeof(ngx_zip_extra_field_unicode_path_t)); + ngx_memcpy(b->pos + sizeof(ngx_zip_local_file_header_t) + file->filename.len + sizeof(ngx_zip_extra_field_local_t) + sizeof(ngx_zip_extra_field_unicode_path_t), file->filename_utf8.data, file->filename_utf8.len); + } + ngx_http_zip_truncate_buffer(b, &piece->range, range); link->buf = b; @@ -384,9 +489,9 @@ ngx_http_zip_central_directory_chain_link(ngx_http_request_t *r, ngx_http_zip_ct - (!!ctx->zip64_used)*(sizeof(ngx_zip_zip64_end_of_central_directory_record_t) + sizeof(ngx_zip_zip64_end_of_central_directory_locator_t)); - if (cd_size < NGX_MAX_UINT32_VALUE) + if (cd_size < (off_t) NGX_MAX_UINT32_VALUE) eocdr.size = cd_size; - if (piece->range.start < NGX_MAX_UINT32_VALUE) + if (piece->range.start < (off_t) NGX_MAX_UINT32_VALUE) eocdr.offset = piece->range.start; if (ctx->zip64_used) { @@ -420,6 +525,7 @@ ngx_http_zip_write_central_directory_entry(u_char *p, ngx_http_zip_file_t *file, ngx_zip_extra_field_zip64_offset_only_t extra_zip64_offset; ngx_zip_extra_field_zip64_sizes_offset_t extra_zip64_offset_size; ngx_zip_extra_field_zip64_sizes_only_t extra_zip64_size; + ngx_zip_extra_field_unicode_path_t extra_field_unicode_path; void* extra_zip64_ptr = NULL; //!! 
size_t extra_zip64_ptr_size = 0; @@ -427,6 +533,10 @@ ngx_http_zip_write_central_directory_entry(u_char *p, ngx_http_zip_file_t *file, central_directory_file_header.mtime = file->dos_time; central_directory_file_header.crc32 = file->crc32; + if (ctx->unicode_path) { + central_directory_file_header.flags &= ~zip_utf8_flag; + } + if (!file->need_zip64) { central_directory_file_header.compressed_size = file->size; central_directory_file_header.uncompressed_size = file->size; @@ -463,6 +573,14 @@ ngx_http_zip_write_central_directory_entry(u_char *p, ngx_http_zip_file_t *file, extra_field_central = ngx_zip_extra_field_central_template; extra_field_central.mtime = file->unix_time; + if (ctx->unicode_path) { + extra_field_unicode_path = ngx_zip_extra_field_unicode_path_template; + extra_field_unicode_path.crc32 = file->filename_utf8_crc32; + extra_field_unicode_path.size = sizeof(ngx_zip_extra_field_unicode_path_t) + file->filename_utf8.len; + + central_directory_file_header.extra_field_len += sizeof(ngx_zip_extra_field_unicode_path_t) + file->filename_utf8.len; + } + ngx_memcpy(p, &central_directory_file_header, sizeof(ngx_zip_central_directory_file_header_t)); p += sizeof(ngx_zip_central_directory_file_header_t); @@ -477,5 +595,12 @@ ngx_http_zip_write_central_directory_entry(u_char *p, ngx_http_zip_file_t *file, p += extra_zip64_ptr_size; } + if (ctx->unicode_path) { + ngx_memcpy(p, &extra_field_unicode_path, sizeof(ngx_zip_extra_field_unicode_path_t)); + p += sizeof(ngx_zip_extra_field_unicode_path_t); + + ngx_memcpy(p, file->filename_utf8.data, file->filename_utf8.len); + p += file->filename_utf8.len; + } return p; } diff --git a/ngx_http_zip_file_format.h b/ngx_http_zip_file_format.h index 2f53483..7a0f4be 100644 --- a/ngx_http_zip_file_format.h +++ b/ngx_http_zip_file_format.h @@ -25,6 +25,13 @@ typedef struct { uint32_t mtime; } ngx_zip_extra_field_central_t; +typedef struct { + uint16_t tag; //0x7075 + uint16_t size; + uint8_t version; //1 + uint32_t crc32; +} 
ngx_zip_extra_field_unicode_path_t; + typedef struct { // not entirely writen... uint16_t tag; //0x0001 uint16_t size; // size of this record (32) diff --git a/ngx_http_zip_module.h b/ngx_http_zip_module.h index bf1b08a..cba8607 100644 --- a/ngx_http_zip_module.h +++ b/ngx_http_zip_module.h @@ -18,6 +18,8 @@ typedef struct { ngx_uint_t dos_time; ngx_uint_t unix_time; ngx_str_t filename; + ngx_str_t filename_utf8; + uint32_t filename_utf8_crc32; off_t size; off_t offset; @@ -70,6 +72,7 @@ typedef struct { unsigned abort:1; unsigned missing_crc32:1; // used in subrequest, if true = reads file into memory and calculates it; also to indicate presence of such file unsigned zip64_used:1; + unsigned unicode_path:1; } ngx_http_zip_ctx_t; typedef struct { diff --git a/ngx_http_zip_parsers.c b/ngx_http_zip_parsers.c index 46f3134..cd3d5c3 100644 --- a/ngx_http_zip_parsers.c +++ b/ngx_http_zip_parsers.c @@ -16,6 +16,9 @@ ngx_http_zip_file_init(ngx_http_zip_file_t *parsing_file) parsing_file->filename.data = NULL; parsing_file->filename.len = 0; + + parsing_file->filename_utf8.data = NULL; + parsing_file->filename_utf8.len = 0; parsing_file->header_sent = 0; parsing_file->trailer_sent = 0; @@ -88,7 +91,7 @@ ngx_http_zip_clean_range(ngx_http_zip_range_t *range, } -#line 92 "ngx_http_zip_parsers.c" +#line 95 "ngx_http_zip_parsers.c" static const char _request_actions[] = { 0, 1, 1, 1, 2, 1, 3, 1, 4, 1, 5, 1, 6, 1, 7, 1, @@ -152,7 +155,7 @@ static const int request_error = 0; static const int request_en_main = 1; -#line 91 "ngx_http_zip_parsers.rl" +#line 94 "ngx_http_zip_parsers.rl" ngx_int_t @@ -164,12 +167,12 @@ ngx_http_zip_parse_request(ngx_http_zip_ctx_t *ctx) ngx_http_zip_file_t *parsing_file = NULL; -#line 168 "ngx_http_zip_parsers.c" +#line 171 "ngx_http_zip_parsers.c" { cs = request_start; } -#line 173 "ngx_http_zip_parsers.c" +#line 176 "ngx_http_zip_parsers.c" { int _klen; unsigned int _trans; @@ -244,7 +247,7 @@ ngx_http_zip_parse_request(ngx_http_zip_ctx_t *ctx) 
switch ( *_acts++ ) { case 0: -#line 103 "ngx_http_zip_parsers.rl" +#line 106 "ngx_http_zip_parsers.rl" { parsing_file = ngx_array_push(&ctx->files); ngx_http_zip_file_init(parsing_file); @@ -253,38 +256,38 @@ ngx_http_zip_parse_request(ngx_http_zip_ctx_t *ctx) } break; case 1: -#line 110 "ngx_http_zip_parsers.rl" +#line 113 "ngx_http_zip_parsers.rl" { parsing_file->uri.data = p; parsing_file->uri.len = 1; } break; case 2: -#line 115 "ngx_http_zip_parsers.rl" +#line 118 "ngx_http_zip_parsers.rl" { parsing_file->uri.len = destructive_url_decode_len(parsing_file->uri.data, p); } break; case 3: -#line 118 "ngx_http_zip_parsers.rl" +#line 121 "ngx_http_zip_parsers.rl" { parsing_file->args.data = p; } break; case 4: -#line 121 "ngx_http_zip_parsers.rl" +#line 124 "ngx_http_zip_parsers.rl" { parsing_file->args.len = p - parsing_file->args.data; } break; case 5: -#line 124 "ngx_http_zip_parsers.rl" +#line 127 "ngx_http_zip_parsers.rl" { parsing_file->size = parsing_file->size * 10 + ((*p) - '0'); } break; case 6: -#line 127 "ngx_http_zip_parsers.rl" +#line 130 "ngx_http_zip_parsers.rl" { if ((*p) == '-') { ctx->missing_crc32 = 1; @@ -304,18 +307,18 @@ ngx_http_zip_parse_request(ngx_http_zip_ctx_t *ctx) } break; case 7: -#line 144 "ngx_http_zip_parsers.rl" +#line 147 "ngx_http_zip_parsers.rl" { parsing_file->filename.data = p; } break; case 8: -#line 147 "ngx_http_zip_parsers.rl" +#line 150 "ngx_http_zip_parsers.rl" { parsing_file->filename.len = p - parsing_file->filename.data; } break; -#line 319 "ngx_http_zip_parsers.c" +#line 322 "ngx_http_zip_parsers.c" } } @@ -328,7 +331,7 @@ ngx_http_zip_parse_request(ngx_http_zip_ctx_t *ctx) _out: {} } -#line 166 "ngx_http_zip_parsers.rl" +#line 169 "ngx_http_zip_parsers.rl" if (cs < request_first_final) { @@ -341,7 +344,7 @@ ngx_http_zip_parse_request(ngx_http_zip_ctx_t *ctx) } -#line 345 "ngx_http_zip_parsers.c" +#line 348 "ngx_http_zip_parsers.c" static const char _range_actions[] = { 0, 1, 0, 1, 1, 1, 2, 2, 0, 1, 2, 3, 1 @@ 
-394,7 +397,7 @@ static const int range_error = 0; static const int range_en_main = 1; -#line 180 "ngx_http_zip_parsers.rl" +#line 183 "ngx_http_zip_parsers.rl" ngx_int_t @@ -407,12 +410,12 @@ ngx_http_zip_parse_range(ngx_http_request_t *r, ngx_str_t *range_str, ngx_http_z u_char *pe = range_str->data + range_str->len; -#line 411 "ngx_http_zip_parsers.c" +#line 414 "ngx_http_zip_parsers.c" { cs = range_start; } -#line 416 "ngx_http_zip_parsers.c" +#line 419 "ngx_http_zip_parsers.c" { int _klen; unsigned int _trans; @@ -486,7 +489,7 @@ ngx_http_zip_parse_range(ngx_http_request_t *r, ngx_str_t *range_str, ngx_http_z switch ( *_acts++ ) { case 0: -#line 192 "ngx_http_zip_parsers.rl" +#line 195 "ngx_http_zip_parsers.rl" { if (range) { if (ngx_http_zip_clean_range(range, prefix, suffix, ctx) == NGX_ERROR) { @@ -502,18 +505,18 @@ ngx_http_zip_parse_range(ngx_http_request_t *r, ngx_str_t *range_str, ngx_http_z } break; case 1: -#line 206 "ngx_http_zip_parsers.rl" +#line 209 "ngx_http_zip_parsers.rl" { range->start = range->start * 10 + ((*p) - '0'); } break; case 2: -#line 208 "ngx_http_zip_parsers.rl" +#line 211 "ngx_http_zip_parsers.rl" { range->end = range->end * 10 + ((*p) - '0'); prefix = 0; } break; case 3: -#line 210 "ngx_http_zip_parsers.rl" +#line 213 "ngx_http_zip_parsers.rl" { suffix = 1; } break; -#line 517 "ngx_http_zip_parsers.c" +#line 520 "ngx_http_zip_parsers.c" } } @@ -526,7 +529,7 @@ ngx_http_zip_parse_range(ngx_http_request_t *r, ngx_str_t *range_str, ngx_http_z _out: {} } -#line 223 "ngx_http_zip_parsers.rl" +#line 226 "ngx_http_zip_parsers.rl" if (cs < range_first_final) { diff --git a/ngx_http_zip_parsers.rl b/ngx_http_zip_parsers.rl index df40a38..9fca4af 100644 --- a/ngx_http_zip_parsers.rl +++ b/ngx_http_zip_parsers.rl @@ -14,6 +14,9 @@ ngx_http_zip_file_init(ngx_http_zip_file_t *parsing_file) parsing_file->filename.data = NULL; parsing_file->filename.len = 0; + + parsing_file->filename_utf8.data = NULL; + parsing_file->filename_utf8.len = 0; 
parsing_file->header_sent = 0; parsing_file->trailer_sent = 0;