From 978165a4fd10558126803cd810a8d57d34580f00 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Th=C3=A9ophile=20Diot?= Date: Thu, 16 Jan 2025 14:35:41 +0100 Subject: [PATCH] Squashed 'src/deps/src/zstd-nginx-module/' content from commit f4ba115e0b git-subtree-dir: src/deps/src/zstd-nginx-module git-subtree-split: f4ba115e0b0eaecde545e5f37db6aa18917d8f4b --- .gitattributes | 1 + .gitignore | 54 + LICENSE | 25 + README.md | 155 ++ config | 11 + filter/config | 144 ++ filter/ngx_http_zstd_filter_module.c | 1035 +++++++++++++ static/config | 111 ++ static/ngx_http_zstd_static_module.c | 383 +++++ t/00-filter.t | 8 + t/01-static.t | 198 +++ t/suite/test | 2040 ++++++++++++++++++++++++++ t/suite/test.zst | Bin 0 -> 20706 bytes valgrind.suppress | 218 +++ 14 files changed, 4383 insertions(+) create mode 100644 .gitattributes create mode 100644 .gitignore create mode 100644 LICENSE create mode 100644 README.md create mode 100644 config create mode 100644 filter/config create mode 100644 filter/ngx_http_zstd_filter_module.c create mode 100644 static/config create mode 100644 static/ngx_http_zstd_static_module.c create mode 100644 t/00-filter.t create mode 100644 t/01-static.t create mode 100644 t/suite/test create mode 100644 t/suite/test.zst create mode 100644 valgrind.suppress diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 000000000..6fe6f35ce --- /dev/null +++ b/.gitattributes @@ -0,0 +1 @@ +*.t linguist-language=Text diff --git a/.gitignore b/.gitignore new file mode 100644 index 000000000..e3bcd3cf6 --- /dev/null +++ b/.gitignore @@ -0,0 +1,54 @@ +# Prerequisites +*.d + +# Object files +*.o +*.ko +*.obj +*.elf + +# Linker output +*.ilk +*.map +*.exp + +# Precompiled Headers +*.gch +*.pch + +# Libraries +*.lib +*.a +*.la +*.lo + +# Shared objects (inc. 
Windows DLLs) +*.dll +*.so +*.so.* +*.dylib + +# Executables +*.exe +*.out +*.app +*.i*86 +*.x86_64 +*.hex + +# Debug files +*.dSYM/ +*.su +*.idb +*.pdb + +# Kernel Module Compile Results +*.mod* +*.cmd +.tmp_versions/ +modules.order +Module.symvers +Mkfile.old +dkms.conf + +t/servroot/* diff --git a/LICENSE b/LICENSE new file mode 100644 index 000000000..b4d1280c9 --- /dev/null +++ b/LICENSE @@ -0,0 +1,25 @@ +BSD 2-Clause License + +Copyright (c) 2018, Alex Zhang +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/README.md b/README.md new file mode 100644 index 000000000..a1a7f4e90 --- /dev/null +++ b/README.md @@ -0,0 +1,155 @@ +# Name +zstd-nginx-module - Nginx module for the [Zstandard compression](https://facebook.github.io/zstd/). 
+ +# Table of Contents + +* [Name](#name) +* [Status](#status) +* [Synopsis](#synopsis) +* [Installation](#installation) +* [Directives](#directives) + * [ngx_http_zstd_filter_module](#ngx_http_zstd_filter_module) + * [zstd_dict_file](#zstd_dict_file) + * [zstd](#zstd) + * [zstd_comp_level](#zstd_comp_level) + * [zstd_min_length](#zstd_min_length) + * [zstd_types](#zstd_types) + * [zstd_buffers](#zstd_buffers) + * [ngx_http_zstd_static_module](#ngx_http_zstd_static_module) + * [zstd_static](#zstd_static) +* [Variables](#variables) + * [ngx_http_zstd_filter_module](#ngx_http_zstd_filter_module) + * [$zstd_ratio](#$zstd_ratio) +* [Author](#author) + +# Status + +This Nginx module is currently considered experimental. Issues and PRs are welcome if you encounter any problems. + +# Synopsis + +```nginx + +# specify the dictionary +zstd_dict_file /path/to/dict; + +server { + listen 127.0.0.1:8080; + server_name localhost; + + location / { + # enable zstd compression + zstd on; + zstd_min_length 256; # no less than 256 bytes + zstd_comp_level 3; # set the level to 3 + + proxy_pass http://foo.com; + } +} + +server { + listen 127.0.0.1:8081; + server_name localhost; + + location / { + zstd_static on; + root html; + } +} +``` + +# Installation + +To use these modules, configure your nginx build with `--add-module=/path/to/zstd-nginx-module`. Several points should be noted. + +* You can set the environment variables `ZSTD_INC` and `ZSTD_LIB` to specify the path to `zstd.h` and the path to the zstd library, respectively. +* The static library will be tried before the dynamic library, since this Nginx module uses some **advanced APIs** where static linking is recommended. +* The system's zstd bundle will be linked if `ZSTD_INC` and `ZSTD_LIB` are not specified. +* Both `ngx_http_zstd_static_module` and `ngx_http_zstd_filter_module` will be configured. 
+ +# Directives + +## ngx_http_zstd_filter_module + +The `ngx_http_zstd_filter_module` module is a filter that compresses responses using the "zstd" method. This often helps to reduce the size of transmitted data by half or even more. + +### zstd_dict_file + +**Syntax:** *zstd_dict_file /path/to/dict;* +**Default:** *-* +**Context:** *http* + +Specifies the external dictionary. + +**WARNING:** Be careful! The content-coding registration only specifies a means to signal the use of the zstd format, and does not additionally specify any mechanism for advertising/negotiating/synchronizing the use of a specific dictionary between client and server. Use the `zstd_dict_file` only if you can ensure that both ends (server and client) are capable of using the same dictionary (e.g. advertise with an HTTP header). See https://github.com/tokers/zstd-nginx-module/issues/2 for the details. + +### zstd + +**Syntax:** *zstd on | off;* +**Default:** *zstd off;* +**Context:** *http, server, location, if in location* + +Enables or disables zstd compression for responses. + +### zstd_comp_level + +**Syntax:** *zstd_comp_level level;* +**Default:** *zstd_comp_level 1;* +**Context:** *http, server, location* + +Sets the zstd compression level of a response. Acceptable values are in the range from `ZSTD_minCLevel()` to `ZSTD_maxCLevel()`, excluding 0. + +### zstd_min_length + +**Syntax:** *zstd_min_length length;* +**Default:** *zstd_min_length 20;* +**Context:** *http, server, location* + +Sets the minimum length of a response that will be compressed by zstd. The length is determined only from the "Content-Length" response header field. + +### zstd_types + +**Syntax:** *zstd_types mime-type ...;* +**Default:** *zstd_types text/html;* +**Context:** *http, server, location* + +Enables zstd compression of responses for the specified MIME types in addition to "text/html". The special value "*" matches any MIME type. 
+ +### zstd_buffers + +**Syntax:** *zstd_buffers number size;* +**Default:** *zstd_buffers 32 4k | 16 8k;* +**Context:** *http, server, location* + +Sets the number and size of buffers used to compress a response. By default, the buffer size is equal to one memory page. This is either 4K or 8K, depending on the platform. + +## ngx_http_zstd_static_module + +The `ngx_http_zstd_static_module` module allows sending precompressed files with the ".zst" filename extension instead of regular files. + +### zstd_static + +**Syntax:** *zstd_static on | off | always;* +**Default:** *zstd_static off;* +**Context:** *http, server, location* + +Enables ("on") or disables ("off") checking the existence of precompressed files. The following directives are also taken into account: gzip_vary. + +With the "always" value, the ".zst" file is used in all cases, without checking if the client supports it. + + +# Variables + +## ngx_http_zstd_filter_module + +### $zstd_ratio + +Achieved compression ratio, computed as the ratio between the original and compressed response sizes. + +# Author + +Alex Zhang (张超) zchao1995@gmail, UPYUN Inc. + +# License + +This Nginx module is licensed under [BSD 2-Clause License](LICENSE). diff --git a/config b/config new file mode 100644 index 000000000..3d138836f --- /dev/null +++ b/config @@ -0,0 +1,11 @@ +# Make sure the module knows it is a submodule. +ngx_addon_name=ngx_zstd +. $ngx_addon_dir/filter/config + +# Make sure the module knows it is a submodule. +ngx_addon_name=ngx_zstd +. $ngx_addon_dir/static/config + +# The final name for reporting. 
+ngx_addon_name=ngx_zstd + diff --git a/filter/config b/filter/config new file mode 100644 index 000000000..2942125c6 --- /dev/null +++ b/filter/config @@ -0,0 +1,144 @@ +ngx_feature_incs="#include " +ngx_feature_test="(void) ZSTD_createCCtx();" +ngx_feature_libs= +ngx_feature_run=yes + +ngx_zstd_opt_I= +ngx_zstd_opt_L= + +if [ -n "$ZSTD_INC" -o -n "$ZSTD_LIB" ]; then + ngx_feature="ZStandard static library in $ZSTD_INC and $ZSTD_LIB" + ngx_feature_path=$ZSTD_INC + + # we try the static shared library firstly + ngx_zstd_opt_I="-I$ZSTD_INC -DZSTD_STATIC_LINKING_ONLY" + ngx_zstd_opt_L="$ZSTD_LIB/libzstd.a" + SAVED_CC_TAST_FLAGS=$CC_TEST_FLAGS + CC_TEST_FLAGS="$ngx_zstd_opt_I $CC_TEST_FLAGS" + SAVED_NGX_TEST_LD_OPT=$NGX_TEST_LD_OPT + NGX_TEST_LD_OPT="$ngx_zstd_opt_L $NGX_TEST_LD_OPT" + + . auto/feature + + # restore + CC_TEST_FLAGS=$SAVED_CC_TAST_FLAGS + NGX_TEST_LD_OPT=$SAVED_NGX_TEST_LD_OPT + + if [ $ngx_found = no ]; then + # then try the dynamic shared library + ngx_feature="ZStandard dynamic library in $ZSTD_INC and $ZSTD_LIB" + ngx_zstd_opt_L="-L$ZSTD_LIB -lzstd -Wl,-rpath, $ZSTD_LIB" + + SAVED_CC_TAST_FLAGS=$CC_TEST_FLAGS + CC_TEST_FLAGS="$ngx_zstd_opt_I $CC_TEST_FLAGS" + SAVED_NGX_TEST_LD_OPT=$NGX_TEST_LD_OPT + NGX_TEST_LD_OPT="$ngx_zstd_opt_L $NGX_TEST_LD_OPT" + + . auto/feature + + # restore + CC_TEST_FLAGS=$SAVED_CC_TAST_FLAGS + NGX_TEST_LD_OPT=$SAVED_NGX_TEST_LD_OPT + + if [ $ngx_found = no ]; then + cat << END + $0: error: ngx_http_zstd_filter_module requires the ZStandard library, please be sure that "\$ZSTD_INC" and "\$ZSTD_LIB" are set correctly. +END + exit 1 + fi + + fi +else + # auto-discovery + ngx_feature="ZStandard static library" + ngx_zstd_opt_I="-DZSTD_STATIC_LINKING_ONLY" + ngx_zstd_opt_L="-l:libzstd.a" + + # still we consider the static library firstly + SAVED_CC_TAST_FLAGS=$CC_TEST_FLAGS + CC_TEST_FLAGS="$ngx_zstd_opt_I $CC_TEST_FLAGS" + SAVED_NGX_TEST_LD_OPT=$NGX_TEST_LD_OPT + NGX_TEST_LD_OPT="$ngx_zstd_opt_L $NGX_TEST_LD_OPT" + + . 
auto/feature + + # restore + CC_TEST_FLAGS=$SAVED_CC_TAST_FLAGS + NGX_TEST_LD_OPT=$SAVED_NGX_TEST_LD_OPT + + if [ $ngx_found = no ]; then + + ngx_feature="ZStandard dynamic library" + ngx_zstd_opt_L="-lzstd" + SAVED_CC_TAST_FLAGS=$CC_TEST_FLAGS + CC_TEST_FLAGS="$ngx_zstd_opt_I $CC_TEST_FLAGS" + SAVED_NGX_TEST_LD_OPT=$NGX_TEST_LD_OPT + NGX_TEST_LD_OPT="$ngx_zstd_opt_L $NGX_TEST_LD_OPT" + + . auto/feature + + if [ $ngx_found = no ]; then + cat << END + $0: error: ngx_http_zstd_filter_module requires the ZStandard library. +END + exit 1 + fi + + # restore + CC_TEST_FLAGS=$SAVED_CC_TAST_FLAGS + NGX_TEST_LD_OPT=$SAVED_NGX_TEST_LD_OPT + + cat << END + $0: warning: ngx_http_zstd_filter_module uses advanced ZStandard APIs (which are still considered experimental) while you are trying to link the dynamic shared library. +END + fi + + # TODO we need more tries for the different OS port. +fi + +NGX_LD_OPT="$ngx_zstd_opt_L $NGX_LD_OPT" + +HTTP_ZSTD_SRCS="$ngx_addon_dir/filter/ngx_http_zstd_filter_module.c" + +ngx_addon_name=ngx_http_zstd_filter_module +ngx_module_type=HTTP_FILTER +ngx_module_name=ngx_http_zstd_filter_module +ngx_module_incs="$ngx_zstd_opt_I" +ngx_module_srcs=$HTTP_ZSTD_SRCS +ngx_module_libs=$NGX_LD_OPT +ngx_module_order="$ngx_module_name \ + ngx_pagespeed \ + ngx_http_postpone_filter_module \ + ngx_http_ssi_filter_module \ + ngx_http_charset_filter_module \ + ngx_http_xslt_filter_module \ + ngx_http_image_filter_module \ + ngx_http_sub_filter_module \ + ngx_http_addition_filter_module \ + ngx_http_gunzip_filter_module \ + ngx_http_userid_filter_module \ + ngx_http_headers_filter_module \ + ngx_http_copy_filter_module \ + ngx_http_range_body_filter_module \ + ngx_http_not_modified_filter_module \ + ngx_http_slice_filter_module" + +. auto/module + +if [ "$ngx_module_link" != DYNAMIC ]; then + # ngx_module_order doesn't work with static modules, + # so we must re-order filters here. 
+ + if [ "$HTTP_GZIP" = YES ]; then + next=ngx_http_gzip_filter_module + elif echo $HTTP_FILTER_MODULES | grep pagespeed_etag_filter >/dev/null; then + next=ngx_pagespeed_etag_filter + else + next=ngx_http_range_header_filter_module + fi + + HTTP_FILTER_MODULES=`echo $HTTP_FILTER_MODULES \ + | sed "s/$ngx_module_name//" \ + | sed "s/$next/$next $ngx_module_name/"` +fi + diff --git a/filter/ngx_http_zstd_filter_module.c b/filter/ngx_http_zstd_filter_module.c new file mode 100644 index 000000000..50ec55f12 --- /dev/null +++ b/filter/ngx_http_zstd_filter_module.c @@ -0,0 +1,1035 @@ + +/* + * Copyright (C) Alex Zhang + */ + + +#include <ngx_config.h> +#include <ngx_core.h> +#include <ngx_http.h> + +#include <zstd.h> + + +#define NGX_HTTP_ZSTD_FILTER_COMPRESS 0 +#define NGX_HTTP_ZSTD_FILTER_FLUSH 1 +#define NGX_HTTP_ZSTD_FILTER_END 2 + + +typedef struct { + ngx_str_t dict_file; +} ngx_http_zstd_main_conf_t; + + +typedef struct { + ngx_flag_t enable; + ngx_int_t level; + ssize_t min_length; + + ngx_hash_t types; + + ngx_bufs_t bufs; + + ngx_array_t *types_keys; + + ZSTD_CDict *dict; +} ngx_http_zstd_loc_conf_t; + + +typedef struct { + ngx_chain_t *in; + ngx_chain_t *free; + ngx_chain_t *busy; + ngx_chain_t *out; + ngx_chain_t **last_out; + + ngx_buf_t *in_buf; + ngx_buf_t *out_buf; + ngx_int_t bufs; + + ZSTD_inBuffer buffer_in; + ZSTD_outBuffer buffer_out; + + ZSTD_CStream *cstream; + + ngx_http_request_t *request; + + size_t bytes_in; + size_t bytes_out; + + unsigned action:2; + unsigned last:1; + unsigned redo:1; + unsigned flush:1; + unsigned done:1; + unsigned nomem:1; +} ngx_http_zstd_ctx_t; + + +typedef struct { + ngx_conf_post_handler_pt post_handler; +} ngx_http_zstd_comp_level_bounds_t; + + +static ngx_http_output_header_filter_pt ngx_http_next_header_filter; +static ngx_http_output_body_filter_pt ngx_http_next_body_filter; + +static ngx_str_t ngx_http_zstd_ratio = ngx_string("zstd_ratio"); + + +static ngx_int_t ngx_http_zstd_header_filter(ngx_http_request_t *r); +static ngx_int_t 
ngx_http_zstd_body_filter(ngx_http_request_t *r, + ngx_chain_t *in); +static ngx_int_t ngx_http_zstd_filter_add_data(ngx_http_request_t *r, + ngx_http_zstd_ctx_t *ctx); +static ngx_int_t ngx_http_zstd_filter_get_buf(ngx_http_request_t *r, + ngx_http_zstd_ctx_t *ctx); +static ZSTD_CStream *ngx_http_zstd_filter_create_cstream(ngx_http_request_t *r, + ngx_http_zstd_ctx_t *ctx); +static ngx_int_t ngx_http_zstd_filter_compress(ngx_http_request_t *r, + ngx_http_zstd_ctx_t *ctx); +static ngx_int_t ngx_http_zstd_accept_encoding(ngx_str_t *ae); +static ngx_int_t ngx_http_zstd_ok(ngx_http_request_t *r); +static ngx_int_t ngx_http_zstd_filter_init(ngx_conf_t *cf); +static void * ngx_http_zstd_create_main_conf(ngx_conf_t *cf); +static char *ngx_http_zstd_init_main_conf(ngx_conf_t *cf, void *conf); +static void *ngx_http_zstd_create_loc_conf(ngx_conf_t *cf); +static char *ngx_http_zstd_merge_loc_conf(ngx_conf_t *cf, void *parent, + void *child); +static ngx_int_t ngx_http_zstd_add_variables(ngx_conf_t *cf); +static ngx_int_t ngx_http_zstd_ratio_variable(ngx_http_request_t *r, + ngx_http_variable_value_t *vv, uintptr_t data); +static void * ngx_http_zstd_filter_alloc(void *opaque, size_t size); +static void ngx_http_zstd_filter_free(void *opaque, void *address); +static char *ngx_http_zstd_comp_level(ngx_conf_t *cf, void *post, void *data); +static char *ngx_conf_zstd_set_num_slot_with_negatives(ngx_conf_t *cf, ngx_command_t *cmd, void *conf); + + +static ngx_http_zstd_comp_level_bounds_t ngx_http_zstd_comp_level_bounds = { + ngx_http_zstd_comp_level +}; + + +static ngx_command_t ngx_http_zstd_filter_commands[] = { + + { ngx_string("zstd"), + NGX_HTTP_MAIN_CONF|NGX_HTTP_SRV_CONF|NGX_HTTP_LOC_CONF|NGX_HTTP_LIF_CONF + |NGX_CONF_FLAG, + ngx_conf_set_flag_slot, + NGX_HTTP_LOC_CONF_OFFSET, + offsetof(ngx_http_zstd_loc_conf_t, enable), + NULL }, + + { ngx_string("zstd_comp_level"), + NGX_HTTP_MAIN_CONF|NGX_HTTP_SRV_CONF|NGX_HTTP_LOC_CONF|NGX_CONF_TAKE1, + 
ngx_conf_zstd_set_num_slot_with_negatives, + NGX_HTTP_LOC_CONF_OFFSET, + offsetof(ngx_http_zstd_loc_conf_t, level), + &ngx_http_zstd_comp_level_bounds }, + + { ngx_string("zstd_types"), + NGX_HTTP_MAIN_CONF|NGX_HTTP_SRV_CONF|NGX_HTTP_LOC_CONF|NGX_CONF_1MORE, + ngx_http_types_slot, + NGX_HTTP_LOC_CONF_OFFSET, + offsetof(ngx_http_zstd_loc_conf_t, types_keys), + &ngx_http_html_default_types[0] }, + + { ngx_string("zstd_buffers"), + NGX_HTTP_MAIN_CONF|NGX_HTTP_SRV_CONF|NGX_HTTP_LOC_CONF|NGX_CONF_TAKE2, + ngx_conf_set_bufs_slot, + NGX_HTTP_LOC_CONF_OFFSET, + offsetof(ngx_http_zstd_loc_conf_t, bufs), + NULL }, + + { ngx_string("zstd_min_length"), + NGX_HTTP_MAIN_CONF|NGX_HTTP_SRV_CONF|NGX_HTTP_LOC_CONF|NGX_CONF_1MORE, + ngx_conf_set_size_slot, + NGX_HTTP_LOC_CONF_OFFSET, + offsetof(ngx_http_zstd_loc_conf_t, min_length), + NULL }, + + { ngx_string("zstd_dict_file"), + NGX_HTTP_MAIN_CONF|NGX_CONF_TAKE1, + ngx_conf_set_str_slot, + NGX_HTTP_MAIN_CONF_OFFSET, + offsetof(ngx_http_zstd_main_conf_t, dict_file), + NULL }, + + ngx_null_command +}; + + +static ngx_http_module_t ngx_http_zstd_filter_module_ctx = { + ngx_http_zstd_add_variables, /* preconfiguration */ + ngx_http_zstd_filter_init, /* postconfiguration */ + + ngx_http_zstd_create_main_conf, /* create main configuration */ + ngx_http_zstd_init_main_conf, /* init main configuration */ + + NULL, /* create server configuration */ + NULL, /* merge server configuration */ + + ngx_http_zstd_create_loc_conf, /* create location configuration */ + ngx_http_zstd_merge_loc_conf, /* merge location configuration */ +}; + + +ngx_module_t ngx_http_zstd_filter_module = { + NGX_MODULE_V1, + &ngx_http_zstd_filter_module_ctx, /* module context */ + ngx_http_zstd_filter_commands, /* module directives */ + NGX_HTTP_MODULE, /* module type */ + NULL, /* init master */ + NULL, /* init module */ + NULL, /* init process */ + NULL, /* init thread */ + NULL, /* exit thread */ + NULL, /* exit process */ + NULL, /* exit master */ + 
NGX_MODULE_V1_PADDING +}; + + +static ngx_int_t +ngx_http_zstd_header_filter(ngx_http_request_t *r) +{ + ngx_table_elt_t *h; + ngx_http_zstd_loc_conf_t *zlcf; + ngx_http_zstd_ctx_t *ctx; + + zlcf = ngx_http_get_module_loc_conf(r, ngx_http_zstd_filter_module); + + if (!zlcf->enable + || (r->headers_out.status != NGX_HTTP_OK + && r->headers_out.status != NGX_HTTP_FORBIDDEN + && r->headers_out.status != NGX_HTTP_NOT_FOUND) + || (r->headers_out.content_encoding + && r->headers_out.content_encoding->value.len) + || (r->headers_out.content_length_n != -1 + && r->headers_out.content_length_n < zlcf->min_length) + || ngx_http_test_content_type(r, &zlcf->types) == NULL + || r->header_only) + { + return ngx_http_next_header_filter(r); + } + + r->gzip_vary = 1; + + if (ngx_http_zstd_ok(r) != NGX_OK) { + return ngx_http_next_header_filter(r); + } + + ctx = ngx_pcalloc(r->pool, sizeof(ngx_http_zstd_ctx_t)); + if (ctx == NULL) { + return NGX_ERROR; + } + + ngx_http_set_ctx(r, ctx, ngx_http_zstd_filter_module); + + ctx->request = r; + ctx->last_out = &ctx->out; + + h = ngx_list_push(&r->headers_out.headers); + if (h == NULL) { + return NGX_ERROR; + } + + h->hash = 1; + ngx_str_set(&h->key, "Content-Encoding"); + ngx_str_set(&h->value, "zstd"); + r->headers_out.content_encoding = h; + + r->main_filter_need_in_memory = 1; + + ngx_http_clear_content_length(r); + ngx_http_clear_accept_ranges(r); + ngx_http_weak_etag(r); + + return ngx_http_next_header_filter(r); +} + + +static ngx_int_t +ngx_http_zstd_body_filter(ngx_http_request_t *r, ngx_chain_t *in) +{ + size_t rv; + ngx_int_t flush, rc; + ngx_chain_t *cl; + ngx_http_zstd_ctx_t *ctx; + + + ctx = ngx_http_get_module_ctx(r, ngx_http_zstd_filter_module); + + if (ctx == NULL || ctx->done || r->header_only) { + return ngx_http_next_body_filter(r, in); + } + + ngx_log_debug0(NGX_LOG_DEBUG_HTTP, r->connection->log, 0, + "http zstd filter"); + + if (ctx->cstream == NULL) { + ctx->cstream = ngx_http_zstd_filter_create_cstream(r, ctx); + if 
(ctx->cstream == NULL) { + goto failed; + } + } + + if (in) { + if (ngx_chain_add_copy(r->pool, &ctx->in, in) != NGX_OK) { + goto failed; + } + + r->connection->buffered |= NGX_HTTP_GZIP_BUFFERED; + } + + if (ctx->nomem) { + + /* flush busy buffers */ + + if (ngx_http_next_body_filter(r, NULL) == NGX_ERROR) { + goto failed; + } + + cl = NULL; + + ngx_chain_update_chains(r->pool, &ctx->free, &ctx->busy, &cl, + (ngx_buf_tag_t) &ngx_http_zstd_filter_module); + + flush = 0; + ctx->nomem = 0; + + } else { + flush = ctx->busy ? 1 : 0; + } + + for ( ;; ) { + + /* cycle while we can write to a client */ + + for ( ;; ) { + + rc = ngx_http_zstd_filter_add_data(r, ctx); + + if (rc == NGX_DECLINED) { + break; + } + + if (rc == NGX_AGAIN) { + continue; + } + + rc = ngx_http_zstd_filter_get_buf(r, ctx); + + if (rc == NGX_ERROR) { + goto failed; + } + + if (rc == NGX_DECLINED) { + break; + } + + rc = ngx_http_zstd_filter_compress(r, ctx); + + if (rc == NGX_ERROR) { + goto failed; + } + + if (rc == NGX_OK) { + break; + } + + /* rc == NGX_AGAIN */ + } + + if (ctx->out == NULL && !flush) { + return ctx->busy ? 
NGX_AGAIN : NGX_OK; + } + + rc = ngx_http_next_body_filter(r, ctx->out); + + if (rc == NGX_ERROR) { + goto failed; + } + + ngx_chain_update_chains(r->pool, &ctx->free, &ctx->busy, &ctx->out, + (ngx_buf_tag_t) &ngx_http_zstd_filter_module); + + ctx->last_out = &ctx->out; + ctx->nomem = 0; + flush = 0; + + if (ctx->done) { + rv = ZSTD_freeCStream(ctx->cstream); + if (ZSTD_isError(rv)) { + ngx_log_error(NGX_LOG_ALERT, r->connection->log, 0, + "ZSTD_freeCStream() failed: %s", + ZSTD_getErrorName(rv)); /* rv is the zstd status; rc is the next filter's result */ + + rc = NGX_ERROR; + } + + return rc; + } + } + +failed: + + ctx->done = 1; + rv = ZSTD_freeCStream(ctx->cstream); + if (ZSTD_isError(rv)) { + ngx_log_error(NGX_LOG_ALERT, r->connection->log, 0, + "ZSTD_freeCStream() failed: %s", ZSTD_getErrorName(rv)); + } + + return NGX_ERROR; +} + + +static ngx_int_t +ngx_http_zstd_filter_compress(ngx_http_request_t *r, ngx_http_zstd_ctx_t *ctx) +{ + size_t rc, pos_in, pos_out; + char *hint; + ngx_chain_t *cl; + ngx_buf_t *b; + + ngx_log_debug8(NGX_LOG_DEBUG_HTTP, r->connection->log, 0, + "zstd compress in: src:%p pos:%ud size: %ud, " + "dst:%p pos:%ud size:%ud flush:%d redo:%d", + ctx->buffer_in.src, ctx->buffer_in.pos, ctx->buffer_in.size, + ctx->buffer_out.dst, ctx->buffer_out.pos, + ctx->buffer_out.size, ctx->flush, ctx->redo); + + pos_in = ctx->buffer_in.pos; + pos_out = ctx->buffer_out.pos; + + switch (ctx->action) { + + case NGX_HTTP_ZSTD_FILTER_FLUSH: + hint = "ZSTD_flushStream() "; + rc = ZSTD_flushStream(ctx->cstream, &ctx->buffer_out); + break; + + case NGX_HTTP_ZSTD_FILTER_END: + hint = "ZSTD_endStream() "; + rc = ZSTD_endStream(ctx->cstream, &ctx->buffer_out); + break; + + default: + hint = "ZSTD_compressStream() "; + rc = ZSTD_compressStream(ctx->cstream, &ctx->buffer_out, + &ctx->buffer_in); + break; + } + + if (ZSTD_isError(rc)) { + ngx_log_error(NGX_LOG_ALERT, r->connection->log, 0, + "%s failed: %s", hint, ZSTD_getErrorName(rc)); + + return NGX_ERROR; + } + + ngx_log_debug6(NGX_LOG_DEBUG_HTTP, r->connection->log, 0, 
+ "zstd compress out: src:%p pos:%ud size: %ud, " + "dst:%p pos:%ud size:%ud", + ctx->buffer_in.src, ctx->buffer_in.pos, ctx->buffer_in.size, + ctx->buffer_out.dst, ctx->buffer_out.pos, + ctx->buffer_out.size); + + ctx->in_buf->pos += ctx->buffer_in.pos - pos_in; + ctx->out_buf->last += ctx->buffer_out.pos - pos_out; + ctx->redo = 0; + + if (rc > 0) { + if (ctx->action == NGX_HTTP_ZSTD_FILTER_COMPRESS) { + ctx->action = NGX_HTTP_ZSTD_FILTER_FLUSH; + } + + ctx->redo = 1; + + } else if (ctx->last && ctx->action != NGX_HTTP_ZSTD_FILTER_END) { + ctx->redo = 1; + ctx->action = NGX_HTTP_ZSTD_FILTER_END; + + /* pending to call the ZSTD_endStream() */ + + return NGX_AGAIN; + + } else { + ctx->action = NGX_HTTP_ZSTD_FILTER_COMPRESS; /* restore */ + } + + if (ngx_buf_size(ctx->out_buf) == 0) { + return NGX_AGAIN; + } + + cl = ngx_alloc_chain_link(r->pool); + if (cl == NULL) { + return NGX_ERROR; + } + + b = ctx->out_buf; + + if (rc == 0 && (ctx->flush || ctx->last)) { + r->connection->buffered &= ~NGX_HTTP_GZIP_BUFFERED; + + b->flush = ctx->flush; + b->last_buf = ctx->last; + + ctx->done = ctx->last; + ctx->flush = 0; + } + + ctx->bytes_out += ngx_buf_size(b); + + cl->next = NULL; + cl->buf = b; + + *ctx->last_out = cl; + ctx->last_out = &cl->next; + + ngx_memzero(&ctx->buffer_out, sizeof(ZSTD_outBuffer)); + + return ctx->last && rc == 0 ? 
NGX_OK : NGX_AGAIN; +} + + +static ngx_int_t +ngx_http_zstd_filter_add_data(ngx_http_request_t *r, ngx_http_zstd_ctx_t *ctx) +{ + if (ctx->buffer_in.pos < ctx->buffer_in.size + || ctx->flush + || ctx->last + || ctx->redo) + { + return NGX_OK; + } + + ngx_log_debug1(NGX_LOG_DEBUG_HTTP, r->connection->log, 0, + "zstd in: %p", ctx->in); + + if (ctx->in == NULL) { + return NGX_DECLINED; + } + + ctx->in_buf = ctx->in->buf; + ctx->in = ctx->in->next; + + if (ctx->in_buf->flush) { + ctx->flush = 1; + + } else if (ctx->in_buf->last_buf) { + ctx->last = 1; + } + + ctx->buffer_in.src = ctx->in_buf->pos; + ctx->buffer_in.pos = 0; + ctx->buffer_in.size = ngx_buf_size(ctx->in_buf); + + ctx->bytes_in += ngx_buf_size(ctx->in_buf); + + if (ctx->buffer_in.size == 0) { + return NGX_AGAIN; + } + + return NGX_OK; +} + + +static ngx_int_t +ngx_http_zstd_filter_get_buf(ngx_http_request_t *r, ngx_http_zstd_ctx_t *ctx) +{ + ngx_chain_t *cl; + ngx_http_zstd_loc_conf_t *zlcf; + + if (ctx->buffer_out.pos < ctx->buffer_out.size) { + return NGX_OK; + } + + zlcf = ngx_http_get_module_loc_conf(r, ngx_http_zstd_filter_module); + + if (ctx->free) { + cl = ctx->free; + ctx->free = ctx->free->next; + ctx->out_buf = cl->buf; + ngx_free_chain(r->pool, cl); + + } else if (ctx->bufs < zlcf->bufs.num) { + ctx->out_buf = ngx_create_temp_buf(r->pool, zlcf->bufs.size); + if (ctx->out_buf == NULL) { + return NGX_ERROR; + } + + ctx->out_buf->tag = (ngx_buf_tag_t) &ngx_http_zstd_filter_module; + ctx->out_buf->recycled = 1; + ctx->bufs++; + + } else { + ctx->nomem = 1; + return NGX_DECLINED; + } + + ctx->buffer_out.dst = ctx->out_buf->pos; + ctx->buffer_out.pos = 0; + ctx->buffer_out.size = ctx->out_buf->end - ctx->out_buf->start; + + return NGX_OK; +} + + +static ZSTD_CStream * +ngx_http_zstd_filter_create_cstream(ngx_http_request_t *r, + ngx_http_zstd_ctx_t *ctx) +{ + size_t rc; + ZSTD_CStream *cstream; + ZSTD_customMem cmem; + ngx_http_zstd_loc_conf_t *zlcf; + + zlcf = ngx_http_get_module_loc_conf(r, 
ngx_http_zstd_filter_module); + + cmem.customAlloc = ngx_http_zstd_filter_alloc; + cmem.customFree = ngx_http_zstd_filter_free; + cmem.opaque = ctx; + + cstream = ZSTD_createCStream_advanced(cmem); + if (cstream == NULL) { + ngx_log_error(NGX_LOG_ALERT, r->connection->log, 0, + "ZSTD_createCStream_advanced() failed"); + + return NULL; + } + + /* TODO use the advanced initialize functions */ + + if (zlcf->dict) { +#if ZSTD_VERSION_NUMBER >= 10500 + rc = ZSTD_CCtx_reset(cstream, ZSTD_reset_session_only); + if (ZSTD_isError(rc)) { + ngx_log_error(NGX_LOG_ALERT, r->connection->log, 0, + "ZSTD_CCtx_reset() failed: %s", + ZSTD_getErrorName(rc)); + goto failed; + } + + rc = ZSTD_CCtx_refCDict(cstream, zlcf->dict); + if (ZSTD_isError(rc)) { + ngx_log_error(NGX_LOG_ALERT, r->connection->log, 0, + "ZSTD_CCtx_refCDict() failed: %s", + ZSTD_getErrorName(rc)); + goto failed; + } +#else + rc = ZSTD_initCStream_usingCDict(cstream, zlcf->dict); +#endif + if (ZSTD_isError(rc)) { + ngx_log_error(NGX_LOG_ALERT, r->connection->log, 0, + "ZSTD_initCStream_usingCDict() failed: %s", + ZSTD_getErrorName(rc)); + + goto failed; + } + + } else { + rc = ZSTD_initCStream(cstream, zlcf->level); + if (ZSTD_isError(rc)) { + ngx_log_error(NGX_LOG_ALERT, r->connection->log, 0, + "ZSTD_initCStream() failed: %s", + ZSTD_getErrorName(rc)); + + goto failed; + } + } + + return cstream; + +failed: + rc = ZSTD_freeCStream(cstream); + if (ZSTD_isError(rc)) { + ngx_log_error(NGX_LOG_ALERT, r->connection->log, 0, + "ZSTD_freeCStream() failed: %s", ZSTD_getErrorName(rc)); + } + + return NULL; +} + + +static ngx_int_t +ngx_http_zstd_accept_encoding(ngx_str_t *ae) +{ + u_char *p; + + p = ngx_strcasestrn(ae->data, "zstd", sizeof("zstd") - 2); + if (p == NULL) { + return NGX_DECLINED; + } + + if (p == ae->data || (*(p - 1) == ',' || *(p - 1) == ' ')) { + + p += sizeof("zstd") - 1; + + if (p == ae->data + ae->len || *p == ',' || *p == ' ' || *p == ';') { + return NGX_OK; + } + } + + return NGX_DECLINED; +} + + 
+static ngx_int_t +ngx_http_zstd_ok(ngx_http_request_t *r) +{ + ngx_table_elt_t *ae; + + if (r != r->main) { + return NGX_DECLINED; + } + + ae = r->headers_in.accept_encoding; + if (ae == NULL) { + return NGX_DECLINED; + } + + if (ae->value.len < sizeof("zstd") - 1) { + return NGX_DECLINED; + } + + if (ngx_memcmp(ae->value.data, "zstd", 4) != 0 + && ngx_http_zstd_accept_encoding(&ae->value) != NGX_OK) + { + return NGX_DECLINED; + } + + + r->gzip_tested = 1; + r->gzip_ok = 0; + + return NGX_OK; +} + + +static void * +ngx_http_zstd_create_main_conf(ngx_conf_t *cf) +{ + ngx_http_zstd_main_conf_t *zmcf; + + zmcf = ngx_pcalloc(cf->pool, sizeof(ngx_http_zstd_main_conf_t)); + if (zmcf == NULL) { + return NULL; + } + + return zmcf; +} + + +static char * +ngx_http_zstd_init_main_conf(ngx_conf_t *cf, void *conf) +{ + ngx_http_zstd_main_conf_t *zmcf = conf; + + if (zmcf->dict_file.len == 0) { + return NGX_CONF_OK; + } + + if (ngx_conf_full_name(cf->cycle, &zmcf->dict_file, 1) != NGX_OK) { + return NGX_CONF_ERROR; + } + + return NGX_CONF_OK; +} + + +static void * +ngx_http_zstd_create_loc_conf(ngx_conf_t *cf) +{ + ngx_http_zstd_loc_conf_t *conf; + + conf = ngx_pcalloc(cf->pool, sizeof(ngx_http_zstd_loc_conf_t)); + if (conf == NULL) { + return NULL; + } + + /* + * set by ngx_pcalloc(): + * + * conf->bufs.num = 0; + * conf->types = { NULL }; + * conf->types_keys = NULL; + * conf->dict = NULL; + */ + + conf->enable = NGX_CONF_UNSET; + conf->level = NGX_CONF_UNSET; + conf->min_length = NGX_CONF_UNSET; + + return conf; +} + + +static char * +ngx_http_zstd_merge_loc_conf(ngx_conf_t *cf, void *parent, void *child) +{ + ngx_http_zstd_loc_conf_t *prev = parent; + ngx_http_zstd_loc_conf_t *conf = child; + + ngx_fd_t fd; + size_t size; + ssize_t n; + char *rc; + u_char *buf; + ngx_file_info_t info; + ngx_http_zstd_main_conf_t *zmcf; + + rc = NGX_OK; + buf = NULL; + fd = NGX_INVALID_FILE; + + ngx_conf_merge_value(conf->enable, prev->enable, 0); + ngx_conf_merge_value(conf->level, 
prev->level, 1); + ngx_conf_merge_value(conf->min_length, prev->min_length, 20); + + if (ngx_http_merge_types(cf, &conf->types_keys, &conf->types, + &prev->types_keys, &prev->types, + ngx_http_html_default_types)) + { + return NGX_CONF_ERROR; + } + + ngx_conf_merge_ptr_value(conf->dict, prev->dict, NULL); + ngx_conf_merge_bufs_value(conf->bufs, prev->bufs, + (128 * 1024) / ngx_pagesize, ngx_pagesize); + + zmcf = ngx_http_conf_get_module_main_conf(cf, ngx_http_zstd_filter_module); + + if (conf->enable && zmcf->dict_file.len > 0) { + + if (conf->level == prev->level) { + conf->dict = prev->dict; + + } else { + /* + * compression level is different from the outer block, + * so we should create a separate dict object. + */ + + fd = ngx_open_file(zmcf->dict_file.data, NGX_FILE_RDONLY, + NGX_FILE_OPEN, 0); + + if (fd == NGX_INVALID_FILE) { + ngx_conf_log_error(NGX_LOG_EMERG, cf, ngx_errno, + ngx_open_file_n " \"%V\" failed", + &zmcf->dict_file); + + return NGX_CONF_ERROR; + } + + if (ngx_fd_info(fd, &info) == NGX_FILE_ERROR) { + ngx_conf_log_error(NGX_LOG_EMERG, cf, ngx_errno, + ngx_fd_info_n " \"%V\" failed", + &zmcf->dict_file); + + rc = NGX_CONF_ERROR; + goto close; + } + + size = ngx_file_size(&info); + buf = ngx_palloc(cf->pool, size); + if (buf == NULL) { + rc = NGX_CONF_ERROR; + goto close; + } + + n = ngx_read_fd(fd, (void *) buf, size); + if (n < 0) { + ngx_conf_log_error(NGX_LOG_EMERG, cf, ngx_errno, + ngx_read_fd_n " \"%V\" failed", + &zmcf->dict_file); + + rc = NGX_CONF_ERROR; + goto close; + + } else if ((size_t) n != size) { /* short read: errno is not meaningful here */ + ngx_conf_log_error(NGX_LOG_EMERG, cf, 0, + ngx_read_fd_n " \"%V\" incomplete", + &zmcf->dict_file); + + rc = NGX_CONF_ERROR; + goto close; + } + + conf->dict = ZSTD_createCDict_byReference(buf, size, conf->level); + if (conf->dict == NULL) { + ngx_conf_log_error(NGX_LOG_EMERG, cf, 0, + "ZSTD_createCDict_byReference() failed"); + rc = NGX_CONF_ERROR; + goto close; + } + } + } + +close: + + if (fd != NGX_INVALID_FILE && 
ngx_close_file(fd) == NGX_FILE_ERROR) { + ngx_conf_log_error(NGX_LOG_EMERG, cf, ngx_errno, + ngx_close_file_n " \"%V\" failed", + &zmcf->dict_file); + + rc = NGX_CONF_ERROR; + } + + return rc; +} + + +static ngx_int_t +ngx_http_zstd_filter_init(ngx_conf_t *cf) +{ + ngx_http_next_header_filter = ngx_http_top_header_filter; + ngx_http_top_header_filter = ngx_http_zstd_header_filter; + + ngx_http_next_body_filter = ngx_http_top_body_filter; + ngx_http_top_body_filter = ngx_http_zstd_body_filter; + + return NGX_OK; +} + + +static void * +ngx_http_zstd_filter_alloc(void *opaque, size_t size) +{ + ngx_http_zstd_ctx_t *ctx = opaque; + + void *p; + + p = ngx_palloc(ctx->request->pool, size); + + ngx_log_debug2(NGX_LOG_DEBUG_HTTP, ctx->request->connection->log, 0, + "zstd alloc: %p, size: %uz", p, size); + + return p; +} + + +static ngx_int_t +ngx_http_zstd_add_variables(ngx_conf_t *cf) +{ + ngx_http_variable_t *v; + + v = ngx_http_add_variable(cf, &ngx_http_zstd_ratio, + NGX_HTTP_VAR_NOCACHEABLE); + if (v == NULL) { + return NGX_ERROR; + } + + v->get_handler = ngx_http_zstd_ratio_variable; + + return NGX_OK; +} + + +static ngx_int_t +ngx_http_zstd_ratio_variable(ngx_http_request_t *r, + ngx_http_variable_value_t *vv, uintptr_t data) +{ + ngx_uint_t ratio_int, ratio_frac; + ngx_http_zstd_ctx_t *ctx; + + ctx = ngx_http_get_module_ctx(r, ngx_http_zstd_filter_module); + if (ctx == NULL || !ctx->done || ctx->bytes_out == 0) { + vv->not_found = 1; + return NGX_OK; + } + + vv->data = ngx_pnalloc(r->pool, NGX_INT32_LEN + 3); + if (vv->data == NULL) { + return NGX_ERROR; + } + + ratio_int = (ngx_uint_t) ctx->bytes_in / ctx->bytes_out; + ratio_frac = (ngx_uint_t) (ctx->bytes_in * 1000 / ctx->bytes_out % 1000); + + vv->len = ngx_sprintf(vv->data, "%ui.%03ui", ratio_int, ratio_frac) + - vv->data; + + vv->valid = 1; + vv->no_cacheable = 1; + + return NGX_OK; +} + + +static void +ngx_http_zstd_filter_free(void *opaque, void *address) +{ +#if (NGX_DEBUG) + + ngx_http_zstd_ctx_t *ctx = 
opaque; + + ngx_log_debug1(NGX_LOG_DEBUG_HTTP, ctx->request->connection->log, 0, + "zstd free: %p", address); + +#endif +} + + +static char * +ngx_http_zstd_comp_level(ngx_conf_t *cf, void *post, void *data) +{ + ngx_int_t *np = data; + + if (*np == 0 || *np < (ngx_int_t)ZSTD_minCLevel() || *np > ZSTD_maxCLevel()) { + ngx_conf_log_error(NGX_LOG_EMERG, cf, 0, + "zstd compress level must between %i and %i excluding 0", + (ngx_int_t)ZSTD_minCLevel(), ZSTD_maxCLevel()); + + return NGX_CONF_ERROR; + } + + return NGX_CONF_OK; +} + +static char * +ngx_conf_zstd_set_num_slot_with_negatives(ngx_conf_t *cf, ngx_command_t *cmd, void *conf) +{ + char *p = conf; + + ngx_int_t *np; + ngx_str_t *value; + ngx_conf_post_t *post; + + + np = (ngx_int_t *) (p + cmd->offset); + + if (*np != NGX_CONF_UNSET) { + return "is duplicate"; + } + + value = cf->args->elts; + + if (*(value[1].data) == '-') { + // Parse ignoring the leading '-' character + *np = ngx_atoi(value[1].data + 1, value[1].len - 1); + + // NGX_ERROR is -1 so we need to check for that before making the parsed + // result negative + if (*np == NGX_ERROR) { + return "invalid number"; + } + + *np = -*np; + } else { + *np = ngx_atoi(value[1].data, value[1].len); + + if (*np == NGX_ERROR) { + return "invalid number"; + } + } + + if (cmd->post) { + post = cmd->post; + return post->post_handler(cf, post, np); + } + + return NGX_CONF_OK; +} diff --git a/static/config b/static/config new file mode 100644 index 000000000..ed6e66ee3 --- /dev/null +++ b/static/config @@ -0,0 +1,111 @@ +ngx_feature_incs="#include " +ngx_feature_test="(void) ZSTD_createCCtx();" +ngx_feature_libs= +ngx_feature_run=yes + +ngx_zstd_opt_I= +ngx_zstd_opt_L= + +if [ -n "$ZSTD_INC" -o -n "$ZSTD_LIB" ]; then + ngx_feature="ZStandard static library in $ZSTD_INC and $ZSTD_LIB" + ngx_feature_path=$ZSTD_INC + + # we try the static shared library firstly + ngx_zstd_opt_I="-I$ZSTD_INC -DZSTD_STATIC_LINKING_ONLY" + ngx_zstd_opt_L="$ZSTD_LIB/libzstd.a" + 
SAVED_CC_TAST_FLAGS=$CC_TEST_FLAGS + CC_TEST_FLAGS="$ngx_zstd_opt_I $CC_TEST_FLAGS" + SAVED_NGX_TEST_LD_OPT=$NGX_TEST_LD_OPT + NGX_TEST_LD_OPT="$ngx_zstd_opt_L $NGX_TEST_LD_OPT" + + . auto/feature + + # restore + CC_TEST_FLAGS=$SAVED_CC_TAST_FLAGS + NGX_TEST_LD_OPT=$SAVED_NGX_TEST_LD_OPT + + if [ $ngx_found = no ]; then + # then try the dynamic shared library + ngx_feature="ZStandard dynamic library in $ZSTD_INC and $ZSTD_LIB" + ngx_zstd_opt_L="-L$ZSTD_LIB -lzstd -Wl,-rpath, $ZSTD_LIB" + + SAVED_CC_TAST_FLAGS=$CC_TEST_FLAGS + CC_TEST_FLAGS="$ngx_zstd_opt_I $CC_TEST_FLAGS" + SAVED_NGX_TEST_LD_OPT=$NGX_TEST_LD_OPT + NGX_TEST_LD_OPT="$ngx_zstd_opt_L $NGX_TEST_LD_OPT" + + . auto/feature + + # restore + CC_TEST_FLAGS=$SAVED_CC_TAST_FLAGS + NGX_TEST_LD_OPT=$SAVED_NGX_TEST_LD_OPT + + if [ $ngx_found = no ]; then + cat << END + $0: error: ngx_http_zstd_filter_module requires the ZStandard library, please be sure that "\$ZSTD_INC" and "\$ZSTD_LIB" are set correctly. +END + exit 1 + fi + + fi +else + # auto-discovery + ngx_feature="ZStandard static library" + ngx_zstd_opt_I="-DZSTD_STATIC_LINKING_ONLY" + ngx_zstd_opt_L="-l:libzstd.a" + + # still we consider the static library firstly + SAVED_CC_TAST_FLAGS=$CC_TEST_FLAGS + CC_TEST_FLAGS="$ngx_zstd_opt_I $CC_TEST_FLAGS" + SAVED_NGX_TEST_LD_OPT=$NGX_TEST_LD_OPT + NGX_TEST_LD_OPT="$ngx_zstd_opt_L $NGX_TEST_LD_OPT" + + . auto/feature + + # restore + CC_TEST_FLAGS=$SAVED_CC_TAST_FLAGS + NGX_TEST_LD_OPT=$SAVED_NGX_TEST_LD_OPT + + if [ $ngx_found = no ]; then + + ngx_feature="ZStandard dynamic library" + ngx_zstd_opt_L="-lzstd" + SAVED_CC_TAST_FLAGS=$CC_TEST_FLAGS + CC_TEST_FLAGS="$ngx_zstd_opt_I $CC_TEST_FLAGS" + SAVED_NGX_TEST_LD_OPT=$NGX_TEST_LD_OPT + NGX_TEST_LD_OPT="$ngx_zstd_opt_L $NGX_TEST_LD_OPT" + + . auto/feature + + if [ $ngx_found = no ]; then + cat << END + $0: error: ngx_http_zstd_filter_module requires the ZStandard library. 
+END + exit 1 + fi + + # restore + CC_TEST_FLAGS=$SAVED_CC_TAST_FLAGS + NGX_TEST_LD_OPT=$SAVED_NGX_TEST_LD_OPT + + cat << END + $0: warning: ngx_http_zstd_filter_module uses advanced ZStandard APIs (which are still considered experimental) while you are trying to link the dynamic shared library. +END + fi + + # TODO we need more tries for the different OS port. +fi + +CFLAGS="$ngx_zstd_opt_I $CFLAGS" +NGX_LD_OPT="$ngx_zstd_opt_L $NGX_LD_OPT" + +# build the ngx_http_zstd_static_module +HTTP_ZSTD_SRCS="$ngx_addon_dir/static/ngx_http_zstd_static_module.c" + +ngx_addon_name=ngx_http_zstd_static_module +ngx_module_type=HTTP +ngx_module_name=ngx_http_zstd_static_module +ngx_module_incs="$ngx_zstd_opt_I" +ngx_module_srcs=$HTTP_ZSTD_SRCS + +. auto/module diff --git a/static/ngx_http_zstd_static_module.c b/static/ngx_http_zstd_static_module.c new file mode 100644 index 000000000..3b247e95d --- /dev/null +++ b/static/ngx_http_zstd_static_module.c @@ -0,0 +1,383 @@ + +/* + * Copyright (C) Alex Zhang + */ + + +#include +#include +#include + + +#define NGX_HTTP_ZSTD_STATIC_OFF 0 +#define NGX_HTTP_ZSTD_STATIC_ON 1 +#define NGX_HTTP_ZSTD_STATIC_ALWAYS 2 + + +typedef struct { + ngx_uint_t enable; +} ngx_http_zstd_static_conf_t; + + +static ngx_conf_enum_t ngx_http_zstd_static[] = { + { ngx_string("off"), NGX_HTTP_ZSTD_STATIC_OFF }, + { ngx_string("on"), NGX_HTTP_ZSTD_STATIC_ON }, + { ngx_string("always"), NGX_HTTP_ZSTD_STATIC_ALWAYS }, +}; + + +static ngx_command_t ngx_http_zstd_static_commands[] = { + + { ngx_string("zstd_static"), + NGX_HTTP_MAIN_CONF|NGX_HTTP_SRV_CONF|NGX_HTTP_LOC_CONF|NGX_CONF_TAKE1, + ngx_conf_set_enum_slot, + NGX_HTTP_LOC_CONF_OFFSET, + offsetof(ngx_http_zstd_static_conf_t, enable), + &ngx_http_zstd_static }, + + ngx_null_command +}; + + +static ngx_int_t ngx_http_zstd_static_handler(ngx_http_request_t *r); +static ngx_int_t ngx_http_zstd_accept_encoding(ngx_str_t *ae); +static ngx_int_t ngx_http_zstd_ok(ngx_http_request_t *r); +static void * 
ngx_http_zstd_static_create_loc_conf(ngx_conf_t *cf); +static char * ngx_http_zstd_static_merge_loc_conf(ngx_conf_t *cf, void *parent, + void *child); +static ngx_int_t ngx_http_zstd_static_init(ngx_conf_t *cf); + + +static ngx_http_module_t ngx_http_zstd_static_module_ctx = { + NULL, /* preconfiguration */ + ngx_http_zstd_static_init, /* postconfiguration */ + + NULL, /* create main configuration */ + NULL, /* init main configuration */ + + NULL, /* create server configuration */ + NULL, /* merge server configuration */ + + ngx_http_zstd_static_create_loc_conf, /* create location configuration */ + ngx_http_zstd_static_merge_loc_conf, /* merge location configuration */ +}; + + +ngx_module_t ngx_http_zstd_static_module = { + NGX_MODULE_V1, + &ngx_http_zstd_static_module_ctx, /* module context */ + ngx_http_zstd_static_commands, /* module directives */ + NGX_HTTP_MODULE, /* module type */ + NULL, /* init master */ + NULL, /* init module */ + NULL, /* init process */ + NULL, /* init thread */ + NULL, /* exit thread */ + NULL, /* exit process */ + NULL, /* exit master */ + NGX_MODULE_V1_PADDING +}; + + +static ngx_int_t +ngx_http_zstd_static_handler(ngx_http_request_t *r) +{ + u_char *p; + ngx_int_t rc; + ngx_uint_t level; + size_t root; + ngx_str_t path; + ngx_buf_t *b; + ngx_log_t *log; + ngx_table_elt_t *h; + ngx_chain_t out; + ngx_open_file_info_t of; + ngx_http_core_loc_conf_t *clcf; + ngx_http_zstd_static_conf_t *zscf; + + if (!(r->method & (NGX_HTTP_GET|NGX_HTTP_HEAD))) { + return NGX_DECLINED; + } + + if (r->uri.data[r->uri.len - 1] == '/') { + return NGX_DECLINED; + } + + zscf = ngx_http_get_module_loc_conf(r, ngx_http_zstd_static_module); + + if (zscf->enable == NGX_HTTP_ZSTD_STATIC_OFF) { + return NGX_DECLINED; + } + + if (zscf->enable == NGX_HTTP_ZSTD_STATIC_ON) { + rc = ngx_http_zstd_ok(r); + + } else { + rc = NGX_OK; + } + + clcf = ngx_http_get_module_loc_conf(r, ngx_http_core_module); + + if (!clcf->gzip_vary && rc != NGX_OK) { + return NGX_DECLINED; + 
} + + log = r->connection->log; + + p = ngx_http_map_uri_to_path(r, &path, &root, sizeof(".zst") - 1); + if (p == NULL) { + return NGX_HTTP_INTERNAL_SERVER_ERROR; + } + + *p++ = '.'; + *p++ = 'z'; + *p++ = 's'; + *p++ = 't'; + *p = '\0'; + + path.len = p - path.data; + + ngx_log_debug1(NGX_LOG_DEBUG_HTTP, log, 0, + "http filename: \"%s\"", path.data); + + ngx_memzero(&of, sizeof(ngx_open_file_info_t)); + + of.read_ahead = clcf->read_ahead; + of.directio = clcf->directio; + of.valid = clcf->open_file_cache_valid; + of.min_uses = clcf->open_file_cache_min_uses; + of.errors = clcf->open_file_cache_errors; + of.events = clcf->open_file_cache_events; + + if (ngx_http_set_disable_symlinks(r, clcf, &path, &of) != NGX_OK) { + return NGX_HTTP_INTERNAL_SERVER_ERROR; + } + + if (ngx_open_cached_file(clcf->open_file_cache, &path, &of, r->pool) + != NGX_OK) + { + switch (of.err) { + + case 0: + return NGX_HTTP_INTERNAL_SERVER_ERROR; + + case NGX_ENOENT: + case NGX_ENOTDIR: + case NGX_ENAMETOOLONG: + + return NGX_DECLINED; + + case NGX_EACCES: +#if (NGX_HAVE_OPENAT) + case NGX_EMLINK: + case NGX_ELOOP: +#endif + + level = NGX_LOG_ERR; + break; + + default: + + level = NGX_LOG_CRIT; + break; + } + + ngx_log_error(level, log, of.err, + "%s \"%s\" failed", of.failed, path.data); + + return NGX_DECLINED; + } + + if (zscf->enable == NGX_HTTP_ZSTD_STATIC_ON) { + r->gzip_vary = 1; + + if (rc != NGX_OK) { + return NGX_DECLINED; + } + } + + ngx_log_debug1(NGX_LOG_DEBUG_HTTP, log, 0, "http static fd: %d", of.fd); + + if (of.is_dir) { + ngx_log_debug0(NGX_LOG_DEBUG_HTTP, log, 0, "http dir"); + return NGX_DECLINED; + } + +#if !(NGX_WIN32) /* the not regular files are probably Unix specific */ + + if (!of.is_file) { + ngx_log_error(NGX_LOG_CRIT, log, 0, + "\"%s\" is not a regular file", path.data); + + return NGX_HTTP_NOT_FOUND; + } + +#endif + + r->root_tested = !r->error_page; + + rc = ngx_http_discard_request_body(r); + if (rc != NGX_OK) { + return rc; + } + + log->action = "sending 
response to client"; + + r->headers_out.status = NGX_HTTP_OK; + r->headers_out.content_length_n = of.size; + r->headers_out.last_modified_time = of.mtime; + + if (ngx_http_set_etag(r) != NGX_OK) { + return NGX_HTTP_INTERNAL_SERVER_ERROR; + } + + if (ngx_http_set_content_type(r) != NGX_OK) { + return NGX_HTTP_INTERNAL_SERVER_ERROR; + } + + h = ngx_list_push(&r->headers_out.headers); + if (h == NULL) { + return NGX_HTTP_INTERNAL_SERVER_ERROR; + } + + h->hash = 1; + ngx_str_set(&h->key, "Content-Encoding"); + ngx_str_set(&h->value, "zstd"); + r->headers_out.content_encoding = h; + + b = ngx_calloc_buf(r->pool); + if (b == NULL) { + return NGX_HTTP_INTERNAL_SERVER_ERROR; + } + + b->file = ngx_pcalloc(r->pool, sizeof(ngx_file_t)); + if (b->file == NULL) { + return NGX_HTTP_INTERNAL_SERVER_ERROR; + } + + rc = ngx_http_send_header(r); + + if (rc == NGX_ERROR || rc > NGX_OK || r->header_only) { + return rc; + } + + b->file_pos = 0; + b->file_last = of.size; + + b->in_file = b->file_last ? 1 : 0; + b->last_buf = (r == r->main) ? 
1 : 0; + b->last_in_chain = 1; + + b->file->fd = of.fd; + b->file->name = path; + b->file->log = log; + b->file->directio = of.is_directio; + + out.buf = b; + out.next = NULL; + + return ngx_http_output_filter(r, &out); +} + + +static ngx_int_t +ngx_http_zstd_ok(ngx_http_request_t *r) +{ + ngx_table_elt_t *ae; + + if (r != r->main) { + return NGX_DECLINED; + } + + ae = r->headers_in.accept_encoding; + if (ae == NULL) { + return NGX_DECLINED; + } + + if (ae->value.len < sizeof("zstd") - 1) { + return NGX_DECLINED; + } + + if (ngx_memcmp(ae->value.data, "zstd", 4) != 0 + && ngx_http_zstd_accept_encoding(&ae->value) != NGX_OK) + { + return NGX_DECLINED; + } + + + r->gzip_tested = 1; + r->gzip_ok = 0; + + return NGX_OK; +} + + +static ngx_int_t +ngx_http_zstd_accept_encoding(ngx_str_t *ae) +{ + u_char *p; + + p = ngx_strcasestrn(ae->data, "zstd", sizeof("zstd") - 1); + if (p == NULL) { + return NGX_DECLINED; + } + + if (p == ae->data || (*(p - 1) == ',' || *(p - 1) == ' ')) { + + p += sizeof("zstd") - 1; + + if (p == ae->data + ae->len || *p == ',' || *p == ' ' || *p == ';') { + return NGX_OK; + } + } + + return NGX_DECLINED; +} + + +static void * +ngx_http_zstd_static_create_loc_conf(ngx_conf_t *cf) +{ + ngx_http_zstd_static_conf_t *conf; + + conf = ngx_palloc(cf->pool, sizeof(ngx_http_zstd_static_conf_t)); + if (conf == NULL) { + return NULL; + } + + conf->enable = NGX_CONF_UNSET_UINT; + + return conf; +} + + +static char * +ngx_http_zstd_static_merge_loc_conf(ngx_conf_t *cf, void *parent, void *child) +{ + ngx_http_zstd_static_conf_t *prev = parent; + ngx_http_zstd_static_conf_t *conf = child; + + ngx_conf_merge_uint_value(conf->enable, prev->enable, + NGX_HTTP_ZSTD_STATIC_OFF); + + return NGX_CONF_OK; +} + + +static ngx_int_t +ngx_http_zstd_static_init(ngx_conf_t *cf) +{ + ngx_http_handler_pt *h; + ngx_http_core_main_conf_t *cmcf; + + cmcf = ngx_http_conf_get_module_main_conf(cf, ngx_http_core_module); + + h = 
ngx_array_push(&cmcf->phases[NGX_HTTP_CONTENT_PHASE].handlers); + if (h == NULL) { + return NGX_ERROR; + } + + *h = ngx_http_zstd_static_handler; + + return NGX_OK; +} diff --git a/t/00-filter.t b/t/00-filter.t new file mode 100644 index 000000000..1e4809807 --- /dev/null +++ b/t/00-filter.t @@ -0,0 +1,8 @@ +use Test::Nginx::Socket::Lua; + +no_long_string(); +run_tests(); + +__DATA__ + + diff --git a/t/01-static.t b/t/01-static.t new file mode 100644 index 000000000..74fa13076 --- /dev/null +++ b/t/01-static.t @@ -0,0 +1,198 @@ +use Test::Nginx::Socket; +use lib 'lib'; + +no_long_string(); +log_level 'debug'; +repeat_each(3); +plan tests => repeat_each() * ((blocks() - 3) * 5 + 3); +run_tests(); + + +__DATA__ + + +=== TEST 1: zstd_static off +--- config + location /test { + zstd_static off; + root ../../t/suite; + } +--- request +GET /test +--- response_headers +Content-Length: 59738 +ETag: "5be17d33-e95a" +!Content-Encoding +--- no_error_log +[error] + + + +=== TEST 2: zstd_static off (with accept-encoding header) +--- config + location /test { + zstd_static off; + root ../../t/suite; + } +--- request +GET /test +Accept-Encoding: gzip,zstd +--- response_headers +Content-Length: 59738 +ETag: "5be17d33-e95a" +!Content-Encoding +--- no_error_log +[error] + + + +=== TEST 3: zstd_static on +--- config + location /test { + zstd_static on; + root ../../t/suite; + } +--- request +GET /test +--- more_headers +Accept-Encoding: gzip, zstd +--- response_headers +Content-Length: 20706 +ETag: "5be17d33-50e2" +!Content-Encoding +Content-Encoding: zstd +--- no_error_log +[error] + + + +=== TEST 4: zstd_static on (without accept-encoding header) +--- config + location /test { + zstd_static on; + root ../../t/suite; + } +--- request +GET /test +--- response_headers +Content-Length: 59738 +ETag: "5be17d33-e95a" +Content-Encoding: zstd +!Content-Encoding +--- no_error_log +[error] + + + +=== TEST 5: zstd_static on (without zstd component in accept-encoding header) +--- config + 
location /test { + zstd_static on; + root ../../t/suite; + } +--- request +GET /test +--- more_headers +Accept-Encoding: gzip, br +--- response_headers +Content-Length: 59738 +ETag: "5be17d33-e95a" +!Content-Encoding +--- no_error_log +[error] + + + +=== TEST 6: zstd_static always +--- config + location /test { + zstd_static always; + root ../../t/suite; + } +--- request +GET /test +--- more_headers +Accept-Encoding: gzip, br +--- response_headers +Content-Length: 20706 +ETag: "5be17d33-50e2" +Content-Encoding: zstd +--- no_error_log +[error] + + + +=== TEST 6: zstd_static always (without accept-encoding header) +--- config + location /test { + zstd_static always; + root ../../t/suite; + } +--- request +GET /test +--- response_headers +Content-Length: 20706 +ETag: "5be17d33-50e2" +Content-Encoding: zstd +--- no_error_log +[error] + + + +=== TEST 7: zstd_static always (without zstd component in accept-encoding header) +--- config + location /test { + zstd_static always; + root ../../t/suite; + } +--- request +GET /test +--- more_headers +Accept-Encoding: gzip, br +--- response_headers +Content-Length: 20706 +ETag: "5be17d33-50e2" +Content-Encoding: zstd +--- no_error_log +[error] + + +=== TEST 8: zstd_static always (file does not exist) +--- config + location /test2 { + zstd_static always; + root ../../t/suite; + } +--- request +GET /test2 +--- more_headers +Accept-Encoding: gzip, br +--- error_code: 404 + + + +=== TEST 9: zstd_static on (file does not exist) +--- config + location /test2 { + zstd_static on; + root ../../t/suite; + } +--- request +GET /test2 +--- more_headers +Accept-Encoding: gzip, br +--- error_code: 404 + + + +=== TEST 10: zstd_static off (file does not exist) +--- config + location /test2 { + zstd_static off; + root ../../t/suite; + } +--- request +GET /test2 +--- more_headers +Accept-Encoding: gzip, br +--- error_code: 404 diff --git a/t/suite/test b/t/suite/test new file mode 100644 index 000000000..53ce3fa9b --- /dev/null +++ b/t/suite/test 
@@ -0,0 +1,2040 @@ + + + + +Regular Expression Matching Can Be Simple And Fast + + + + +

+Regular Expression Matching Can Be Simple And Fast +
+(but is slow in Java, Perl, PHP, Python, Ruby, ...) +

+

+Russ Cox +
+rsc@swtch.com +
+January 2007 +
+ +

+ + +

Introduction

+ +

+This is a tale of two approaches to regular expression matching. +One of them is in widespread use in the +standard interpreters for many languages, including Perl. +The other is used only in a few places, notably most implementations +of awk and grep. +The two approaches have wildly different +performance characteristics: +

+ +
+
+ +
Perl graphThompson NFA graph +
+
+Time to match a?nan against an +
+
+
+ +

+Let's use superscripts to denote string repetition, +so that +a?3a3 +is shorthand for +a?a?a?aaa. +The two graphs plot the time required by each approach +to match the regular expression +a?nan +against the string an. +

+ +

+Notice that Perl requires over sixty seconds to match +a 29-character string. +The other approach, labeled Thompson NFA for +reasons that will be explained later, +requires twenty microseconds to match the string. +That's not a typo. The Perl graph plots time in seconds, +while the Thompson NFA graph plots time in microseconds: +the Thompson NFA implementation +is a million times faster than Perl +when running on a miniscule 29-character string. +The trends shown in the graph continue: the +Thompson NFA handles a 100-character string in under 200 microseconds, +while Perl would require over 1015 years. +(Perl is only the most conspicuous example of a large +number of popular programs that use the same algorithm; +the above graph could have been Python, or PHP, or Ruby, +or many other languages. A more detailed +graph later in this article presents data for other implementations.) +

+ +

+It may be hard to believe the graphs: perhaps you've used Perl, +and it never seemed like regular expression matching was +particularly slow. +Most of the time, in fact, regular expression matching in Perl +is fast enough. +As the graph shows, though, it is possible +to write so-called “pathological” regular expressions that +Perl matches very very slowly. +In contrast, there are no regular expressions that are +pathological for the Thompson NFA implementation. +Seeing the two graphs side by side prompts the question, +“why doesn't Perl use the Thompson NFA approach?” +It can, it should, and that's what the rest of this article is about. +

+ +

+Historically, regular expressions are one of computer science's +shining examples of how using good theory leads to good programs. +They were originally developed by theorists as a +simple computational model, +but Ken Thompson introduced them to +programmers in his implementation of the text editor QED +for CTSS. +Dennis Ritchie followed suit in his own implementation +of QED, for GE-TSS. +Thompson and Ritchie would go on to create Unix, +and they brought regular expressions with them. +By the late 1970s, regular expressions were a key +feature of the Unix landscape, in tools such as +ed, sed, grep, egrep, awk, and lex. +

+ +

+Today, regular expressions have also become a shining +example of how ignoring good theory leads to bad programs. +The regular expression implementations used by +today's popular tools are significantly slower +than the ones used in many of those thirty-year-old Unix tools. +

+ +

+This article reviews the good theory: +regular expressions, finite automata, +and a regular expression search algorithm +invented by Ken Thompson in the mid-1960s. +It also puts the theory into practice, describing +a simple implementation of Thompson's algorithm. +That implementation, less than 400 lines of C, +is the one that went head to head with Perl above. +It outperforms the more complex real-world +implementations used by +Perl, Python, PCRE, and others. +The article concludes with a discussion of how +theory might yet be converted into practice +in the real-world implementations. +

+ +

+Regular Expressions +

+ + +

+Regular expressions are a notation for +describing sets of character strings. +When a particular string is in the set +described by a regular expression, +we often say that the regular expression +matches +the string. +

+ +

+The simplest regular expression is a single literal character. +Except for the special metacharacters +*+?()|, +characters match themselves. +To match a metacharacter, escape it with +a backslash: +\+ +matches a literal plus character. +

+ +

+Two regular expressions can be alternated or concatenated to form a new +regular expression: +if e1 matches +s +and e2 matches +t, +then e1|e2 matches +s +or +t, +and +e1e2 +matches +st. +

+ +

+The metacharacters +*, ++, +and +? +are repetition operators: +e1* +matches a sequence of zero or more (possibly different) +strings, each of which match e1; +e1+ +matches one or more; +e1? +matches zero or one. +

+ +

+The operator precedence, from weakest to strongest binding, is +first alternation, then concatenation, and finally the +repetition operators. +Explicit parentheses can be used to force different meanings, +just as in arithmetic expressions. +Some examples: +ab|cd +is equivalent to +(ab)|(cd); +ab* +is equivalent to +a(b*). +

+ +

+The syntax described so far is a subset of the traditional Unix +egrep +regular expression syntax. +This subset suffices to describe all regular +languages: loosely speaking, a regular language is a set +of strings that can be matched in a single pass through +the text using only a fixed amount of memory. +Newer regular expression facilities (notably Perl and +those that have copied it) have added +many new operators +and escape sequences. These additions make the regular +expressions more concise, and sometimes more cryptic, but usually +not more powerful: +these fancy new regular expressions almost always have longer +equivalents using the traditional syntax. +

+ +

+One common regular expression extension that +does provide additional power is called +backreferences. +A backreference like +\1 +or +\2 +matches the string matched +by a previous parenthesized expression, and only that string: +(cat|dog)\1 +matches +catcat +and +dogdog +but not +catdog +nor +dogcat. +As far as the theoretical term is concerned, +regular expressions with backreferences +are not regular expressions. +The power that backreferences add comes at great cost: +in the worst case, the best known implementations require +exponential search algorithms, +like the one Perl uses. +Perl (and the other languages) +could not now remove backreference support, +of course, but they could employ much faster algorithms +when presented with regular expressions that don't have +backreferences, like the ones considered above. +This article is about those faster algorithms. +

+ +

+Finite Automata +

+ + + +

+Another way to describe sets of character strings is with +finite automata. +Finite automata are also known as state machines, +and we will use “automaton” and “machine” interchangeably. +

+ +

+As a simple example, here is a machine recognizing +the set of strings matched by the regular expression +a(bb)+a: +

+ +

DFA for a(bb)+a

+ +

+A finite automaton is always in one of its states, +represented in the diagram by circles. +(The numbers inside the circles are labels to make this +discussion easier; they are not part of the machine's operation.) +As it reads the string, it switches from state to state. +This machine has two special states: the start state s0 +and the matching state s4. +Start states are depicted with lone arrowheads pointing at them, +and matching states are drawn as a double circle. +

+ +

+The machine reads an input string one character at a time, +following arrows corresponding to the input to move from +state to state. +Suppose the input string is +abbbba. +When the machine reads the first letter of the string, the +a, +it is in the start state s0. It follows the +a +arrow to state s1. +This process repeats as the machine reads the rest of the string: +b +to +s2, +b +to +s3, +b +to +s2, +b +to +s3, +and finally +a +to +s4. +

+

DFA execution on abbbba

+

+The machine ends in s4, a matching state, so it +matches the string. +If the machine ends in a non-matching state, it does not +match the string. +If, at any point during the machine's execution, there is no +arrow for it to follow corresponding to the current +input character, the machine stops executing early. +

+ +

+The machine we have been considering is called a +deterministic +finite automaton (DFA), +because in any state, each possible input letter +leads to at most one new state. +We can also create machines +that must choose between multiple possible next states. +For example, this machine is equivalent to the previous +one but is not deterministic: +

+

NFA for a(bb)+a

+

+The machine is not deterministic because if it reads a +b +in state s2, it has multiple choices for the next state: +it can go back to s1 in hopes of seeing another +bb, +or it can go on to s3 in hopes of seeing the final +a. +Since the machine cannot peek ahead to see the rest of +the string, it has no way to know which is the correct decision. +In this situation, it turns out to be interesting to +let the machine +always guess correctly. +Such machines are called non-deterministic finite automata +(NFAs or NDFAs). +An NFA matches an input string if there is some way +it can read the string and follow arrows to a matching state. +

+ +

+Sometimes it is convenient to let NFAs have arrows with no +corresponding input character. We will leave these arrows unlabeled. +An NFA can, at any time, choose to follow an unlabeled arrow +without reading any input. +This NFA is equivalent to the previous two, but the unlabeled arrow +makes the correspondence with +a(bb)+a +clearest: +

+

Another NFA for a(bb)+a

+ +

+Converting Regular Expressions to NFAs +

+ +

+Regular expressions and NFAs turn out to be exactly +equivalent in power: every regular expression has an +equivalent NFA (they match the same strings) and vice versa. +(It turns out that DFAs are also equivalent in power +to NFAs and regular expressions; we will see this later.) +There are multiple ways to translate regular expressions into NFAs. +The method described here was first described by Thompson +in his 1968 CACM paper. +

+ +

+The NFA for a regular expression is built up from partial NFAs +for each subexpression, with a different construction for +each operator. The partial NFAs have +no matching states: instead they have one or more dangling arrows, +pointing to nothing. The construction process will finish by +connecting these arrows to a matching state. +

+ +

+The NFAs for matching single characters look like: +

+

Single-character NFA

+

+The NFA for the concatenation e1e2 +connects the final arrow of the e1 +machine to the start of the e2 machine: +

+

Concatenation NFA

+

+The NFA for the alternation e1|e2 +adds a new start state with a choice of either the +e1 machine or the e2 machine. +

+

Alternation NFA

+

+The NFA for e? alternates the e machine with an empty path: +

+

Zero or one NFA

+

+The NFA for e* uses the same alternation but loops a +matching e machine back to the start: +

+

Zero or more NFA

+

+The NFA for e+ also creates a loop, but one that +requires passing through e at least once: +

+

One or more NFA

+ +

+Counting the new states in the diagrams above, we can see +that this technique creates exactly one state per character +or metacharacter in the regular expression, +excluding parentheses. +Therefore the number of states in the final NFA is at most +equal to the length of the original regular expression. +

+ +

+Just as with the example NFA discussed earlier, it is always possible +to remove the unlabeled arrows, and it is also always possible to generate +the NFA without the unlabeled arrows in the first place. +Having the unlabeled arrows makes the NFA easier for us to read +and understand, and they also make the C representation +simpler, so we will keep them. +

+ +

+Regular Expression Search Algorithms +

+ +

+Now we have a way to test whether a regular expression +matches a string: convert the regular expression to an NFA +and then run the NFA using the string as input. +Remember that NFAs are endowed with the ability to guess +perfectly when faced with a choice of next state: +to run the NFA using an ordinary computer, we must find +a way to simulate this guessing. +

+ +

+One way to simulate perfect guessing is to guess +one option, and if that doesn't work, try the other. +For example, consider the NFA for +abab|abbb +run on the string +abbb: +

+

NFA for abab|abbb

+

Backtracking execution on abbb

+

+At step 0, the NFA must make a choice: try to match +abab +or +try to match +abbb? +In the diagram, the NFA tries +abab, +but that fails after step 3. +The NFA then tries the other choice, leading to step 4 and eventually a match. +This backtracking approach +has a simple recursive implementation +but can read the input string many times +before succeeding. +If the string does not match, +the machine must try +all +possible execution paths before +giving up. +The NFA tried only two different paths in the example, +but in the worst case, there can be exponentially +many possible execution paths, leading to very slow run times. +

+ +

+A more efficient but more complicated way to simulate perfect +guessing is to guess both options simultaneously. +In this approach, the simulation allows the machine +to be in multiple states at once. To process each letter, +it advances all the states along all the arrows that +match the letter. +

+

Parallel execution on abbb

+

+The machine starts in the start state and all the states +reachable from the start state by unlabeled arrows. +In steps 1 and 2, the NFA is in two states simultaneously. +Only at step 3 does the state set narrow down to a single state. +This multi-state approach tries both paths at the same time, +reading the input only once. +In the worst case, the NFA might be in +every +state at each step, but this results in at worst a constant amount +of work independent of the length of the string, +so arbitrarily +large input strings can be processed in linear time. +This is a dramatic improvement over the exponential time +required by the backtracking approach. +The efficiency comes from tracking the set of reachable +states but +not +which paths were used to reach them. +In an NFA with +n +nodes, there can only be +n +reachable states at any step, but there might be +2^{n} paths through the NFA. +

+ +

+Implementation +

+ +

+Thompson introduced the multiple-state simulation approach +in his 1968 paper. +In his formulation, the states of the NFA were represented +by small machine-code sequences, and the list of possible states +was just a sequence of function call instructions. +In essence, Thompson compiled the regular expression into clever +machine code. +Forty years later, computers are much faster and the +machine code approach is not as necessary. +The following sections +present an implementation written in portable ANSI C. +The full source code (under 400 lines) +and the benchmarking scripts are +available online. +(Readers who are unfamiliar or uncomfortable with C or pointers should +feel free to read the descriptions and skip over the actual code.) +

+ +

+Implementation: Compiling to NFA +

+ +

+The first step is to compile the regular expression +into an equivalent NFA. +In our C program, we will represent an NFA as a +linked collection of +State +structures: +

+
+struct State
+{
+	int c;
+	State *out;
+	State *out1;
+	int lastlist;
+};
+

+Each +State +represents one of the following three NFA fragments, +depending on the value of +c. +

+

Possible per-State NFA fragments

+

+(Lastlist +is used during execution and is explained in the next section.) +

+ +

+Following Thompson's paper, +the compiler builds an NFA from a regular expression in +postfix +notation with dot +(.) added +as an explicit concatenation operator. +A separate function +re2post +rewrites infix regular expressions like +“a(bb)+a” +into equivalent postfix expressions like +“abb.+.a.”. +(A “real” implementation would certainly +need to use dot as the “any character” metacharacter +rather than as a concatenation operator. +A real implementation would also probably build the +NFA during parsing rather than build an explicit postfix expression. +However, the postfix version is convenient and follows +Thompson's paper more closely.) +

+ +

+As the compiler scans the postfix expression, it maintains +a stack of computed NFA fragments. +Literals push new NFA fragments onto the stack, while +operators pop fragments off the stack and then +push a new fragment. +For example, +after compiling the +abb in abb.+.a., +the stack contains NFA fragments for +a, +b, +and +b. +The compilation of the +. +that follows pops the two +b +NFA fragments from the stack and pushes an NFA fragment for the +concatenation +bb.. +Each NFA fragment is defined by its start state and its +outgoing arrows: +

+struct Frag
+{
+	State *start;
+	Ptrlist *out;
+};
+

+Start +points at the start state for the fragment, +and +out +is a list of pointers to +State* +pointers that are not yet connected to anything. +These are the dangling arrows in the NFA fragment. +

+ +

+Some helper functions manipulate pointer lists: +

+Ptrlist *list1(State **outp);
+Ptrlist *append(Ptrlist *l1, Ptrlist *l2);
+
+void patch(Ptrlist *l, State *s);
+

+List1 +creates a new pointer list containing the single pointer +outp. +Append +concatenates two pointer lists, returning the result. +Patch +connects the dangling arrows in the pointer list +l +to the state +s: +it sets +*outp += +s +for each pointer +outp +in +l. +

+ +

+Given these primitives and a fragment stack, +the compiler is a simple loop over the postfix expression. +At the end, there is a single fragment left: +patching in a matching state completes the NFA. +

+State*
+post2nfa(char *postfix)
+{
+	char *p;
+	Frag stack[1000], *stackp, e1, e2, e;
+	State *s;
+
+	#define push(s) *stackp++ = s
+	#define pop()   *--stackp
+
+	stackp = stack;
+	for(p=postfix; *p; p++){
+		switch(*p){
+		/* compilation cases, described below */
+		}
+	}
+	
+	e = pop();
+	patch(e.out, matchstate);
+	return e.start;
+}
+

+The specific compilation cases mimic the translation +steps described earlier. +

+ + +

+Literal characters: +

+default:
+	s = state(*p, NULL, NULL);
+	push(frag(s, list1(&s->out)));
+	break;
+
+
+ +

+Catenation: +

+case '.':
+	e2 = pop();
+	e1 = pop();
+	patch(e1.out, e2.start);
+	push(frag(e1.start, e2.out));
+	break;
+
+
+ +

+Alternation: +

+case '|':
+	e2 = pop();
+	e1 = pop();
+	s = state(Split, e1.start, e2.start);
+	push(frag(s, append(e1.out, e2.out)));
+	break;
+
+
+ +

+Zero or one: +

+case '?':
+	e = pop();
+	s = state(Split, e.start, NULL);
+	push(frag(s, append(e.out, list1(&s->out1))));
+	break;
+
+
+ +

+Zero or more: +

+case '*':
+	e = pop();
+	s = state(Split, e.start, NULL);
+	patch(e.out, s);
+	push(frag(s, list1(&s->out1)));
+	break;
+
+
+ +

+One or more: +

+case '+':
+	e = pop();
+	s = state(Split, e.start, NULL);
+	patch(e.out, s);
+	push(frag(e.start, list1(&s->out1)));
+	break;
+
+
+
+ +

+Implementation: Simulating the NFA +

+ +

+Now that the NFA has been built, we need to simulate it. +The simulation requires tracking +State +sets, which are stored as a simple array list: +

+struct List
+{
+	State **s;
+	int n;
+};
+

+The simulation uses two lists: +clist +is the current set of states that the NFA is in, +and +nlist +is the next set of states that the NFA will be in, +after processing the current character. +The execution loop initializes +clist +to contain just the start state and then +runs the machine one step at a time. +

+int
+match(State *start, char *s)
+{
+	List *clist, *nlist, *t;
+
+	/* l1 and l2 are preallocated globals */
+	clist = startlist(start, &l1);
+	nlist = &l2;
+	for(; *s; s++){
+		step(clist, *s, nlist);
+		t = clist; clist = nlist; nlist = t;	/* swap clist, nlist */
+	}
+	return ismatch(clist);
+}
+

+To avoid allocating on every iteration of the loop, +match +uses two preallocated lists +l1 +and +l2 +as +clist +and +nlist, +swapping the two after each step. +

+ +

+If the final state list contains the matching state, +then the string matches. +

+int
+ismatch(List *l)
+{
+	int i;
+
+	for(i=0; i<l->n; i++)
+		if(l->s[i] == matchstate)
+			return 1;
+	return 0;
+}
+

+

+ +

+Addstate +adds a state to the list, +but not if it is already on the list. +Scanning the entire list for each add would be inefficient; +instead the variable +listid +acts as a list generation number. +When +addstate +adds +s +to a list, +it records +listid +in +s->lastlist. +If the two are already equal, +then +s +is already on the list being built. +Addstate +also follows unlabeled arrows: +if +s +is a +Split +state with two unlabeled arrows to new states, +addstate +adds those states to the list instead of +s. +

+void
+addstate(List *l, State *s)
+{
+	if(s == NULL || s->lastlist == listid)
+		return;
+	s->lastlist = listid;
+	if(s->c == Split){
+		/* follow unlabeled arrows */
+		addstate(l, s->out);
+		addstate(l, s->out1);
+		return;
+	}
+	l->s[l->n++] = s;
+}
+

+

+ +

+Startlist +creates an initial state list by adding just the start state: +

+List*
+startlist(State *s, List *l)
+{
+	listid++;
+	l->n = 0;
+	addstate(l, s);
+	return l;
+}
+

+

+ +

+Finally, +step +advances the NFA past a single character, using +the current list +clist +to compute the next list +nlist. +

+void
+step(List *clist, int c, List *nlist)
+{
+	int i;
+	State *s;
+
+	listid++;
+	nlist->n = 0;
+	for(i=0; i<clist->n; i++){
+		s = clist->s[i];
+		if(s->c == c)
+			addstate(nlist, s->out);
+	}
+}
+
+ +

+Performance +

+ +

+The C implementation just described was not written with performance in mind. +Even so, a slow implementation of a linear-time algorithm +can easily outperform a fast implementation of an +exponential-time algorithm once the exponent is large enough. +Testing a variety of popular regular expression engines on +a so-called pathological regular expression demonstrates this nicely. +

+ +

+Consider the regular expression +a?^{n}a^{n}. +It matches the string +a^{n} +when the +a? +are chosen not to match any letters, +leaving the entire string to be matched by the +a^{n}. +Backtracking regular expression implementations +implement the zero-or-one +? +by first trying one and then zero. +There are +n +such choices to make, a total of +2^{n} possibilities. +Only the very last +possibility—choosing zero for all the ?—will lead to a match. +The backtracking approach thus requires +O(2^{n}) time, so it will not scale much beyond n=25. +

+ +

+In contrast, Thompson's algorithm maintains state lists of length +approximately n and processes the string, also of length n, +for a total of O(n^{2}) time. +(The run time is superlinear, +because we are not keeping the regular expression constant +as the input grows. +For a regular expression of length m run on text of length n, +the Thompson NFA requires O(mn) time.) +

+ +

+The following graph plots time required to check whether +a?^{n}a^{n} +matches +a^{n}: +

+ +
+
+
+
+
+Performance graph +
+regular expression and text size n +
+a?^{n}a^{n} +matching +a^{n} +
+
+
+
+
+ +

+Notice that the graph's y-axis has a logarithmic scale, +in order to be able to see a wide variety of times on a single graph. +

+ +

+From the graph it is clear that Perl, PCRE, Python, and Ruby are +all using recursive backtracking. +PCRE stops getting the right answer at +n=23, +because it aborts the recursive backtracking after a maximum number +of steps. +As of Perl 5.6, Perl's regular expression engine is +said to memoize +the recursive backtracking search, which should, at some memory cost, +keep the search from taking exponential amounts of time +unless backreferences are being used. +As the performance graph shows, the memoization is not complete: +Perl's run time grows exponentially even though there +are no backreferences +in the expression. +Although not benchmarked here, Java uses a backtracking +implementation too. +In fact, the +java.util.regex +interface requires a backtracking +implementation, because arbitrary Java code +can be substituted into the matching path. +PHP uses the PCRE library. +

+ +

+The thick blue line is the C implementation of Thompson's algorithm given above. +Awk, Tcl, GNU grep, and GNU awk +build DFAs, either precomputing them or using the on-the-fly +construction described in the next section. +

+ +

+Some might argue that this test is unfair to +the backtracking implementations, since it focuses on an +uncommon corner case. +This argument misses the point: +given a choice between an implementation +with a predictable, consistent, fast running time on all inputs +or one that usually runs quickly but can take +years of CPU time (or more) on some inputs, +the decision should be easy. +Also, while examples as dramatic as this one +rarely occur in practice, less dramatic ones do occur. +Examples include using +(.*) +(.*) +(.*) +(.*) +(.*) +to split five space-separated fields, or using +alternations where the common cases +are not listed first. +As a result, programmers often learn which constructs are +expensive and avoid them, or they turn to so-called +optimizers. +Using Thompson's NFA simulation does not require such adaptation: +there are no expensive regular expressions. +

+ +

+Caching the NFA to build a DFA +

+ +

+Recall that DFAs are more efficient to execute than NFAs, +because DFAs are only ever in one state at a time: they never +have a choice of multiple next states. +Any NFA can be converted into an equivalent DFA +in which each DFA state corresponds to a +list of NFA states. +

+ +

+For example, here is the NFA we used earlier for +abab|abbb, +with state numbers added: +

+

NFA for abab|abbb

+

+The equivalent DFA would be: +

+

DFA for abab|abbb

+

+Each state in the DFA corresponds to a list of +states from the NFA. +

+ +

+In a sense, Thompson's NFA simulation is +executing the equivalent DFA: each +List +corresponds to some DFA state, +and the +step +function is computing, given a list and a next character, +the next DFA state to enter. +Thompson's algorithm simulates the DFA by +reconstructing each DFA state as it is needed. +Rather than throw away this work after each step, +we could cache the +Lists +in spare memory, avoiding the cost of repeating the computation +in the future +and essentially computing the equivalent DFA as it is needed. +This section presents the implementation of such an approach. +Starting with the NFA implementation from the previous section, +we need to add less than 100 lines to build a DFA implementation. +

+ +

+To implement the cache, we first introduce a new data type +that represents a DFA state: +

+struct DState
+{
+	List l;
+	DState *next[256];
+	DState *left;
+	DState *right;
+};
+

+A +DState +is the cached copy of the list +l. +The array +next +contains pointers to the next state for each +possible input character: +if the current state is +d +and the next input character is +c, +then +d->next[c] +is the next state. +If +d->next[c] +is null, then the next state has not been computed yet. +Nextstate +computes, records, and returns the next state +for a given state and character. +

+ +

+The regular expression match follows +d->next[c] +repeatedly, calling +nextstate +to compute new states as needed. +

+int
+match(DState *start, char *s)
+{
+	int c;
+	DState *d, *next;
+	
+	d = start;
+	for(; *s; s++){
+		c = *s & 0xFF;
+		if((next = d->next[c]) == NULL)
+			next = nextstate(d, c);
+		d = next;
+	}
+	return ismatch(&d->l);
+}
+

+

+ +

+All the +DStates +that have been computed need to be saved in a +structure that lets us look up a +DState +by its +List. +To do this, we arrange them +in a binary tree +using the sorted +List +as the key. +The +dstate +function returns the +DState +for a given +List, +allocating one if necessary: +

+DState*
+dstate(List *l)
+{
+	int i;
+	DState **dp, *d;
+	static DState *alldstates;
+
+	qsort(l->s, l->n, sizeof l->s[0], ptrcmp);
+
+	/* look in tree for existing DState */
+	dp = &alldstates;
+	while((d = *dp) != NULL){
+		i = listcmp(l, &d->l);
+		if(i < 0)
+			dp = &d->left;
+		else if(i > 0)
+			dp = &d->right;
+		else
+			return d;
+	}
+	
+	/* allocate, initialize new DState */
+	d = malloc(sizeof *d + l->n*sizeof l->s[0]);
+	memset(d, 0, sizeof *d);
+	d->l.s = (State**)(d+1);
+	memmove(d->l.s, l->s, l->n*sizeof l->s[0]);
+	d->l.n = l->n;
+
+	/* insert in tree */
+	*dp = d;
+	return d;
+}
+

+Nextstate runs the NFA +step +and returns the corresponding +DState: +

+DState*
+nextstate(DState *d, int c)
+{
+	step(&d->l, c, &l1);
+	return d->next[c] = dstate(&l1);
+}
+

+Finally, the DFA's start state is the +DState +corresponding to the NFA's start list: +

+DState*
+startdstate(State *start)
+{
+	return dstate(startlist(start, &l1));
+}
+

+(As in the NFA simulation, +l1 +is a preallocated +List.) +

+ +

+The +DStates +correspond to DFA states, but the DFA is only built as needed: +if a DFA state has not been encountered during the search, +it does not yet exist in the cache. +An alternative would be to compute the entire DFA at once. +Doing so would make +match +a little faster by removing the conditional branch, +but at the cost of increased startup time and +memory use. +

+ +

+One might also worry about bounding the amount of +memory used by the on-the-fly DFA construction. +Since the +DStates +are only a cache of the +step +function, the implementation of +dstate +could choose to throw away the entire DFA so far +if the cache grew too large. +This cache replacement policy +only requires a few extra lines of code in +dstate +and in +nextstate, +plus around 50 lines of code for memory management. +An implementation is +available online. +(Awk +uses a similar limited-size cache strategy, +with a fixed limit of 32 cached states; this explains the discontinuity +in its performance at n=28 in the graph above.) +

+ +

+NFAs derived from regular expressions +tend to exhibit good locality: they visit the same states +and follow the same transition arrows over and over +when run on most texts. +This makes the caching worthwhile: the first time an arrow +is followed, the next state must be computed as in the NFA +simulation, but future traversals of the arrow are just +a single memory access. +Real DFA-based implementations can make use +of additional optimizations to run even faster. +A companion article (not yet written) will explore +DFA-based regular expression implementations in more detail. +

+ + +

+Real world regular expressions +

+ +

+Regular expression usage in real programs +is somewhat more complicated than what the regular expression +implementations described above can handle. +This section briefly describes the common complications; +full treatment of any of these is beyond the scope of this +introductory article. +

+ +

+Character classes. +A character class, whether +[0-9] +or +\w +or +. (dot), +is just a concise representation of an alternation. +Character classes can be expanded into alternations +during compilation, though it is more efficient to add +a new kind of NFA node to represent them explicitly. +POSIX +defines special character classes +like [[:upper:]] that change meaning +depending on the current locale, but the hard part of +accommodating these is determining their meaning, +not encoding that meaning into an NFA. +

+ +

+Escape sequences. +Real regular expression syntaxes need to handle +escape sequences, both as a way to match metacharacters +(\(, +\), +\\, +etc.) +and to specify otherwise difficult-to-type characters such as +\n. +

+ +

+Counted repetition. +Many regular expression implementations provide a counted +repetition operator +{n} +to match exactly +n +strings matching a pattern; +{n,m} +to match at least +n +but no more than +m; +and +{n,} +to match +n +or more. +A recursive backtracking implementation can implement +counted repetition using a loop; an NFA or DFA-based +implementation must expand the repetition: +e{3} +expands to +eee; +e{3,5} +expands to +eeee?e?, +and +e{3,} +expands to +eee+. +

+ +

+Submatch extraction. +When regular expressions are used for splitting or parsing strings, +it is useful to be able to find out which sections of the input string +were matched by each subexpression. +After a regular expression like +([0-9]+-[0-9]+-[0-9]+) +([0-9]+:[0-9]+) +matches a string (say a date and time), +many regular expression engines make the +text matched by each parenthesized expression +available. +For example, one might write in Perl: +

+if(/([0-9]+-[0-9]+-[0-9]+) ([0-9]+:[0-9]+)/){
+	print "date: $1, time: $2\n";
+}
+

+The extraction of submatch boundaries has been mostly ignored +by computer science theorists, and it is perhaps the most +compelling argument for using recursive backtracking. +However, Thompson-style algorithms can be adapted to +track submatch boundaries without giving up efficient performance. +The Eighth Edition Unix +regexp(3) +library implemented such an algorithm as early as 1985, +though as explained below, +it was not very widely used or even noticed. +

+ +

+Unanchored matches. +This article has assumed that regular expressions +are matched against an entire input string. +In practice, one often wishes to find a substring +of the input that matches the regular expression. +Unix tools traditionally return the longest matching substring +that starts at the leftmost possible point in the input. +An unanchored search for +e +is a special case +of submatch extraction: it is like searching for +.*(e).* +where the first +.* +is constrained to match as short a string as possible. +

+ +

+Non-greedy operators. +In traditional Unix regular expressions, the repetition operators +?, +*, +and ++ +are defined to match as much of the string as possible while +still allowing the entire regular expression to match: +when matching +(.+)(.+) +against +abcd, +the first +(.+) +will match +abc, +and the second +will match +d. +These operators are now called +greedy. +Perl introduced +??, +*?, +and ++? +as non-greedy versions, which match as little of the string +as possible while preserving the overall match: +when matching +(.+?)(.+?) +against +abcd, +the first +(.+?) +will match only +a, +and the second +will match +bcd. +By definition, whether an operator is greedy +cannot affect whether a regular expression matches a +particular string as a whole; it only affects the +choice of submatch boundaries. +The backtracking algorithm admits a simple implementation +of non-greedy operators: +try the shorter match before the longer one. +For example, in a standard backtracking implementation, +e? +first tries using +e +and then tries not using it; +e?? +uses the other order. +The submatch-tracking variants of Thompson's algorithm +can be adapted to accommodate non-greedy operators. +

+ +

+Assertions. +The traditional regular expression metacharacters +^ +and +$ +can be viewed as +assertions +about the text around them: +^ +asserts that the previous character +is a newline (or the beginning of the string), +while +$ +asserts that the next character is a newline +(or the end of the string). +Perl added more assertions, like +the word boundary +\b, +which asserts that +the previous character is alphanumeric but the next +is not, or vice versa. +Perl also generalized the idea to arbitrary +conditions called lookahead assertions: +(?=re) +asserts that the text after the current input position matches +re, +but does not actually advance the input position; +(?!re) +is similar but +asserts that the text does not match +re. +The lookbehind assertions +(?<=re) +and +(?<!re) +are similar but make assertions about the text +before the current input position. +Simple assertions like +^, +$, +and +\b +are easy to accommodate in an NFA, +delaying the match one byte for forward assertions. +The generalized assertions +are harder to accommodate but in principle could +be encoded in the NFA. +

+ +

+Backreferences. +As mentioned earlier, no one knows how to +implement regular expressions with backreferences efficiently, +though no one can prove that it's impossible either. +(Specifically, the +problem is NP-complete, meaning that if +someone did find an efficient implementation, that would +be major news to computer scientists and would +win a million dollar prize.) +The simplest, most effective strategy for backreferences, +taken by the original awk and egrep, is not to implement them. +This strategy is no longer practical: users have come to +rely on backreferences for at least occasional use, +and backreferences are part of +the +POSIX standard for regular expressions. +Even so, it would be reasonable to use Thompson's NFA simulation +for most regular expressions, and only bring out +backtracking when it is needed. +A particularly clever implementation could combine the two, +resorting to backtracking only to accommodate the backreferences. +

+ +

+Backtracking with memoization. +Perl's approach of using memoization to avoid exponential blowup +during backtracking +when possible is a good one. At least in theory, it should make +Perl's regular expressions behave more like an NFA and +less like backtracking. +Memoization does not completely solve the problem, though: +the memoization itself requires a memory footprint roughly +equal to the size of the text times the size of the regular expression. +Memoization also does not address the issue of the stack space used +by backtracking, which is linear in the size of the text: +matching long strings typically causes a backtracking +implementation to run out of stack space: +

+$ perl -e '("a" x 100000) =~ /^(ab?)*$/;'
+Segmentation fault (core dumped)
+$
+
+ +

+Character sets. +Modern regular expression implementations must deal with +large non-ASCII character sets such as Unicode. +The +Plan 9 regular expression library +incorporates Unicode by running an NFA with a +single Unicode character as the input character for each step. +That library separates the running of the NFA from decoding +the input, so that the same regular expression matching code +is used for both +UTF-8 +and wide-character inputs. +

+ +

+History and References +

+ + +

+Michael Rabin and Dana Scott +introduced non-deterministic finite automata +and the concept of non-determinism in 1959 +[7], +showing that NFAs can be simulated by +(potentially much larger) DFAs in which +each DFA state corresponds to a set of NFA states. +(They won the Turing Award in 1976 for the introduction +of the concept of non-determinism in that paper.) +

+ +

+R. McNaughton and H. Yamada +[4] +and +Ken Thompson +[9] +are commonly credited with giving the first constructions +to convert regular expressions into NFAs, +even though neither paper mentions the +then-nascent concept of an NFA. +McNaughton and Yamada's construction +creates a DFA, +and Thompson's construction creates IBM 7094 machine code, +but reading between the lines one can +see latent NFA constructions underlying both. +Regular expression to NFA constructions differ only in how they encode +the choices that the NFA must make. +The approach used above, mimicking Thompson, +encodes the choices with explicit choice +nodes +(the +Split +nodes above) +and unlabeled arrows. +An alternative approach, +the one most commonly credited to McNaughton and Yamada, +is to avoid unlabeled arrows, instead allowing NFA states to +have multiple outgoing arrows with the same label. +McIlroy +[3] +gives a particularly elegant implementation of this approach +in Haskell. +

+ +

+Thompson's regular expression implementation +was for his QED editor running on the CTSS +[10] +operating +system on the IBM 7094. +A copy of the editor can be found in archived CTSS sources +[5]. +L. Peter Deutsch and Butler Lampson +[1] +developed the first QED, but +Thompson's reimplementation was the first to use +regular expressions. +Dennis Ritchie, author of yet another QED implementation, +has documented the early history of the QED editor +[8]. +(Thompson, Ritchie, and Lampson later won +Turing awards for work unrelated to QED or finite automata.) +

+ +

+Thompson's paper marked the +beginning of a long line of regular expression implementations. +Thompson chose not to use his algorithm when +implementing the text editor ed, which appeared in +First Edition Unix (1971), or in its descendant grep, +which first appeared in the Fourth Edition (1973). +Instead, these venerable Unix tools used +recursive backtracking! +Backtracking was justifiable because the +regular expression syntax was quite limited: +it omitted grouping parentheses and the +|, +?, +and ++ +operators. +Al Aho's egrep, +which first appeared in the Seventh Edition (1979), +was the first Unix tool to provide +the full regular expression syntax, using a +precomputed DFA. +By the Eighth Edition (1985), egrep computed the DFA on the fly, +like the implementation given above. +

+ +

+While writing the text editor sam +[6] +in the early 1980s, +Rob Pike wrote a new regular expression implementation, +which Dave Presotto extracted into a library that +appeared in the Eighth Edition. +Pike's implementation +incorporated submatch tracking into an efficient NFA simulation +but, like the rest of the Eighth Edition source, was not widely +distributed. +Pike himself did not realize that his technique was anything new. +Henry Spencer reimplemented the Eighth Edition library +interface from scratch, but using backtracking, +and +released his implementation +into the public domain. +It became very widely used, eventually serving as the basis +for the slow regular expression implementations +mentioned earlier: Perl, PCRE, Python, and so on. +(In his defense, +Spencer knew the routines could be slow, +and he didn't know that a more efficient algorithm existed. +He even warned in the documentation, +“Many users have found the speed perfectly adequate, +although replacing the insides of egrep with this code +would be a mistake.”) +Pike's regular expression implementation, extended to +support Unicode, was made freely available +with sam in +late 1992, +but the particularly efficient +regular expression search algorithm went unnoticed. +The code is now available in many forms: as +part of sam, +as +Plan 9's regular expression library, +or +packaged separately for Unix. +Ville Laurikari independently discovered Pike's algorithm +in 1999, developing a theoretical foundation as well +[2]. +

+ + +

+Finally, any discussion of regular expressions +would be incomplete without mentioning +Jeffrey Friedl's book +Mastering Regular Expressions, +perhaps the most popular reference among today's programmers. +Friedl's book teaches programmers how best to use today's +regular expression implementations, but not how best to implement them. +What little text it devotes to implementation +issues perpetuates the widespread belief that recursive backtracking +is the only way to simulate an NFA. +Friedl makes it clear that he +neither understands nor respects +the underlying theory. +

+ +

+Summary +

+ +

+Regular expression matching can be simple and fast, using +finite automata-based techniques that have been known for decades. +In contrast, Perl, PCRE, Python, Ruby, Java, +and many other languages +have regular expression implementations based on +recursive backtracking that are simple but can be +excruciatingly slow. +With the exception of backreferences, the features +provided by the slow backtracking implementations +can be provided by the automata-based implementations +at dramatically faster, more consistent speeds. +

+ +

+The next article in this series, +“Regular Expression Matching: the Virtual Machine Approach,” discusses NFA-based submatch extraction. +The third article, “Regular Expression Matching in the Wild,” examines a production implementation. +The fourth article, “Regular Expression Matching with a Trigram Index,” explains how Google Code Search was implemented. +

+ +

+Acknowledgements +

+ +

+Lee Feigenbaum, +James Grimmelmann, +Alex Healy, +William Josephson, +and +Arnold Robbins +read drafts of this article and made many helpful suggestions. +Rob Pike clarified some of the history surrounding his +regular expression implementation. +Thanks to all. +

+ +

+References +

+ +

+ +[1] +L. Peter Deutsch and Butler Lampson, +“An online editor,” +Communications of the ACM 10(12) (December 1967), pp. 793–799. +http://doi.acm.org/10.1145/363848.363863 +

+ +[2] +Ville Laurikari, +“NFAs with Tagged Transitions, +their Conversion to Deterministic Automata +and +Application to Regular Expressions,” +in Proceedings of the Symposium on String Processing and +Information Retrieval, September 2000. +http://laurikari.net/ville/spire2000-tnfa.ps +

+ +[3] +M. Douglas McIlroy, +“Enumerating the strings of regular languages,” +Journal of Functional Programming 14 (2004), pp. 503–518. +http://www.cs.dartmouth.edu/~doug/nfa.ps.gz (preprint) +

+ +[4] +R. McNaughton and H. Yamada, +“Regular expressions and state graphs for automata,” +IRE Transactions on Electronic Computers EC-9(1) (March 1960), pp. 39–47. +

+ +[5] +Paul Pierce, +“CTSS source listings.” +http://www.piercefuller.com/library/ctss.html +(Thompson's QED is in the file +com5 +in the source listings archive and is marked as +0QED) +

+ +[6] +Rob Pike, +“The text editor sam,” +Software—Practice & Experience 17(11) (November 1987), pp. 813–845. +http://plan9.bell-labs.com/sys/doc/sam/sam.html +

+ +[7] +Michael Rabin and Dana Scott, +“Finite automata and their decision problems,” +IBM Journal of Research and Development 3 (1959), pp. 114–125. +http://www.research.ibm.com/journal/rd/032/ibmrd0302C.pdf +

+ +[8] +Dennis Ritchie, +“An incomplete history of the QED text editor.” +http://plan9.bell-labs.com/~dmr/qed.html +

+ +[9] +Ken Thompson, +“Regular expression search algorithm,” +Communications of the ACM 11(6) (June 1968), pp. 419–422. +http://doi.acm.org/10.1145/363347.363387 +(PDF) +

+ +[10] +Tom Van Vleck, +“The IBM 7094 and CTSS.” +http://www.multicians.org/thvv/7094.html +

+ +
+

+Discussion on reddit and perlmonks and +LtU +

+ +
+

+Copyright © 2007 Russ Cox. All Rights Reserved. +
+http://swtch.com/~rsc/regexp/ +

+
+ + + + + diff --git a/t/suite/test.zst b/t/suite/test.zst new file mode 100644 index 0000000000000000000000000000000000000000..9d4e0c45e6f977558b8bcafaa0e69cfc07251433 GIT binary patch literal 20706 zcmV)kK%l=UwJ-f-TIi*Q0!qDZcQt?tjTJXbvwZ9fH3^kSti9}}{WXGFE#zp9kTMMC z(5|OAO~D{gyl`~4r_y~6!*p+hTMA}62MY(32N#VVi?BHADm6ROe*SH-o6c6pzj5-z zRfaTfv%G?G*@6Z{Ouo=HZ6RGY{j>C+vO2559_I~YXAT4)j|U4Fe4iiLv?&1YSm>b#Rt9FoSiz{-nX4n z`{UE=EBO%E;}ZrDl!$xON8ZH8rZ$&-)!ARFzgp#g@kQz|jj;G_cskgTxqL;gpRcFa zD8A|0-yORsat4vlpfPnrn)ht?Qqw%#s));wXBZg*wt~&e06G- ztHbV>+uX&4I;&rf*u@%MpEQ7=r~>Sv{#idsRZmDSo_^y+2b6z#HtpBg+LDVKsVIO$ zQE>o`nnkPsRj!V$hOi^$H?@Bg)L)ED!Jdo^jR~C{F^Y-<=0zaZT9X1~7$zG`UIAJ3hvcO0yG^0-YO0(-C!o!jX(SxlYS@0}|NlqagE zydY)wT#Ao5gFP%N&sCnz`_2e~Jr}eZ`Zaljc^ADVZ`7I11mLzOHSFnETDp?J!35MX zgxezR%KxUN{fr^>+*>|%CaLP_)p4sQSI;ojXOgOptdGIT7|yqiG(L6STLOD7I2Tru z`C6)ZP#sgu9QB{NxQsUoRbT3Ir^!+ye8Ueir2XpVPRfv8OX)*^Jifn9{UbY1sSMlu zx$2KOhHsyp@@z^ost=nzJ2FEKR-1;_LQVf>OK_E`b+4$MQco#cYScOzqN>PPnhSL` z>fK=Vs;l;-$8J2mzXyAKE?@dUnNS?8+L}x%I@!fa z^A}^Q6km+h)X0;?;!dxV-#1c^{UCf4V-XDBWHBc`SDrLfpO)9_>UUIM_?_*90Txe{UR8JZXlZwNE1H~bs4$7p#Au+vsmO3W6Q&Kn?!y)l- zRf_VF4U(rMQSXA0*r@vpX8wg?CKArAoJdIVwUrJbH)ROzUl``XFw%=Q&s`Y9NXdmo zczf?}^0(y{mzo_-mh2->j-u!Ef(|}Qd9#z`DXELA8f!Ib=8-9VZ%Qu+?RQkixh<4x zaipv9=>=n}W3SYoPM*G%Ck^&U@wp1N_|$QhwvysPz2Z_jJ5rzgJmut%Pu*M>_nqXxT;cF)rI)Sps3#Rk|z@zj2Gp4#@5a<#43sFmTquKH7dlLo^`DKD|#x61YW z2mv!WW0;EMf^l%{=#Ys0e!pW}bvPt=g6GPyy@E1Ax+rEP7mUUN1%ObofI1jZ91;s? zP(XM<9a5AJ14<%)4M#a@pt! 
zb+0C19FpcU0urj*Gqtx|BH}seUKpm@%5-5E$|Sik$n!kvTu?PZo!HY~80ugwfT0eY zjG@kMmH~}Vt%k1t^egDWFcZ2khJi2PzOX4}x-f>hTtkuz!$>ZeNCd*A{F2=s`S4KS zQnO>V+yMWfOc2k6LXi(oxgjYUF}-`1#qs3pG$JzD&E9C}VXT74=DEoqxk)c_upw*m zO#W0R#wy^p(mOUqfwJrN(D2C0^B*z1qW4}!pb=4ea3>n^kzIU|-{4nZ6sUxcWUK>6>wM{N}O z*pULWXxc(Zfm!(9WNE}~*e&8>)l<^6g%lVCdbx&JBm#g7#^K&rUat(V_Y0F;-q%81 z%qT-iy$odqH132-@lEYlg2sOCJmp4(&V`l46EF<_{e@v14k8vB4oE!v+^MPMH~1YA z>R@?&FvmrOL;^SV`{!||hEgVv1BC{KL^9FvfJ72;=mdla3OInF5hQ3VpcNPj z(3og|0sxnk_Xp*ROQ}zmkSArTGgyONaXbD_RsWoip};H@FlnbnOJ6nfzo3Yn`qKwu zJb5MkS*Tll%2?F2jar#Hnig9pucUee_Bc)3cU|W3?Z>y&HrJVfjj2!GY}o1r*fUpV zEG`&)6$JH^aCz=zSNwL$UU6g>jBkH(+tO zbG4m0EZN1Un%R~{%|7nx`@+h3sz1s9%OdY#Wwq0&&#ny(u(m1`b;<7+q*Gg-%*CZp z9vUD+xN|Wmzf;3?ZgDqmTSN6(2H~nc7xbz*?)&r&DQgc(>MN+dC}L;ou?zBSi{F&h zlbHJSE8#WYF}GGZ#5T`o)p#=KBX8KDod%Ws(W=9@5x@Bm*aL{oW*gy5RPwuihG8V6 zFo38u(6pVeX{!JQqJaa0h6_qH83aKK0cfFXzB-!O*01!Z92=10tElJ9#TQ+3<8!k$ zZhzZI|G@{Xi+sTpPhihiaIsIClHxFdXaIs+ zY<*vb`m4TDb}w+ibk$`y)UP0~l);081Ihv#OutgEmlSKPFsMu_83s>9A8XXQ?%c9T z#i8;h9x6;Ja3iJCK%rzTE;N$ipy`I{jbrS_E=VVT@kLU6(|6Q<=0GW~(~GC~_%y;h zviTmnsRLC$J7REUr4L^PsXv}HCd-v7VIni34j4dCn6Kq}dZscK7yb)9?tJanV323} zvp7@FSWxKi1mVG+^~oFM_BIfw_MsVzi@54LDQiT0dckkI!EsivF7{VU-o&1OXlQun zxeOa)%~EZ>W-KnQs;7Szi|a7;-f^sa89t!;u#Wl+^~DUP+Fyn}5YJdpT$k3c=7xJ~&1_E;R1 zWB;-%?ly*)1IbuWJT!KDKl#~V-i58QHJ4S2vfrIqwWqVAj_Pk#POs=h5ZJ#+?5GQY zJ^!>_9mgR)SaE3Z(4g_F#v4p=Xs9$)pmpcgQ&N~e1l8xCq4M+(K@cQmmBFZkzRdi&=kcj4PZ|&QIKwty(+g5+r?k)UWRCySD83BCNQrSc zJUD1jQHqK5BI!j7AQ1G>3zFiS;6BTrOxq9Z7Jp7clEVk4|{s8 zr7@GSa6H*PSBEXccI;EDD7_j=irHt?O`~Sc&z7=s-ZaHit&aK~8RYRea~$sb$g$-H zGmjd5XGqz%>HBOe;=P;x@|5&fovCi>+-gU69LLEmcANYPx4q}}411(@@jL2Qkkn3U z_uGCOL}~|t-}Z1k!Skx$hNslk3u2W9P&C6H%;TwP3t`@csXvA5-lnHi=XNaeZ?I}? 
zDmLJ3vJ@L$Yr%!9EpDXju!<&IEx#kXAl%9;{R&d2y_eT)OtrJW7P~pqET!(4m0aB7 zh;cybr-vt;EG4d`1a1|VlC*YkvQ)>;E##j%ezMyybMc|h?;m_^Bb{vV1$(a96jQNk zYSr<^i*)sZv44YqB|{oDd)6#V*9&R{gK)LID60HnaNSOJsB+znv|Sxqmy`8U&zqraFT??LF`6dldnT%an3s=jkKQmHMweC^stD!+84l`6A{| z9VtE+AEfUr#h&llukWl`wv_4_slzR@i*}3b%;(xNxUC_Gm@H)#!2kwm9%Rm*00+K_m}5(tQf2M7ib zlZnKkO3JFI6j}CTS=Mtw*U`nNVa_e{*e))6{6W6mL#+ltL?#SyCNS@E&3?wRn@9c2 z^A*A>Viu46E*SBf&`uUxubA;{C^s1Lh%2dX9INTrD=3n&pisCxfj#cr+A^LC*C%gw zzOszP1;o?OwwlLQ7x$mP&Gip%Yxoa((+2Z1$D23VIjBhd#>cLI{+z`t4H~sNm!uOs zk2wBEJ*acxqn_((tPI8>(R4!^YbD+!RC8!y1VV-$s!1D{au2%d;@kNftQJR1ul8U| zp4$Ct@s#F9+P3(k=Yo#eZ}0_Q z>Ue^%iV)ZXLBm8`W4*!g+PM@N2pBw681Ne$8B~E;Acd~H-f9hsb1TIM-I-_gRkSR5 zYSbL*8XtRq5!$xqZ(6l_PCtP?jvaGco=h)DGZqxuuTBK>#~vdD%~(*ht!O}D06{^? zSWr+PDh?Ivf^Eiv;<1bc#T7H`@u=Aq+xr_&pwJ1qZZ%_Z;SR9II4Jo@pa&%nlxGr| zJi(*h%LXEHgC(9Ic`LOhu8Tli*`9uir;jW*n8mpaoqeDFps%IQ9Cwgkq%NYod6E8u z(zX9#SHTuv1$)}fs@{O&KhdO{Up;0Si&Az6+N&@Dwosaz9o?{V@ zs-(G-WSvNn=6}Vl05cp6ORs|DGXs-7cqX^uYJkvzv{e``7u7FvA}2$dmPg zEMkJ^RjimY_5PXj-c5gk*A!E49pi8?ai~llB;0!dyN(Z_d^Xs{=bG3QoGksRFEDr> zG-{VjRn*&9N_RH2`mdoPVR>AAmMSJpV6X?mWl&|YlOJ>1lN##!+t>)^UDOwNg7=sS zIcR`r(15bh&;w25scAojc^CZ+R?4lO5@XirTa#s_j&ni&rQQa6s7S=V7=qgzLdc_m zYI=22e3r@AQ3+h^eW6~{N3C$bUsoO0JUw{=z2;v~?8xj$Po97|dl)juBYRKn_rV0W z7uDZHv5PH5zMfZuu{2fV>IJv4$g0z>kvA3Waq&?a-&zyOlLiK>K!HJ;TiQeNd#ohI zoRy5)@fhr-P&7#084C0=Igen9epb`@5 zkBGU~8}HY){vVX?5K_hy6Y<%NZS2?fy+4M~zPTG^mUnLF`^a%y27BHgyxj8qj_=Q% z%SHCFsQ?z?=OM^qkus?KFP6~{S!R6?_YfJ!PZuEhW8iY+Bvu1r{>T8FJp0G z^+krG;Q?X+rNIHk8D>)s8bBxyDhwzL)NytGL74%8NyP!>fx;E0P`JVhEMnnVQ1)#T z4C8R9FrY{;a(`i( z6Y9n?S37=`~*B0={qRG9StOJdm;$RnfmNWOz$6Tc zd6J9OkRf&K?AtK!q6`wr@K8`-6hIa%kZO?(YLSqSsS>KdSR}%rs*s0lR|#FP0OF4T zC@^MmR;R-1P*@$QX$}OF$%MiHP1|Xqz$^-k0tR8wxl?;=6f5s78N>4&pJeJHXV}vX z->TcZf7F{&*?IE{5Fe9Zcy@m{wO@=`J!j-sy_EMC0iw>-exgBL^Z=F6$(puPgQb-aM@7Q`Bohq-sAw3#!oq?_@T}zR{RKB} z#GwlosAEB*P1lKr0XQBUs|T|{91-expu0q(4-^8s_4kxd?Dt4Ws3XGV|KxZ)dI4u( zC7rur7{_ilf|qYLmux0GPf-6!dJ`1FjOwG7OLj{@FWD0ebIESZfscT3I53!0B~jI)5m>HL(>={K;t4n 
zsn+<|k>r9|$;E;KLz2?39txpQ9t|iuS#DHl0?HUX*ZQb4 zsCCuQJV{Y;fXM^ZEn0B@CV{YuT#CwV^21PAEe31g0MRgE0722I&6V&_2;~70nW(7R zi#@+FtkAd>6Cf}Yf$)?Moj`gdQq?X3lFIQjebv;@b&orsJWL)OCX>oT9VjZrhoNvS zG|s9Ss8C-hRZXFf1eec7@gtMTq{8z_moS8HBYBR`WHPC|SKap2&ffkA`h!GS_~CKRge z^V&f`G>8c_Ks0!yR0q<40R*Kn#X}(#%40%N2@@G_FIE)Q6OP6V6j@Dt)0~wv^`bWy zw#Ox@a)nhSAy4e0osM~Y5Y!XY!HzS-V*4c7n*pWvLHHhE&IOkj&<%ze=ES%ZWQuyT z0P0LF1I(eSg(6FjEtENeV`z2Mw9H!UdO{1JR!>lFu!|6s+9|!Mq3a-Zsoh`)-vbSf zn{VQbg!vF4p`s$4-(=TtqF=_GA9$^rZG#y@8e84jZDXgiqxX%Vq^K%`Vf-6Q=c%)! zI)*tMI8-DLlnBHm!eBC~NTi04q@(zr3r5wmWW8ah{~ln^ceHxqSN1fuDgpB@Ow$di zFF?m{TIj_yikN};JT68`Di{ZZY^kq05D;wB7mT0`;X}Zit9i3p2pvnSX)7_anFC>w z4#foGLJa6ZY8~T{Q0o#$HA$N;@yjLds}WAkj;u)|R{D{x3$h9X1V#B5VjPr*i35tO zti$A?!9fE`B$1qw@O-=Rd4{P@QQM!!X^pyncRvoi+jhr~wQeJ>P z8@vl~Qmdi2uXHOyg;9I<<_C$_dnFt-}|^MxFy9s+yfTdJM52cb$n$P?p`NT_bo z+^PM}<}I?!&}&9sfISvRwI+CQc|J$INgaO$z4`teb0D|Cnuoz2_U$||ls8LZ@%OZp zH4j6fp1>TRIDnuqz&Kd*B(?sg^?>Hd3r3I!2N0A8GpCB&+q5zV0+HbhJ!KST*z=(3 z`H$+gG3d86zQ*aK;306}@osx6eKmvLR>=IkJp1`w2ozRzdD=CYLQxZ|$Q zR*H%PK%TK$7TeP4JRD#g*QBkTLs4-6eZDkmRcE@z^DBr^Fpda18GfYzg-(9P&?mye z$khBRb`KbbJKnshMHz0BpQQNxawf$G!GwZwXhw#{M5L%V!0brV7;fTjAY_g20vaD% zi6Ki37{}EULTD1Mp71s5=~t(pISVdK%WIJngo3&U>s0n-BNo+V*eyn8ym%-K}O@jAoE*3 zCT9)=n3Y@_U>pHp0E2jdplEzbNp7**f42XI2LD`t7sM*}r#zk8=Qmg{_}x(i+5ahQ*9%Hust-dM)rVut z%4!IG)VeVJv-;b4GUq{5H@#qnebtax|2pEi?3Hh89a;ZmuQUSgq+ZpDtl4MIC9wnB zVH}dllG$>OPlALaJOE?*rLKkTu`J7C@#E|W+;hsYnQCXzb1*BpL@+KO)Kh9?G8!Ti z7#ETWjEl*ehBD*99_pzt{L_!yE~x!(b<}i>{XR0Ko>71y*5aG8f_9o~&5kbUTF8z$ z&zIQ)0L#M!9>g~>r0ItG6$F3>54|93sSkPZ&~!t#?4ZNPeuHCslc_51B%!hfxY-s{ z2s0G`b0AM*NFU1aL5rhDP&_@e_-zE>0FBA+8P0Syj*Pz(?V8Qc%JlM&blt-u(5@#~3nZ1^_@t00961J>wgcvH9RbJrT3Dhc=A{(S79B%1V#t%$>V0>p~d5ki~jMluvyff)1C(ldr|Dl1R6Q^-8ut%092`NMp{)=O&B z&xxU}P{uIqYAF4+6SM~|`CstCdye`7luf~F;LM1H=ow*(gjq3>N7b!YFex$OJ^NZm zKL|~m5wFD@nxf5s)M2@BnId=XSLfmuylY=bEArO01 zz=sSkAGrEa`%Bu@7`{yztghYd{FiII*+b!SzWBHD8`!cF>@gvE?@Pssv!@&SKBi|v 
z)?$$3(h3pH^qoseM8u)SGJ4)>Bv*IW0FktFXn7rUF&5#;e>MVBq3OB-`ZTK2-;&K#Qd<3W58QYD9xPqg^Yi>;q0W zsgs%-*ABUUMHhv6<`0Q8mR7NpnA5H7zn89I4F#nI!WHF5xt~Wh^MXRgw2J@zD^_s- zW6*@P9n@#!l6#C-hdXeX3l8QH)i)_Ke%BihB{s( z_Gl!wGcDB)qqL&xadAjw5@(>&V#%aL?<#7dw6j-Wo{jri2fAHah}65s?P3=Y?JDp{ zcZ58@u*qtX^i5;xEI9stc%yqHJtMywV0cQ?FVLLay_SqyB5^*iq|(c^5&M4;=(X3N z0QdHy^TQQI*>1u=*s2SUn6QDBWQ&m~H-O3(FvH&i>i`iGitXc~l&%UMSUA1$)Ico2 zvjIkHNbEkS-($JG4kbe3wX!{Lh8rOLeuQfnu_CY^k)|hTUUds*M5i_~><~(ng5G<^ z@7l>UI}jumL&>-}TkokmfUvkS zO}Ata?9^?!!AgHF$F=OO9bWoBU(W>^Cad2FhYhsJ#as|SK?juk|0N%A#OB;=35myW zaU3EVU*WAgSMQyaiZYo*GKEyNssxa{$T@s2)edL22s72<^YAab;6gmsjiKn-<<}M+ z{}fK4Ed{zcJe9c;*78Wd(JHNqX{`5u2QJMfSdclVi~{-EpP|LY`g3 zE7n?cP9Mrj%fqJCqeB#FKb0HD5-mf|b+O1*wGK?@37@O;LkN&W>2#(}X{?q}P1E|@ zHnQy<7Uai~H$#LA>DCjelbe7$r+wZcHeot{Lu=;LIh)KnLO3o<^8H!)ZLd%)R?IFV zJP{t*i`|H*M6>t%aEsJ&p^&cLl4WR>x$c=^ppYF@JcyK)O}P44l!KJTTfBm!q=16J zIN@MPseen||Nabu{0i!U;pzl^yYF^oO=tE1+7MRKJDXsrW(6l^sA}>=+j79aDcq{$ zXB=j=4@*Y>AZ3CQMOUEn2)|(MIrUXG&k2(R_*2;gpcGYw?ua1UEpp!Si5Ruyf}vaJ zeuT!<>^>xt0L&;{Pme&XLP`w!3XVL#K>cK<(|0%DpgQNf!=Fm!cdpEoFw+mXErhE# zpKvD*>o~KcR@>;_0;VU5ENy za2yUGj$rL`Dob$UPC6f!3;tZ|Elx)QVc9*RZwXELxr6+&zl9WHu=*arkp>NQ&>qAT zdMKmBj*Kshsh9?>)*HRID@?Q*=a6fX%XTP`N;fm3<3&E#CG-)!88o)PTF_6C5`-vy z2g{k027qlH(XH`RhWWsPSy;uBOJs{KuTjHHcfW@0u0( zQaKmXn;pEYJkQ79SZ*sgMO^Lid8`e9hIt>8)+ChU<)4Dpobxh+XvL`=gcCk}P>j&n z+JWUFy80j;EvUZ++`h@#Ob93RP&@{Vd8dH@i~^f?9&D^d*RTp&3)fHyU%mbgzG!!^ zZ=DX#1B>zc`9zNB*mX_K&@dx|kEXt0U3_=0_oZa*YUQPP-QY+Z@g))D{()Dsur_Zh z;aU=odw%fQrM6>YAnr1O-K)0dku>OH;b;f?wafFbvUWU>qr5!8W{r0p_PcCCzf5dz zc|YMHCzj&=DH$EPNpaUuyqMl)Smv)yhNF79|BVa)C{Rcz=FugqnP=iQNoey%pen$L z)6#5rv)D?X!YMBS2G_0VuzbX^WxaCiY&WcuiS4#o~^>x1FyZws?*>IG4+{5Rq(T* z@;LESjy5;4mOfHrys+t03v_G zGY9&bY3v}Mz-}zYYoTqubhFBRJY&>Ys0G`DaXx}}CiIwAZEjP83{s%Qa;Cm)*W9Uf zNc%93WtWpS5oy<8z)g$>RuxBquHM|V5`qtLK?ZP7274kni*Mr+j=*!GyV(;GLv=#U zP!!RMEe4Z3FFI(1PZ`{Z2P6Gaq*;%`T{S$)Sq5+#?FIaOif$)!uR}wcu{vYkLz~gW z`l0(h)SPG(rNOMAU@OVj@N0X5?5?o~c^C29O*zNvNL6%cfhbJJ!Z=6eQ`wX|FQ&S5 
z1*=(Dl=@2d9JvI5{l!}!b9c4Kt^|QmsidG3DK3*JZLJhtGW9#jE)6jAuaiz`BAi#v zTV%NamziWg9OKnSiWSXHQ)0qfH{^v)b#%wbrV9c6M?F3QrY-Ig;}D!J8SMRg-kpQ; zkG<>dj6+J0-%7kx%opKhMU30^N}AWl#}Tw7&%MyOl$fX+Yz5j@97aqgD6XDX4jZy{ zj4F5(FaZgU6P@npdc~Ra&7N_?TVCFASK52lNrqz*$X%|M=f_7Nm~-qFT+TUS@OkE) z*qkIy!SPZ3E^?hy+CYI$CV*V_ol_2u9XyPp|I>h&Fga1A288j~(z+4l#*@`1CfYr? zKPG;;ZZqw*=r6T>hXa_+{jlyWpejkDWYFgv)mQTL8bfloHb)fvtoz)J1Bw@Dupz6 z_wsDL7Vr@tW46)NQn3*wzkY#SAzxWa^D#LCb3=tlpt!XI_b)a*Sk*K@&k~N34;h=k z`oLW$ahv@BnJOT#uF}hzywl4y^rvXNF@-cQ+tcA` z6g**sE%}ITC@mHOAQyCnb=Z_OsU@4KH^=MTNj1UAF(}?o1Fmr}F{)R)#(|}%x@^5; zo9RbW0CIrv*fx_0TjIQ`IP#8__r+8nE}o`cxn=QcI_aT>q5jG8^9vOv@HW_kWzbxz zAxUl;qWXS3-!^i=y%ICa>ijJzV<%@dM`T$0dPzA%nnB?gi|;P zce}zFc`Sz-#EeY;DTzDSdX5Ax5mm0rD4h3^5QWa4^mkH?u~bw<*mog2*kH5y1SNG5 ztW9w{hY%{k9sXOpt0HPqHJq!B{<*$?zptl1CGCxsdfj40sM_X#v)Me_fi*`r4UQB~ zC3&6>-n=M+{6$UTp0*}uo5y8Uf`c3_2%{%t7h-_QgvxnJbLzkoD`)%nJ^9HYE~cc) zpztoUj||Pctr!+|b!S%Mv2bfD<}MyM9CaNqAreTsgfz1WKG*KHh` zl^Cx2-(jqXK?OR2jQW&DHe7Rn;Z62tFOt-8v^H5I-WPsUVoiyKD>8tj5fxsCr47E@bR+ zd>lNB2YYnBi12YeC!$ecDQvwHU<%Br?WB&Ob;XDL7g*;%UI&M0wseN9Xnw5V=FQt( zmqZB4SqQ4vf-g0$Xg3=<25VToN%+l#`fCPXz8jbK2b6VAa!-LXie4`8hQuCbudZ}U zSH*w?2L}N2;JJ|%c-(ME1)b3Lo|-DQncC^gDO0rBj1(sh2w21gTMVb#Zb&}^y3)Xc z4A9BUzV)2$UhzhbK%upPUHpsq86{oN00GhjR&P9c_m3*Wq>a6unu)kg*%9jE%RU0g zcR{bL5Y0a%s&)k1_!X3f4+8L``1rww()YP z-3GDI&=%nYRk$#9Bc!FANd2Aa&u^>*lkrbWRp8O{;!zM2?W6K)ZYrwl%UYB zw(THV@=S@&S(3xVuLdN*!UMflG$&ZqOd-{r#Z|d_faTotF5FuS27ypZk`oGuYS!#g z@Ope6`4eQ)lx^srZd?m?24Q8T~E# zh5d}c#I~GEz`!{J{5`*aY9Rg)r9XuRV%>I38xeC|S_A^-1OD&=LwD}kOao9M1S+vL zesdH(_Ioj9gH`fLTJd%7HSj5TGiV`Lh&6kHE&-fbXs7?{0Lf4j>q z?IX-!+F2ADdoh~Y;9kgx$-ghQQWpkQD-l&|N>%h(sO3`ltl50ZydZUGI~V4z9=3#k z`qZ1QuK>aJl=nZ1V$%seS&IF|;V^9src<%sPb-a|E8AwAqmjy?4MBcY>9cVck8EOg z0P6Bg!G&^dVykt3EH|_(@umy`b}(ffPHlOuj3Yc*Kkvuc;YJQ?8OW(XggW%K)hRN; zzw@(<&%EH+5)9cxg=MN6A*HBXF}E>p+PDx`7;rc{;IMf)f*Fmg)KKi9LK&B(c!}wb z(KvGAe%Kj0wuzLgu>7gQq=^D|n#o5M3%F&_$B!;~lwd2E{U-+0nIglx<2-~a%vU)u zP9mP(1IWn5mv4@6b1V@x4Z0<15|@VlA}7D=hx4{(C2ho8-(%*=Fp5B 
zI8Inx4Z~?!{sJD9y++Y5#&pK657#E`R+%8Q(_oU7h!hz}k-SN3EKCP2MYxBP5K<^E zia}yU?Kno~^&LWx2}0zB3GcW%8t`y#aQc}LgbKTfcYNO>SM zo)qNpaKqlzQ)zG*o{}PHcPoeLup-uUEm~ZnBfn8_*dBhvwzX$$owka~yC}$o(3gNC zr~I4<%z25IAi#4cS}{$Vs^KXi*ASh@RDyw$${-N;dJy{pevkRn2NwD?d?Z5x&i8yV$e|D;uQV9V%CUUti$) z_px$X{&!F>63@hdvC>HZbI|5dS!?$>QDz=jqqSCoq)^xj;Pv}eza2A`H9IpB`~rAx zIR(;jPC0{2j8cFe%_Mt32juCTCc-iGqo{>tT_}9^5LKxWVhk-P>Z`E0Dw2H1W?*dKs65Y2EU7t*avMfj&h`(EScSPU7vvX*xdn! zcx`a7OVrp$R3O;?eGuFZ%9fCh`)^GAP8%^O4Qg5AgFl&HW4u}^6q?+*XdJRMGtZ@@ zBQCNcB4$)%!8wQx=4w{%kU@cT)Ji;@jip*|VMN($sI^d^T>#JxxMgCDW7;?SU_oGX zAx7fZ#27=E$i=k$O+8|aKlm%$&XO+7oPezJfK|?88@o>&IO>%Po_r7lI&J`j&P-%0 zlmaHJGPF=+=G09GV7SGQ)0l$Z{8oWnjs{dz5y3k?3|Y|EOXJ|5m%83Xyb)xa!LsDc zGQX)qNtFpv+9deT_=)vf)|=N9O9%Ll5dLg1p^W=ByZ?oExF|*-u5~-2#Mkvdc;BF4pqqT_m`H=bh{~fs-R$Kv4rn z>reO(O9+s)=;S`VH~7A2*94DZs7kv{`f~=ALTC^kZz}S;c{|`gMkY9^*#a%^y+*io za!*A)0s$f@H#=orgE&sNX~V6UH=w?A8OR<3G22J786QdR8eN&K01Q({>!e<_07Xc$ z&h0`stj3<){Vt9j(kNiqCJP68ATx<>9q^NBSL89Y8M8Y57cEHiXN(*ioy~-&DMyXb z{T)8@Q#N8Grt<{(^_hn&FzVZR)a;*|+4|5Wfp8`CEjX-5dpwpyT_RbwSI+#V04G!b z;74T(7{kJbXJnQ9mI_j5K2-SZN$gW)m_hX58I|&NFQh5G;R=KsJlu zS%UJUK2$!w8S%P2G>9@Vr-Wukl9VFijAHAwxXKbbiP2{Z;V z)&vikUD88CiaNbN_g1&^&u5T@N5bm`N5(nqrNIaxm&nc_M`!)FHPCVyC>+g*oe`nA z{By2WrLbr)G9cKw$=&Q+$?~_&mL~`4PReO~%n$n8;n=O%da@O|&K-M@qMz)%tE1e< z>H_PYtPp<3^!PgP$X4J~-&o*wUTGNp^^>5+{^QtAf@#(hYUH1+_^8dIT}GO#@dD^# zzP?1uyAJig*%ld3He$I}Rw!W5uA^x@Gk4s>Lr7R{!1lGBep`$rCa;w?Sx$tGy;mhP zk<)apLj3uy299Hd!l`P~#8r=EvEkvpC6NneB@(EdgyJZ#I;f>#U8n7O&2iNSN=!s@ zBy+c1>hPaVBd9RzPgU&oT4BG#ef=Af$v3UtOC;`o@_8d?76d8@Zwum?Ws%jKRm>zr z#q@X`iYHYRCV$+54V00B0Rm2H43+lN6vOaY1jmT>d1#GGK5(|ByJ zXdx|Bt^rVY(EmgL%D72hyhg6j#5@w*-<&P;KQ5y~l?x&{QlsSi?r&o})o+Wv+&4AP z2aLz@7oEIT(1T;*qpMy4tP#+VXaC(9W~kv27m7}(AdxhGLG=Y$Gy()s!80rKc29db z8e)a_wkpR`IDQAE-&d=H@g_^*7g#*Fd&k)_$)Ghk>Z4^BShD5dWKBp0z4#FT7?MOW z$VU{`&r+F3gU{?X1MbI+Pr;ijzb3Lwvqo0CS)3P^m?Am8rFm|etjs?2XTKc(C@R=t zbR00BIRKyxvR18l2?L0p0n!xB{1kPmH3W9-gV_! 
z+@>4b4-_rBQ9w6NS9n#{9)?xkf|VK|p{-Y2eKg;}qA0D!QcKCX6vro8IcF(N{-NWe z!ZLqT6wMG>DdFletMtB!fqg9I>IO3i6!y>5v`Gg4gA`_;zE(w0TIivjDE( zGT=)JfuNO9Wy|Yx1Vy^9qA9s@8URgCi05r0ULF#1a)yO7czvR2+F=`edV96TiQn?l zkwZY`ooyUQz87#vj1UFrK}_DurE(-Mh7Yf^DZCU>=A^3;85R>u{#VYYAX&k*maTv_ zFh2}-Gn7t?4pnaXo;1vnZW-+8AUM>6OkHp&x)+L5{-w7(oOnX*Ca9fcD%DdndVqeN z*epzr{Guk8qDZpsJ3*e~tPAaYkHtLK3Fojbxxc>Wk)Hp>!>H3>fule9;E(7`)Mt9y<-jwd?sm2o7c9RjgS?_`g%cjS; z{}1<2Hgh_iMH3};L}=Zx%!wj^lCt^EOT~913;!wx-(|(~0wUEd!%xZf`^$tPd}Mgu zGlX3a0S)@ZUM=|5t9a6KRk^8iX;sbAR-Bg%+HS|&Sdx#-tXe4FB{^MwqBiEMdGeTCZQJAjIU z&>2HJe>8+@OJ}=Q+rh+u%qvBg)ka$Dls@ zDTIzGZ}Pc7e(&V{AX4Lw?0==CbgairqdB`#yV^M10xMcVeU2K?b5?$1r6BYQhtRrJij9j zs@FKP3iz3(8+xmycjB$ObeEROvH*@m-Z`k(pw59N0Qo6Zv* zxT@&Rque}qcY~{Go`XR{ZG^9L7U~36 z*_;FHw6<~YrB=oV)d{UI^hbGw;(TLa;+A%Ih`tX0@jHc7=p>A=2Kwoe_N@%K5=P*% zpYI_)AdWny+f$rvr}WU!>VGNz1$)6bUHJsDPo+fn1nZhGZL`hnJra!ybJnimf6ou^ zCi^4k=*XAT>_3k_`#bR>@J$perE4%X(C~Ba4$(9XGTJe5g~|H30r*C2cnz0`EtxR; za%P+e08e8hoK`47{{xnWB~61JcqeqE3 zc`;DXU$HbhoH>r7p1l&+>IsxSQbx78^qqO|0#|K8A=vo>h7GQ3yIemdcFkcgwuF?y zm*o=WM*LfZ)?kKc2e#0loD2f&DjHF6EerpE@kJ>TDfyEVq=(_BL-Nn3jKv!Pe3mI{ z&>QIcb8SqRXEwcqcHS8-SppwB*BJ~ZCyZRwK)&h7!w$3K5l*q^>qF@j`p~{-KO53b zgB7EuSlIYSo}~s1q!=qR@Yui^o4?>$RM4Q48f(|3^wb92iZ|Z09OF&UgEO8 z|Lf2Ej6X7m#m5{z_|;hqn~C=DARrUkME?%5vL;E7q|TAzdo3LG`gp9cGlt62r2Mlg zOY6+X5$bFr??~pI>i%lo{VWp<_=W(5T_N%95JTFDFSv|#z+hy$<*`<<*7n?yvNA#l zEtpbT^;p4{TZsdwYX)=$+gVy=`q?LDEebTTnGX<+2MVajtg>X4@nMV#QprvqgakF@ z;`i`?9_x2E1%R_s0klva|B7?=fee;AF8K*YZZ5Tf=5X(8Eb<1^Whs46-DhX<46%4) zz<=bIX-9djvJU_CUkakz1G~@hi10AEOsY}P;w#&^XeF>k8@^Q0z)+wo1&k;sQrumS zbg}`15Pa`wHL>>|BrIq=a_K70KUGUI=KMWY5e+{Q@iEK(`5KIP8>9e*!a8w1k^%`a zp4a8LTDl6LX%vq|015=%hRV*ymH@xYq}ipcx3z**f+b1n&I3}}kMnIJ+uIt58O$us zc*l**SwdY5;ebYr8zP%(kJr$QeHClsHmO^nMJpgu+n%olm#ME1Jb)0ESBW)ZeTeXn zB*ry-u>ZyzDHIjY7uYnUQ7WgNCy4hYUkHt=sb=tNw`60YLoqkU%@jh)QK3pgK*sd9 z=5NR}ngSpQpLl=o8%h4Qu3+(NCr@2t^iw)KZ0 zVb{pScPf%v=iYkFnpek?{n-LlKAB*-XddbhZg6Gp(be%tEhg_9gHbB$kU5ZHuuS;T-8k5knN;_UjR{VPB@5{Gfzm) 
z_Y6{#N{aaZhc@Kz=NgXrFLs_tZA`(lWtN!bCADDDUgStp)OZDyvfv{1d)&#Z2I1K? zebU7hKcB+{Ov{4Sc&;(k#+fLVhF2Z7DaZE6Ztg$Te~3J4EM@+PY>0DlI6+jy^S-9n zi#AQKdZ$uAJ>7CVfcJoKSnoXNfGRhIw@{B9cP!N^n>Q+LAeJXG2#{m)E|o@eMe3KP|wLz-Mf z%1^6@^w-cF1(4JqfSnUg2-gMpYI49q5M2wdl}uw?2t5b)S|SUkJgPq05f=mM z4Bn$}OxL$o-uVWwaW{{bRad@Ip^tlQI-MYhP9(_T*RtAXWVwJyrZvmj2`V@``7SW- zjIZ4fJD`bL_^AkB$FLk@)+#sknQAA7>;6V)1}lv089F}f>{Y;a%l3#rF~lB)HYPDK0L%DM?yRD#H~-Sm0DB z>#SRNHi3E3gkNw$uSe849uB<%3i?809Pmkd;iKwn0!PT-N8+`pb^6SrJc^F!PB*A2 zDTf`NbKcbpQ+k-N_m2;@&NK7TFcv!T=B;ir89Ks~zaaD*O(B_OMjI&1t!j`-tYVO;Y3*Rh~_USU$cpRH8AZ`OLE%I&|X z{5v6s!oU+J@WhFI>;IeK5Mf}ooQX?_!saZJw zW0Ag{c^tY_g~L;Gl)tVSErP*I#Q-(?0h7JVk!#~cgyx9OcZLPGlXCdx%Z0BEurzDq zc<&VY9Kph3bYXB3hzf4`oE5ypojIXdl#`vscQYLL1smX=7?#xCj_d`#S0f@D%C@3r zBN9D#CTyl)0MFE67)~_~8K>Sit=*H*{mbi_&my!tW4nmKRYC|)Q$uJ^!9g~&Ql{P0 zxG^d;BUdj}uL#QJ(6~;GD8IgvR-)N-}iz${V^~FH_7J!H>@UmSo zdV_A7?)(3LsS8s$WM)tcY|@`IN>Z?727`H|wrN1J?k z5&-Ph9vmVkWQ zSOGbwE%f#lZ4^aIzV05dzgN#P7mE!GfQkCn_BQH+1F&uHK#DLccQZy}-nW9dK7D0bjU)LTMqVO?d} zf&1|w-JXl{r{dHP$73>poD5C4pjN9}BK0rahO@#U6*M>aq3Cm1_{FZF+9hnAF4eZn zCzGV8W}N2Qck98Wb_<~Ke)Xz+|JsPmH)UMwjeNs`yX85<7I(?45gV5FTM@AOo{imM zx8?M1NmPDL%VlIV=as+<+v~XJ09Jg%J)^`;pO5|+D zUYiZ_i30%^N0xc{?7Xme_jt9)-3DFtt)LzJ_A7t$9M|x_`Swj3fTytvZX(|SIOhm| zMQ|${B&|>Zgthh>abk4vZ5;L_EwkDX?c&{pgzuPCDoW{}JwhGlmcA(W{me>v6~uT- z`s{;OO5Ib1F?XvMMu$0|;rs-RipKBg#jvugJ=0VZPhb_- zSUR~&bnp+of5F_|>0j8<)->-P->+KHI*=F?#Ru2tjdj~D$(V38rqJ4&Y&z>Y! 
zh53H;&2Gwv=Wljlqumy=2%7NeJ#vTOwNb@T$K)a-2_0k1%oiylVl5g;Rp7J%6q~!j zL;#GA9M{UNSjgh#7?`3CXS^?Em|9)Z;8jzZ(HzMf>dJOdq_n5Y_SktZ4y*{0lp%3J zxs#?duHHT#q-cF3Bc8s-qq`qpAI93PL^zPM5&G{Yo>ih&3Oa02)@1O|Fhn-puNX`Yht9LMrK~#^eZ@GH;3ri+cPulKN3&p)aC!aS3 zAn=#QmWCLV8;8|RXN#|mU`U{@mE<3oV760aSu}i!!}+mpE5IX?NQKbx$H)k7rd->!D)FrocOCyKUrGm(Y(2_!n zBF;O^?#^OrH7+Ar6a9Yd(6e}1INHm*5Lx3~8{r;f*^CEVm`(9%N)pZbnb@Daxt>!# zlsCTKE>=Iyy~ep$9peXy3;68`Rd~-aTndfbGtd$C1?)|dR)aDyXGjm`VIse}W65A3 zu0I6n+(S}15mK{A;<{l#_20?Gp|E%6`B$w9+K3vp1^|PW!oL~<9;M&Ejq`wR?1Ig$ zZC#<8NFkVby&;8LCVz#+RMyjvrPVE8C!`TGW=yD|OmFU~ZU7&{cVP4St1a#K@3{ySYZ18=8b^2rd-x zOGM(>0VP6PdsTOQ2Q+uILs)U}W#+ise`t>V=RMZr#G9z+ zej10kdhV&W-Xk>bBKbi7oKh>ppswY=TCo4wazwByIb#?&JcV<=d0U#EUI)X3SFqiT z+)2W%?e}`u-F^F(E6=a^Hsz~?ck>iP>4ET`pyX2Cp%xrfOaU}E4i)A|g7#7D7FNsG z9pA_KXfW!Q27P+~nz6FH?^N2bWF0B@1&sV*!(h8_(E_Q}-<^80Y_;vGw}&Vm^$rZ@8_o>^@K$S(#Gz&|>vZpWuO{+$2- literal 0 HcmV?d00001 diff --git a/valgrind.suppress b/valgrind.suppress new file mode 100644 index 000000000..fe4f25545 --- /dev/null +++ b/valgrind.suppress @@ -0,0 +1,218 @@ +{ + + Memcheck:Addr1 + fun:ngx_init_cycle + fun:ngx_master_process_cycle + fun:main +} +{ + + Memcheck:Addr4 + fun:ngx_init_cycle + fun:ngx_master_process_cycle + fun:main +} +{ + + Memcheck:Cond + fun:ngx_vslprintf + fun:ngx_snprintf + fun:ngx_sock_ntop + fun:ngx_event_accept + fun:ngx_epoll_process_events + fun:ngx_process_events_and_timers +} +{ + + Memcheck:Addr1 + fun:ngx_vslprintf + fun:ngx_snprintf + fun:ngx_sock_ntop + fun:ngx_event_accept +} +{ + + exp-sgcheck:SorG + fun:ngx_http_lua_ndk_set_var_get +} +{ + + exp-sgcheck:SorG + fun:ngx_http_variables_init_vars + fun:ngx_http_block +} +{ + + exp-sgcheck:SorG + fun:ngx_conf_parse +} +{ + + exp-sgcheck:SorG + fun:ngx_vslprintf + fun:ngx_log_error_core +} +{ + + Memcheck:Param + epoll_ctl(event) + fun:epoll_ctl +} +{ + + Memcheck:Cond + fun:ngx_conf_flush_files + fun:ngx_single_process_cycle +} +{ + + Memcheck:Cond + fun:memcpy + fun:ngx_vslprintf + fun:ngx_log_error_core 
+ fun:ngx_http_charset_header_filter +} +{ + + Memcheck:Param + socketcall.setsockopt(optval) + fun:setsockopt + fun:drizzle_state_connect +} +{ + + Memcheck:Cond + fun:ngx_conf_flush_files + fun:ngx_single_process_cycle + fun:main +} +{ + + Memcheck:Leak + fun:malloc + fun:ngx_alloc + fun:ngx_event_process_init +} +{ + + Memcheck:Param + sendmsg(mmsg[0].msg_hdr) + fun:sendmmsg + fun:__libc_res_nsend +} +{ + + Memcheck:Param + sendmsg(msg.msg_iov[0]) + fun:__sendmsg_nocancel + fun:ngx_write_channel + fun:ngx_pass_open_channel + fun:ngx_start_cache_manager_processes +} +{ + + Memcheck:Cond + fun:ngx_init_cycle + fun:ngx_master_process_cycle + fun:main +} +{ + + Memcheck:Cond + fun:index + fun:expand_dynamic_string_token + fun:_dl_map_object + fun:map_doit + fun:_dl_catch_error + fun:do_preload + fun:dl_main + fun:_dl_sysdep_start + fun:_dl_start +} +{ + + Memcheck:Param + sendmsg(mmsg[0].msg_hdr) + fun:sendmmsg + fun:__libc_res_nsend + fun:__libc_res_nquery + fun:__libc_res_nquerydomain + fun:__libc_res_nsearch +} +{ + + Memcheck:Leak + match-leak-kinds: definite + fun:malloc + fun:ngx_alloc + fun:ngx_set_environment + fun:ngx_single_process_cycle +} +{ + + Memcheck:Cond + obj:* +} +{ + + Memcheck:Leak + match-leak-kinds: definite + fun:malloc + fun:ngx_alloc + fun:ngx_set_environment + fun:ngx_worker_process_init +} +{ + + Memcheck:Leak + match-leak-kinds: definite + fun:malloc + fun:ngx_alloc + fun:ngx_create_pool + fun:main +} +{ + + Memcheck:Param + epoll_pwait(sigmask) + fun:epoll_pwait + fun:epoll_wait + fun:ngx_epoll_process_events + fun:ngx_process_events_and_timers +} +{ + + Memcheck:Param + epoll_pwait(sigmask) + fun:epoll_pwait + fun:epoll_wait + fun:ngx_epoll_test_rdhup + fun:ngx_epoll_init + fun:ngx_event_process_init +} +{ + + Memcheck:Param + epoll_pwait(sigmask) + fun:epoll_pwait + fun:ngx_epoll_process_events + fun:ngx_process_events_and_timers +} +{ + + Memcheck:Param + epoll_pwait(sigmask) + fun:epoll_pwait + fun:ngx_epoll_test_rdhup + 
fun:ngx_epoll_init + fun:ngx_event_process_init +} +{ + + Memcheck:Leak + match-leak-kinds: possible + fun:malloc + fun:ngx_alloc + fun:ngx_crc32_table_init + fun:main +}