Skip to content

Commit

Permalink
Merge pull request #13136 from crystal-lang/feature/backport-pcre2-fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
straight-shoota authored Mar 2, 2023
2 parents 9eab606 + 0f0eb1d commit c40f71c
Show file tree
Hide file tree
Showing 6 changed files with 26 additions and 18 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/aarch64.yml
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ jobs:
- name: Run stdlib specs
uses: docker://jhass/crystal:1.0.0-alpine-build
with:
args: make std_spec
args: make std_spec FLAGS=-Duse_pcre
aarch64-musl-test-compiler:
needs: aarch64-musl-build
runs-on: [linux, ARM64]
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/wasm32.yml
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ jobs:
rm wasm32-wasi-libs.tar.gz
- name: Build spec/wasm32_std_spec.cr
run: bin/crystal build spec/wasm32_std_spec.cr -o wasm32_std_spec.wasm --target wasm32-wasi
run: bin/crystal build spec/wasm32_std_spec.cr -o wasm32_std_spec.wasm --target wasm32-wasi -Duse_pcre
env:
CRYSTAL_LIBRARY_PATH: ${{ github.workspace }}/wasm32-wasi-libs

Expand Down
1 change: 1 addition & 0 deletions .github/workflows/win.yml
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ jobs:
with:
path: | # openssl and llvm take much longer to build so they are cached separately
libs/pcre.lib
libs/pcre2-8.lib
libs/iconv.lib
libs/gc.lib
libs/ffi.lib
Expand Down
4 changes: 3 additions & 1 deletion spec/std/regex_spec.cr
Original file line number Diff line number Diff line change
Expand Up @@ -211,7 +211,9 @@ describe "Regex" do
{% else %}
# Can't use regex literal because the *LIMIT_DEPTH verb is not supported in libpcre (only libpcre2)
# and thus the compiler doesn't recognize it.
str.matches?(Regex.new("(*LIMIT_DEPTH=8192)^(?:[A-Za-z0-9+/]{4})*(?:[A-Za-z0-9+/]{2}==|[A-Za-z0-9+/]{3}=)?$"))
regex = Regex.new("(*LIMIT_DEPTH=8192)^(?:[A-Za-z0-9+/]{4})*(?:[A-Za-z0-9+/]{2}==|[A-Za-z0-9+/]{3}=)?$")
pending! "PCRE2 JIT mode not available." unless regex.@jit
str.matches?(regex)
{% end %}
# We don't care whether this actually matches or not, it's just to make
# sure the engine does not stack overflow with a large string.
Expand Down
10 changes: 6 additions & 4 deletions src/regex/lib_pcre2.cr
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@ lib LibPCRE2

fun get_error_message = pcre2_get_error_message_8(errorcode : Int, buffer : UInt8*, bufflen : LibC::SizeT) : Int

fun compile = pcre2_compile_8(pattern : UInt8*, length : LibC::SizeT, options : UInt32, errorcode : LibC::SizeT*, erroroffset : Int*, ccontext : CompileContext*) : Code*
fun compile = pcre2_compile_8(pattern : UInt8*, length : LibC::SizeT, options : UInt32, errorcode : Int*, erroroffset : LibC::SizeT*, ccontext : CompileContext*) : Code*
fun code_free = pcre2_code_free_8(code : Code*) : Void

type MatchContext = Void*
Expand Down Expand Up @@ -207,8 +207,10 @@ lib LibPCRE2
fun get_ovector_pointer = pcre2_get_ovector_pointer_8(match_data : MatchData*) : LibC::SizeT*
fun get_ovector_count = pcre2_get_ovector_count_8(match_data : MatchData*) : UInt32

# void *private_malloc(Int, void *);
# void private_free(void *, void *);
fun general_context_create = pcre2_general_context_create_8(private_malloc : Void*, private_free : Void*, memory_data : Void*) : GeneralContext
fun general_context_create = pcre2_general_context_create_8(
private_malloc : LibC::SizeT, Void* -> Void,
private_free : Void*, Void* -> Void,
memory_data : Void*
) : GeneralContext
fun config = pcre2_config_8(what : UInt32, where : Void*) : Int
end
25 changes: 14 additions & 11 deletions src/regex/pcre2.cr
Original file line number Diff line number Diff line change
@@ -1,28 +1,32 @@
require "./lib_pcre2"
require "crystal/thread_local_value"

# :nodoc:
module Regex::PCRE2
@re : LibPCRE2::Code*
@jit : Bool

# :nodoc:
def initialize(*, _source @source : String, _options @options)
@re = PCRE2.compile(source, pcre2_options(options) | LibPCRE2::UTF | LibPCRE2::NO_UTF_CHECK | LibPCRE2::DUPNAMES | LibPCRE2::UCP) do |error_message|
raise ArgumentError.new(error_message)
end

jit_compile
@jit = jit_compile
end

private def jit_compile : Nil
private def jit_compile : Bool
ret = LibPCRE2.jit_compile(@re, LibPCRE2::JIT_COMPLETE)
if ret < 0
case error = LibPCRE2::Error.new(ret)
when .jit_badoption?
# okay
return false
else
raise ArgumentError.new("Regex JIT compile error: #{error}")
end
end
true
end

protected def self.compile(source, options)
Expand Down Expand Up @@ -134,26 +138,25 @@ module Regex::PCRE2
end

class_getter general_context do
LibPCRE2.general_context_create(->(size : LibC::Int, data : Void*) { GC.malloc(size) }.pointer, ->(pointer : Void*, data : Void*) { GC.free(pointer) }.pointer, nil)
LibPCRE2.general_context_create(->(size, _data) { GC.malloc(size) }, ->(pointer, _data) { GC.free(pointer) }, nil)
end

# Returns a JIT stack that's shared in the current thread.
#
# Only a single `match` function can run per thread at any given time, so there
# can't be any concurrent access to the JIT stack.
@[ThreadLocal]
class_getter jit_stack : LibPCRE2::JITStack do
jit_stack = LibPCRE2.jit_stack_create(32_768, 1_048_576, Regex::PCRE2.general_context)
if jit_stack.null?
raise "Error allocating JIT stack"
@@jit_stack = Crystal::ThreadLocalValue(LibPCRE2::JITStack).new

def self.jit_stack
@@jit_stack.get do
LibPCRE2.jit_stack_create(32_768, 1_048_576, general_context) || raise "Error allocating JIT stack"
end
jit_stack
end

private def match_data(str, byte_index, options)
match_data = LibPCRE2.match_data_create_from_pattern(@re, Regex::PCRE2.general_context)
match_context = LibPCRE2.match_context_create(nil)
LibPCRE2.jit_stack_assign(match_context, nil, Regex::PCRE2.jit_stack.as(Void*))
LibPCRE2.jit_stack_assign(match_context, nil, Regex::PCRE2.jit_stack.as(Void*)) if @jit
match_count = LibPCRE2.match(@re, str, str.bytesize, byte_index, pcre2_options(options) | LibPCRE2::NO_UTF_CHECK, match_data, match_context)

if match_count < 0
Expand All @@ -176,7 +179,7 @@ module Regex::PCRE2

module MatchData
# :nodoc:
def initialize(@regex : Regex, @code : LibPCRE2::Code*, @string : String, @pos : Int32, @ovector : UInt64*, @group_size : Int32)
def initialize(@regex : Regex, @code : LibPCRE2::Code*, @string : String, @pos : Int32, @ovector : LibC::SizeT*, @group_size : Int32)
end

private def byte_range(n, &)
Expand Down

0 comments on commit c40f71c

Please sign in to comment.