Skip to content

Commit

Permalink
feat: expose parse_regex/parse_hex_string in boreal-parser
Browse files Browse the repository at this point in the history
This makes it much easier to parse those in tests used in boreal.
  • Loading branch information
vthib committed Jul 30, 2023
1 parent 22202f8 commit d6a7afc
Show file tree
Hide file tree
Showing 3 changed files with 62 additions and 56 deletions.
48 changes: 35 additions & 13 deletions boreal-parser/src/hex_string.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@ use super::error::{Error, ErrorKind};
use super::nom_recipes::{map_res, rtrim};
use super::types::{Input, ParseResult};

const JUMP_LIMIT_IN_ALTERNATIVES: u32 = 200;
const MAX_HEX_TOKEN_RECURSION: usize = 10;

/// A token in a hex string.
#[derive(Clone, Debug, PartialEq)]
pub enum Token {
Expand Down Expand Up @@ -56,8 +59,32 @@ pub struct Jump {
pub to: Option<u32>,
}

const JUMP_LIMIT_IN_ALTERNATIVES: u32 = 200;
const MAX_HEX_TOKEN_RECURSION: usize = 10;
/// Parse a hex string from its textual form.
///
/// The input is expected to look like `{ AB .. }`, enclosing braces included.
///
/// Only the parsed tokens are returned: whatever input the underlying parser
/// hands back as remaining is dropped.
///
/// # Errors
///
/// Returns an error if the parsing fails.
pub fn parse_hex_string(input: &str) -> Result<Vec<Token>, Error> {
    use nom::Finish;

    // `finish` converts the nom result into a plain `Result`, which lets `?`
    // propagate the parsing error to the caller.
    let parsed = hex_string(Input::new(input)).finish()?;

    Ok(parsed.1)
}

/// Parse a hex string.
///
/// A hex string is a brace-delimited list of tokens, e.g. `{ AB .. }`.
///
/// This is equivalent to the `hex_string` rule in `hex_grammar.y` in libyara.
pub(crate) fn hex_string(input: Input) -> ParseResult<Vec<Token>> {
    // The opening brace is parsed on its own, outside of `cut`: a failure
    // here is simply propagated by `?`.
    let (rest, _) = rtrim(char('{'))(input)?;

    // Past the opening brace, the token list (parsed with
    // `in_alternatives = false`) must be followed by the closing brace.
    let closing_brace = rtrim(char('}'));
    cut(terminated(|i| tokens(i, false), closing_brace))(rest)
}

/// Parse a hex digit, and return its value in [0-15].
fn hex_digit(mut input: Input) -> ParseResult<u8> {
Expand Down Expand Up @@ -298,17 +325,6 @@ fn tokens(mut input: Input, in_alternatives: bool) -> ParseResult<Vec<Token>> {
}
}

/// Parse a hex string.
///
/// This looks like `{ AB .. }`.
///
/// This is equivalent to the `hex_string` rule in `hex_grammar.y` in libyara.
pub(crate) fn hex_string(input: Input) -> ParseResult<Vec<Token>> {
// Consume the opening brace first; a failure here is propagated as-is by `?`.
let (input, _) = rtrim(char('{'))(input)?;

// Everything after the opening brace is wrapped in `cut`: the token list
// (parsed with `in_alternatives = false`) must be followed by the closing brace.
cut(terminated(|input| tokens(input, false), rtrim(char('}'))))(input)
}

#[cfg(test)]
mod tests {
use super::*;
Expand Down Expand Up @@ -617,6 +633,12 @@ mod tests {
assert_eq!(input.inner_recursion_counter, 0);
}

#[test]
fn test_parse_hex_string() {
    // A brace-delimited hex string is accepted by the public entry point.
    let braced = parse_hex_string(r"{ AB }");
    assert!(braced.is_ok());

    // The same bytes without the enclosing braces must be rejected.
    let bare = parse_hex_string(r"AB");
    assert!(bare.is_err());
}

#[test]
fn test_public_types() {
test_public_type(Token::Byte(3));
Expand Down
24 changes: 23 additions & 1 deletion boreal-parser/src/regex.rs
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,22 @@ pub enum AssertionKind {
NonWordBoundary,
}

/// Parse a regex from its textual form.
///
/// The input is expected to look like `/<regex>/<modifiers>`.
///
/// Only the parsed regex is returned: whatever input the underlying parser
/// hands back as remaining is dropped.
///
/// # Errors
///
/// Returns an error if the parsing fails.
pub fn parse_regex(input: &str) -> Result<Regex, Error> {
    use nom::Finish;

    // `finish` converts the nom result into a plain `Result`, which lets `?`
    // propagate the parsing error to the caller.
    let parsed = regex(Input::new(input)).finish()?;

    Ok(parsed.1)
}

/// Parse a regular expression.
///
/// Similar to the _REGEX_ lexical pattern in libyara, but the parsing of the AST is done
Expand Down Expand Up @@ -535,7 +551,7 @@ mod tests {
use crate::test_helpers::{parse, parse_err, parse_err_type, test_public_type};

#[test]
fn test_parse_regex() {
fn test_parse() {
parse(
regex,
"/a/i",
Expand Down Expand Up @@ -1198,6 +1214,12 @@ mod tests {
assert_eq!(input.inner_recursion_counter, 0);
}

#[test]
fn test_parse_regex() {
    // A regex wrapped in both `/` delimiters is accepted by the public entry point.
    let delimited = parse_regex(r"/a{2}/");
    assert!(delimited.is_ok());

    // Without the opening `/`, parsing must fail.
    let missing_opening = parse_regex(r"a{2}/");
    assert!(missing_opening.is_err());
}

#[test]
fn test_public_types() {
test_public_type(regex(Input::new(r"/a{2}[az]\b\s|.+$/")).unwrap());
Expand Down
46 changes: 4 additions & 42 deletions boreal/src/test_helpers.rs
Original file line number Diff line number Diff line change
@@ -1,52 +1,14 @@
use boreal_parser::hex_string::Token;
use boreal_parser::{parse, Regex, VariableDeclarationValue};
use boreal_parser::hex_string::parse_hex_string;
use boreal_parser::regex::parse_regex;

use crate::regex::Hir;

#[track_caller]
pub fn expr_to_hir(expr: &str) -> Hir {
if expr.starts_with('{') {
parse_hex_string(expr).into()
parse_hex_string(expr).unwrap().into()
} else {
parse_regex_string(expr).ast.into()
}
}

/// Test helper: parse a hex string by embedding it in a minimal YARA rule.
///
/// `hex_string` is interpolated as the value of the `$a` variable of a
/// one-rule source, which is then run through the full `parse` entry point.
/// The hex string tokens are extracted from the resulting rule.
///
/// # Panics
///
/// Panics if the generated rule fails to parse, if no component is popped
/// from the parsed file or the popped component is not a rule, if the rule
/// has no variable, or if the variable value is not a hex string.
#[track_caller]
fn parse_hex_string(hex_string: &str) -> Vec<Token> {
// `{{` is the `format!` escape for a literal `{`; `{hex_string}` is interpolated.
let rule_str = format!("rule a {{ strings: $a = {hex_string} condition: $a }}");
let mut file = parse(&rule_str).unwrap();
// Take the popped component, which must be a rule.
let mut rule = file
.components
.pop()
.map(|v| match v {
boreal_parser::YaraFileComponent::Rule(v) => v,
_ => panic!(),
})
.unwrap();
// Take the popped variable of that rule and unwrap its hex string value.
let var = rule.variables.pop().unwrap();
match var.value {
VariableDeclarationValue::HexString(s) => s,
_ => panic!(),
}
}

#[track_caller]
fn parse_regex_string(hex_string: &str) -> Regex {
let rule_str = format!("rule a {{ strings: $a = /{hex_string}/ condition: $a }}");
let mut file = parse(&rule_str).unwrap();
let mut rule = file
.components
.pop()
.map(|v| match v {
boreal_parser::YaraFileComponent::Rule(v) => v,
_ => panic!(),
})
.unwrap();
let var = rule.variables.pop().unwrap();
match var.value {
VariableDeclarationValue::Regex(s) => s,
_ => panic!(),
parse_regex(&format!("/{expr}/")).unwrap().ast.into()
}
}

Expand Down

0 comments on commit d6a7afc

Please sign in to comment.