Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[OPA-1889] Fix some of the remaining grok discrepancies #906

Merged
merged 22 commits into from
Jun 26, 2024
Merged
Show file tree
Hide file tree
Changes from 14 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions src/datadog/grok/filters/keyvalue.rs
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,7 @@ fn parse_line<'a>(
}

/// Parses the field_delimiter between the key/value pairs, ignoring surrounding spaces
fn parse_field_delimiter<'a>(field_delimiter: &'a str) -> impl Fn(&'a str) -> SResult<&'a str> {
fn parse_field_delimiter(field_delimiter: &str) -> impl Fn(&str) -> SResult<&str> + '_ {
vladimir-dd marked this conversation as resolved.
Show resolved Hide resolved
move |input| {
if field_delimiter == " " {
space1(input)
Expand Down Expand Up @@ -281,7 +281,7 @@ fn parse_quoted<'a>(

/// A delimited value is all the text until our field_delimiter, or the rest of the string if it is the last value in the line.
#[inline]
fn parse_delimited<'a>(field_delimiter: &'a str) -> impl Fn(&'a str) -> SResult<&'a str> {
fn parse_delimited(field_delimiter: &str) -> impl Fn(&str) -> SResult<&str> + '_ {
move |input| map(alt((take_until(field_delimiter), rest)), |s: &str| s.trim())(input)
}

Expand Down Expand Up @@ -356,7 +356,7 @@ fn parse_value<'a>(
}
}

fn parse_number<'a>(field_delimiter: &'a str) -> impl Fn(&'a str) -> SResult<Value> {
fn parse_number(field_delimiter: &str) -> impl Fn(&str) -> SResult<Value> + '_ {
move |input| {
map(
terminated(
Expand Down
1 change: 1 addition & 0 deletions src/datadog/grok/grok_filter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,7 @@ pub fn apply_filter(value: &Value, filter: &GrokFilter) -> Result<Value, GrokRun
)),
},
GrokFilter::Scale(scale_factor) => {
let scale_factor = scale_factor * 1000_f64 / 1000_f64;
let v = match value {
Value::Integer(v) => Ok(Value::Float(
NotNan::new((*v as f64) * scale_factor).expect("NaN"),
Expand Down
102 changes: 75 additions & 27 deletions src/datadog/grok/matchers/date.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
use std::fmt::Formatter;

use crate::value::Value;
use chrono::{DateTime, FixedOffset, NaiveDate, NaiveDateTime, NaiveTime, Offset, TimeZone, Utc};
use chrono::{
DateTime, Datelike, FixedOffset, NaiveDate, NaiveDateTime, NaiveTime, Offset, TimeZone, Utc,
};
use chrono_tz::{Tz, UTC};
use peeking_take_while::PeekableExt;
use regex::Regex;
Expand Down Expand Up @@ -241,76 +243,122 @@ pub fn time_format_to_regex(format: &str, with_captures: bool) -> Result<RegexRe
}

pub fn apply_date_filter(value: &Value, filter: &DateFilter) -> Result<Value, GrokRuntimeError> {
// we might have to correct the strp format
let mut strp_format = filter.strp_format.clone();

let year_is_missing =
!filter.original_format.contains('y') && !filter.original_format.contains('Y');
let month_is_missing = !filter.original_format.contains('M');
let day_is_missing = !strp_format.contains('d');

if day_is_missing {
strp_format = format!("%-d {}", strp_format);
}
if month_is_missing {
strp_format = format!("%-m {}", strp_format);
}
if year_is_missing {
strp_format = format!("%Y {}", strp_format);
}
vladimir-dd marked this conversation as resolved.
Show resolved Hide resolved

let timestamp = match value {
Value::Bytes(bytes) => {
let mut value = String::from_utf8_lossy(bytes).into_owned();
let original_value = String::from_utf8_lossy(bytes).into_owned();
vladimir-dd marked this conversation as resolved.
Show resolved Hide resolved

// use this value to parse the date with the strptime format -
// it may be modified if we have to correct the strp format (e.g. add the year)
let mut value_to_parse = original_value.clone();

// prepend the current (UTC) day if it's not present in the value
if day_is_missing {
value_to_parse = format!("{} {}", Utc::now().day(), value_to_parse);
}

// prepend the current (UTC) month if it's not present in the value
if month_is_missing {
value_to_parse = format!("{} {}", Utc::now().month(), value_to_parse);
}

// prepend the current (UTC) year if it's not present in the value
if year_is_missing {
value_to_parse = format!("{} {}", Utc::now().year(), value_to_parse);
}

// Ideally this Z should be quoted in the pattern, but DataDog supports this as a special case:
// yyyy-MM-dd'T'HH:mm:ss.SSSZ - e.g. 2016-09-02T15:02:29.648Z
if value.ends_with('Z') && filter.original_format.ends_with('Z') {
value.pop(); // drop Z
value.push_str("+0000");
if value_to_parse.ends_with('Z') && filter.original_format.ends_with('Z') {
value_to_parse.pop(); // drop Z
value_to_parse.push_str("+0000");
}

if let Some(tz) = filter
.regex
.captures(&value)
.captures(&original_value)
.and_then(|caps| caps.name("tz"))
{
let tz = tz.as_str();
let tz: Tz = tz.parse().map_err(|error| {
warn!(message = "Error parsing tz", tz = %tz, % error);
GrokRuntimeError::FailedToApplyFilter(filter.to_string(), value.to_string())
GrokRuntimeError::FailedToApplyFilter(
filter.to_string(),
original_value.to_string(),
)
})?;
replace_sec_fraction_with_dot(filter, &mut value);
let naive_date = NaiveDateTime::parse_from_str(&value, &filter.strp_format).map_err(|error|
replace_sec_fraction_with_dot(filter, &mut value_to_parse);
let naive_date = NaiveDateTime::parse_from_str(&value_to_parse, &strp_format).map_err(|error|
{
warn!(message = "Error parsing date", value = %value, format = %filter.strp_format, % error);
warn!(message = "Error parsing date", value = %original_value, format = %strp_format, % error);
GrokRuntimeError::FailedToApplyFilter(
filter.to_string(),
value.to_string(),
original_value.to_string(),
)
})?;
let dt = tz
.from_local_datetime(&naive_date)
.single()
.ok_or_else(|| {
GrokRuntimeError::FailedToApplyFilter(filter.to_string(), value.to_string())
GrokRuntimeError::FailedToApplyFilter(
filter.to_string(),
original_value.to_string(),
)
})?;
Ok(Utc.from_utc_datetime(&dt.naive_utc()).timestamp_millis())
} else {
replace_sec_fraction_with_dot(filter, &mut value);
replace_sec_fraction_with_dot(filter, &mut value_to_parse);
if filter.tz_aware {
// parse as a tz-aware complete date/time
let timestamp =
DateTime::parse_from_str(&value, &filter.strp_format).map_err(|error| {
warn!(message = "Error parsing date", date = %value, % error);
let timestamp = DateTime::parse_from_str(&value_to_parse, &strp_format)
.map_err(|error| {
warn!(message = "Error parsing date", date = %original_value, % error);
GrokRuntimeError::FailedToApplyFilter(
filter.to_string(),
value.to_string(),
original_value.to_string(),
)
})?;
Ok(timestamp.to_utc().timestamp_millis())
} else if let Ok(dt) = NaiveDateTime::parse_from_str(&value, &filter.strp_format) {
} else if let Ok(dt) = NaiveDateTime::parse_from_str(&value_to_parse, &strp_format)
{
// try parsing as a naive datetime
if let Some(tz) = &filter.target_tz {
let tzs = parse_timezone(tz).map_err(|error| {
warn!(message = "Error parsing tz", tz = %tz, % error);
GrokRuntimeError::FailedToApplyFilter(
filter.to_string(),
value.to_string(),
original_value.to_string(),
)
})?;
let dt = tzs.from_local_datetime(&dt).single().ok_or_else(|| {
warn!(message = "Error parsing date", date = %value);
warn!(message = "Error parsing date", date = %original_value);
GrokRuntimeError::FailedToApplyFilter(
filter.to_string(),
value.to_string(),
original_value.to_string(),
)
})?;
Ok(dt.to_utc().timestamp_millis())
} else {
Ok(dt.and_utc().timestamp_millis())
}
} else if let Ok(nt) = NaiveTime::parse_from_str(&value, &filter.strp_format) {
} else if let Ok(nt) = NaiveTime::parse_from_str(&value_to_parse, &strp_format) {
// try parsing as a naive time
Ok(NaiveDateTime::new(
NaiveDate::from_ymd_opt(1970, 1, 1).expect("invalid date"),
Expand All @@ -320,12 +368,12 @@ pub fn apply_date_filter(value: &Value, filter: &DateFilter) -> Result<Value, Gr
.timestamp_millis())
} else {
// try parsing as a naive date
let nd = NaiveDate::parse_from_str(&value, &filter.strp_format).map_err(
let nd = NaiveDate::parse_from_str(&value_to_parse, &strp_format).map_err(
|error| {
warn!(message = "Error parsing date", date = %value, % error);
warn!(message = "Error parsing date", date = %original_value, % error);
GrokRuntimeError::FailedToApplyFilter(
filter.to_string(),
value.to_string(),
original_value.to_string(),
)
},
)?;
Expand All @@ -336,10 +384,10 @@ pub fn apply_date_filter(value: &Value, filter: &DateFilter) -> Result<Value, Gr
))
.single()
.ok_or_else(|| {
warn!(message = "Error parsing date", date = %value);
warn!(message = "Error parsing date", date = %original_value);
GrokRuntimeError::FailedToApplyFilter(
filter.to_string(),
value.to_string(),
original_value.to_string(),
)
})?;
Ok(Utc
Expand Down
Loading
Loading