Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Don't use Floats to Parse Times #578

Merged
merged 1 commit into from
Oct 7, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 1 addition & 8 deletions src/timing/formatter/segment_time.rs
Original file line number Diff line number Diff line change
Expand Up @@ -106,12 +106,5 @@ impl Display for Inner {
fn test() {
let time = "4:20.69".parse::<TimeSpan>().unwrap();
let formatted = SegmentTime::new().format(time).to_string();
// FIXME: The parser should use integer parsing as well, and then this
// doesn't need to specialize bad floating point behavior anymore.
assert!(
// Modern processors
formatted == "4:20.69" ||
// Old x86 processors are apparently less precise
formatted == "4:20.68"
);
assert_eq!(formatted, "4:20.69");
}
112 changes: 78 additions & 34 deletions src/timing/time_span.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
use crate::platform::{prelude::*, Duration};
use crate::{
platform::{prelude::*, Duration},
util::ascii_char::AsciiChar,
};
use core::{
num::{ParseFloatError, ParseIntError},
num::ParseIntError,
ops::{Add, AddAssign, Neg, Sub, SubAssign},
str::FromStr,
};
Expand Down Expand Up @@ -73,15 +76,17 @@ impl TimeSpan {
pub enum ParseError {
/// An empty string is not a valid Time Span.
Empty,
/// The seconds need to be a finite number.
Finite,
/// Couldn't parse the seconds.
Seconds {
/// The time is too large to be represented.
Overflow,
/// The fractional part contains characters that are not digits.
FractionDigits,
/// Couldn't parse the fractional part.
Fraction {
/// The underlying error.
source: ParseFloatError,
source: ParseIntError,
},
/// Couldn't parse the minutes or hours.
MinutesOrHours {
/// Couldn't parse the seconds, minutes, or hours.
Time {
/// The underlying error.
source: ParseIntError,
},
Expand All @@ -94,27 +99,50 @@ impl FromStr for TimeSpan {
// It's faster to use `strip_prefix` with char literals if it's an ASCII
// char, otherwise prefer using string literals.
#[allow(clippy::single_char_pattern)]
let factor =
let negate =
if let Some(remainder) = text.strip_prefix('-').or_else(|| text.strip_prefix("−")) {
text = remainder;
-1.0
true
} else {
1.0
false
};

let mut pieces = text.split(':');
let (seconds_text, nanos) =
if let Some((seconds, mut nanos)) = AsciiChar::DOT.split_once(text) {
if nanos.len() > 9 {
nanos = nanos.get(..9).context(FractionDigits)?;
}
(
seconds,
nanos.parse::<u32>().context(Fraction)? * 10_u32.pow(9 - nanos.len() as u32),
)
} else {
(text, 0u32)
};

let last = pieces.next_back().context(Empty)?;
let seconds = last.parse::<f64>().context(Seconds)?;
ensure!(!seconds_text.is_empty(), Empty);

let mut seconds = 0u64;

for split in AsciiChar::COLON.split_iter(seconds_text) {
seconds = seconds
.checked_mul(60)
.context(Overflow)?
.checked_add(split.parse::<u64>().context(Time)?)
.context(Overflow)?;
}

ensure!(seconds.is_finite() && seconds >= 0.0, Finite);
let (mut seconds, mut nanos) = (
i64::try_from(seconds).ok().context(Overflow)?,
i32::try_from(nanos).ok().context(Overflow)?,
);

let mut minutes = 0.0;
for split in pieces {
minutes = minutes * 60.0 + split.parse::<u32>().context(MinutesOrHours)? as f64;
if negate {
seconds = -seconds;
nanos = -nanos;
}

Ok(TimeSpan::from_seconds(factor * (seconds + minutes * 60.0)))
Ok(Duration::new(seconds, nanos).into())
}
}

Expand Down Expand Up @@ -205,19 +233,35 @@ mod tests {

#[test]
fn parsing() {
"-12:37:30.12".parse::<TimeSpan>().unwrap();
"-37:30.12".parse::<TimeSpan>().unwrap();
"-30.12".parse::<TimeSpan>().unwrap();
"-10:30".parse::<TimeSpan>().unwrap();
"-30".parse::<TimeSpan>().unwrap();
"-100".parse::<TimeSpan>().unwrap();
"--30".parse::<TimeSpan>().unwrap_err();
"-".parse::<TimeSpan>().unwrap_err();
"".parse::<TimeSpan>().unwrap_err();
"-10:-30".parse::<TimeSpan>().unwrap_err();
"10:-30".parse::<TimeSpan>().unwrap_err();
"10.5:30.5".parse::<TimeSpan>().unwrap_err();
"NaN".parse::<TimeSpan>().unwrap_err();
"Inf".parse::<TimeSpan>().unwrap_err();
TimeSpan::from_str("-12:37:30.12").unwrap();
TimeSpan::from_str("-37:30.12").unwrap();
TimeSpan::from_str("-30.12").unwrap();
TimeSpan::from_str("-10:30").unwrap();
TimeSpan::from_str("-30").unwrap();
TimeSpan::from_str("-100").unwrap();
TimeSpan::from_str("--30").unwrap_err();
TimeSpan::from_str("-").unwrap_err();
TimeSpan::from_str("").unwrap_err();
TimeSpan::from_str("-10:-30").unwrap_err();
TimeSpan::from_str("10:-30").unwrap_err();
TimeSpan::from_str("10.5:30.5").unwrap_err();
TimeSpan::from_str("NaN").unwrap_err();
TimeSpan::from_str("Inf").unwrap_err();
assert!(matches!(
TimeSpan::from_str("1:1:1:1:1:1:1:1:1:1:1:1:1:1:1:1:1:1:1:1:1:1:1:1:1:1:1:1:1"),
Err(ParseError::Overflow),
));
assert_eq!(
TimeSpan::from_str("10.123456789")
.unwrap()
.to_seconds_and_subsec_nanoseconds(),
(10, 123456789)
);
assert_eq!(
TimeSpan::from_str("10.0987654321987654321")
.unwrap()
.to_seconds_and_subsec_nanoseconds(),
(10, 98765432)
);
}
}
21 changes: 21 additions & 0 deletions src/util/xml/ascii_char.rs → src/util/ascii_char.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
use core::iter;

#[derive(Copy, Clone, PartialEq, Eq)]
#[repr(transparent)]
pub struct AsciiChar(u8);
Expand All @@ -14,6 +16,8 @@ impl AsciiChar {
pub const SINGLE_QUOTE: Self = Self::new(b'\'');
pub const DOUBLE_QUOTE: Self = Self::new(b'"');
pub const EQUALITY_SIGN: Self = Self::new(b'=');
pub const COLON: Self = Self::new(b':');
pub const DOT: Self = Self::new(b'.');

pub const fn new(c: u8) -> Self {
if c > 127 {
Expand Down Expand Up @@ -41,4 +45,21 @@ impl AsciiChar {
// to be valid UTF-8.
unsafe { Some((text.get_unchecked(..pos), text.get_unchecked(pos + 1..))) }
}

pub fn split_iter(self, text: &str) -> impl Iterator<Item = &str> {
let mut slot = Some(text);
iter::from_fn(move || {
let rem = slot?;
Some(match self.split_once(rem) {
Some((before, after)) => {
slot = Some(after);
before
}
None => {
slot = None;
rem
}
})
})
}
}
4 changes: 2 additions & 2 deletions src/util/xml/ascii_set.rs → src/util/ascii_set.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use super::{ascii_char::AsciiChar, trim_end};
use super::ascii_char::AsciiChar;

#[repr(transparent)]
pub struct AsciiSet([bool; 256]);
Expand Down Expand Up @@ -78,7 +78,7 @@ impl AsciiSet {
// position is found by the find_not method. Also since we only
// skipped ASCII characters, splitting the string at the position
// will not result in a string that is not valid UTF-8.
trim_end(unsafe { text.get_unchecked(pos..) })
self.trim_end(unsafe { text.get_unchecked(pos..) })
} else {
""
}
Expand Down
2 changes: 2 additions & 0 deletions src/util/mod.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
//! Various utilities used in this crate.

pub(crate) mod ascii_char;
pub(crate) mod ascii_set;
pub(crate) mod byte_parsing;
mod clear_vec;
pub(crate) mod not_nan;
Expand Down
18 changes: 6 additions & 12 deletions src/util/xml/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,6 @@ use core::{
iter, str,
};

use self::ascii_char::AsciiChar;

mod ascii_char;
mod ascii_set;
pub mod helper;
mod reader;
mod writer;
Expand All @@ -18,6 +14,8 @@ pub use self::{
writer::{AttributeWriter, DisplayValue, Value, Writer, NO_ATTRIBUTES},
};

use super::{ascii_char::AsciiChar, ascii_set::AsciiSet};

#[derive(Copy, Clone)]
pub struct Tag<'a>(&'a str);

Expand Down Expand Up @@ -64,7 +62,7 @@ impl<'a> Attributes<'a> {
iter::from_fn(move || {
rem = trim_start(rem);
let (key, space_maybe, after) =
ascii_set::AsciiSet::EQUALITY_OR_WHITE_SPACE.split_three_way(rem)?;
AsciiSet::EQUALITY_OR_WHITE_SPACE.split_three_way(rem)?;
rem = after;
if space_maybe.get() != b'=' {
rem = trim_start(rem);
Expand Down Expand Up @@ -191,19 +189,15 @@ impl<'a> Text<'a> {
}

fn split_whitespace(rem: &str) -> Option<(&str, &str)> {
ascii_set::AsciiSet::WHITE_SPACE.split(rem)
AsciiSet::WHITE_SPACE.split(rem)
}

fn trim(rem: &str) -> &str {
ascii_set::AsciiSet::WHITE_SPACE.trim(rem)
AsciiSet::WHITE_SPACE.trim(rem)
}

fn trim_start(rem: &str) -> &str {
ascii_set::AsciiSet::WHITE_SPACE.trim_start(rem)
}

fn trim_end(rem: &str) -> &str {
ascii_set::AsciiSet::WHITE_SPACE.trim_end(rem)
AsciiSet::WHITE_SPACE.trim_start(rem)
}

fn parse_hexadecimal(bytes: &str) -> char {
Expand Down
4 changes: 3 additions & 1 deletion src/util/xml/reader.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
use super::{ascii_char::AsciiChar, trim, Tag, TagName, Text};
use crate::util::ascii_char::AsciiChar;

use super::{trim, Tag, TagName, Text};

enum TagState<'a> {
Closed,
Expand Down
8 changes: 4 additions & 4 deletions src/util/xml/writer.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
use core::fmt::{self, Write};

use super::{ascii_char::AsciiChar, ascii_set, Text};
use crate::util::{ascii_char::AsciiChar, ascii_set::AsciiSet};

use super::Text;

pub struct Writer<T> {
sink: T,
Expand Down Expand Up @@ -173,9 +175,7 @@ pub trait Value {

impl Value for &str {
fn write_escaped<T: fmt::Write>(mut self, sink: &mut T) -> fmt::Result {
while let Some((before, char, rem)) =
ascii_set::AsciiSet::CHARACTERS_TO_ESCAPE.split_three_way(self)
{
while let Some((before, char, rem)) = AsciiSet::CHARACTERS_TO_ESCAPE.split_three_way(self) {
self = rem;
sink.write_str(before)?;
sink.write_str(match char {
Expand Down