diff --git a/utils/ixdtf/README.md b/utils/ixdtf/README.md index c1ba21ad8af..c631c64b721 100644 --- a/utils/ixdtf/README.md +++ b/utils/ixdtf/README.md @@ -145,21 +145,13 @@ agnostic regarding the ambiguity caused by the criticality of an unknown key. Th be provided to the user to handle the unknown key's critical flag as they see fit. ```rust -use ixdtf::parsers::IxdtfParser; +use ixdtf::{parsers::IxdtfParser, ParserError}; let example_one = "2024-03-02T08:48:00-05:00[u-ca=iso8601][!answer-to-universe=fortytwo]"; -let mut ixdtf = IxdtfParser::new(example_one); - -let result = ixdtf.parse().unwrap(); - -let annotation = &result.annotations[0]; +let result = IxdtfParser::new(example_one).parse(); -// While an unknown annotation should not be critical, it is up to the user -// to act on that error. -assert!(annotation.critical); -assert_eq!(annotation.key, "answer-to-universe"); -assert_eq!(annotation.value, "fortytwo"); +assert_eq!(result, Err(ParserError::UnrecognizedCritical)); ``` (4) belongs to group (b) and shows an ambiguous Time Zone caused by a misalignment diff --git a/utils/ixdtf/src/error.rs b/utils/ixdtf/src/error.rs index 71427be6320..f15cab726b8 100644 --- a/utils/ixdtf/src/error.rs +++ b/utils/ixdtf/src/error.rs @@ -75,6 +75,8 @@ pub enum ParserError { // Duplicate calendar with critical. #[displaydoc("Duplicate calendars cannot be provided when one is critical.")] CriticalDuplicateCalendar, + #[displaydoc("Unrecognized annoation is marked as critical.")] + UnrecognizedCritical, // Time Zone Errors #[displaydoc("Invalid time zone leading character.")] diff --git a/utils/ixdtf/src/lib.rs b/utils/ixdtf/src/lib.rs index 708f7ddc62a..fa1fe11700c 100644 --- a/utils/ixdtf/src/lib.rs +++ b/utils/ixdtf/src/lib.rs @@ -145,21 +145,13 @@ //! be provided to the user to handle the unknown key's critical flag as they see fit. //! //! ```rust -//! use ixdtf::parsers::IxdtfParser; +//! use ixdtf::{parsers::IxdtfParser, ParserError}; //! //! 
let example_one = "2024-03-02T08:48:00-05:00[u-ca=iso8601][!answer-to-universe=fortytwo]"; //! -//! let mut ixdtf = IxdtfParser::new(example_one); -//! -//! let result = ixdtf.parse().unwrap(); -//! -//! let annotation = &result.annotations[0]; +//! let result = IxdtfParser::new(example_one).parse(); //! -//! // While an unknown annotation should not be critical, it is up to the user -//! // to act on that error. -//! assert!(annotation.critical); -//! assert_eq!(annotation.key, "answer-to-universe"); -//! assert_eq!(annotation.value, "fortytwo"); +//! assert_eq!(result, Err(ParserError::UnrecognizedCritical)); //! ``` //! //! (4) belongs to group (b) and shows an ambiguous Time Zone caused by a misalignment diff --git a/utils/ixdtf/src/parsers/annotations.rs b/utils/ixdtf/src/parsers/annotations.rs index 27d24547909..124a9344afe 100644 --- a/utils/ixdtf/src/parsers/annotations.rs +++ b/utils/ixdtf/src/parsers/annotations.rs @@ -18,17 +18,17 @@ use crate::{ ParserError, ParserResult, }; -use alloc::vec::Vec; - /// Strictly a parsing intermediary for the checking the common annotation backing. pub(crate) struct AnnotationSet<'a> { pub(crate) tz: Option>, pub(crate) calendar: Option<&'a str>, - pub(crate) annotations: Vec>, } /// Parse a `TimeZoneAnnotation` `Annotations` set -pub(crate) fn parse_annotation_set<'a>(cursor: &mut Cursor<'a>) -> ParserResult> { +pub(crate) fn parse_annotation_set<'a>( + cursor: &mut Cursor<'a>, + handler: impl FnMut(Annotation<'a>) -> Option>, +) -> ParserResult> { // Parse the first annotation. 
let tz_annotation = timezone::parse_ambiguous_tz_annotation(cursor)?; @@ -36,48 +36,58 @@ pub(crate) fn parse_annotation_set<'a>(cursor: &mut Cursor<'a>) -> ParserResult< let annotations = cursor.check_or(false, is_annotation_open); if annotations { - let annotations = parse_annotations(cursor)?; + let calendar = parse_annotations(cursor, handler)?; return Ok(AnnotationSet { tz: tz_annotation, - calendar: annotations.0, - annotations: annotations.1, + calendar, }); } Ok(AnnotationSet { tz: tz_annotation, calendar: None, - annotations: Vec::default(), }) } /// Parse any number of `KeyValueAnnotation`s pub(crate) fn parse_annotations<'a>( cursor: &mut Cursor<'a>, -) -> ParserResult<(Option<&'a str>, Vec>)> { - let mut annotations = Vec::default(); - let mut calendar = None; - let mut calendar_crit = false; + mut handler: impl FnMut(Annotation<'a>) -> Option>, +) -> ParserResult> { + let mut calendar: Option> = None; while cursor.check_or(false, is_annotation_open) { - let kv = parse_kv_annotation(cursor)?; - - if kv.key == "u-ca" { - if calendar.is_none() { - calendar = Some(kv.value); - calendar_crit = kv.critical; - continue; + let annotation = handler(parse_kv_annotation(cursor)?); + + match annotation { + // Check if the key is the registered key "u-ca". + Some(kv) if kv.key == "u-ca" => { + // Check the calendar + match calendar { + Some(calendar) + // if calendars do not match and one of them is critical + if calendar.value != kv.value && (calendar.critical || kv.critical) => + { + return Err(ParserError::CriticalDuplicateCalendar) + } + // If there is not yet a calendar, save it. + None => { + calendar = Some(kv); + } + _ => {} + } } - - if calendar_crit || kv.critical { - return Err(ParserError::CriticalDuplicateCalendar); + Some(unknown_kv) => { + // Throw an error on any unrecognized annotations that are marked as critical. 
+ if unknown_kv.critical { + return Err(ParserError::UnrecognizedCritical); + } } + None => {} } - - annotations.push(kv); } - Ok((calendar, annotations)) + Ok(calendar.map(|a| a.value)) } /// Parse an annotation with an `AnnotationKey`=`AnnotationValue` pair. diff --git a/utils/ixdtf/src/parsers/datetime.rs b/utils/ixdtf/src/parsers/datetime.rs index c619fd1174c..5617d7a7f12 100644 --- a/utils/ixdtf/src/parsers/datetime.rs +++ b/utils/ixdtf/src/parsers/datetime.rs @@ -18,9 +18,7 @@ use crate::{ ParserError, ParserResult, }; -use alloc::vec::Vec; - -use super::records::UTCOffsetRecord; +use super::records::{Annotation, UTCOffsetRecord}; #[derive(Debug, Default, Clone)] /// A `DateTime` Parse Node that contains the date, time, and offset info. @@ -43,6 +41,7 @@ pub(crate) struct DateTimeRecord { /// [instant]: https://tc39.es/proposal-temporal/#prod-TemporalInstantString pub(crate) fn parse_annotated_date_time<'a>( cursor: &mut Cursor<'a>, + handler: impl FnMut(Annotation<'a>) -> Option>, ) -> ParserResult> { let date_time = parse_date_time(cursor)?; @@ -57,11 +56,10 @@ pub(crate) fn parse_annotated_date_time<'a>( offset: date_time.time_zone, tz: None, calendar: None, - annotations: Vec::default(), }); } - let annotation_set = annotations::parse_annotation_set(cursor)?; + let annotation_set = annotations::parse_annotation_set(cursor, handler)?; cursor.close()?; @@ -71,13 +69,13 @@ pub(crate) fn parse_annotated_date_time<'a>( offset: date_time.time_zone, tz: annotation_set.tz, calendar: annotation_set.calendar, - annotations: annotation_set.annotations, }) } /// Parses an AnnotatedMonthDay. 
pub(crate) fn parse_annotated_month_day<'a>( cursor: &mut Cursor<'a>, + handler: impl FnMut(Annotation<'a>) -> Option>, ) -> ParserResult> { let date = parse_month_day(cursor)?; @@ -90,11 +88,10 @@ pub(crate) fn parse_annotated_month_day<'a>( offset: None, tz: None, calendar: None, - annotations: Vec::default(), }); } - let annotation_set = annotations::parse_annotation_set(cursor)?; + let annotation_set = annotations::parse_annotation_set(cursor, handler)?; Ok(IxdtfParseRecord { date: Some(date), @@ -102,13 +99,13 @@ pub(crate) fn parse_annotated_month_day<'a>( offset: None, tz: annotation_set.tz, calendar: annotation_set.calendar, - annotations: annotation_set.annotations, }) } /// Parse an annotated YearMonth pub(crate) fn parse_annotated_year_month<'a>( cursor: &mut Cursor<'a>, + handler: impl FnMut(Annotation<'a>) -> Option>, ) -> ParserResult> { let year = parse_date_year(cursor)?; cursor.advance_if(cursor.check_or(false, is_hyphen)); @@ -129,11 +126,10 @@ pub(crate) fn parse_annotated_year_month<'a>( offset: None, tz: None, calendar: None, - annotations: Vec::default(), }); } - let annotation_set = annotations::parse_annotation_set(cursor)?; + let annotation_set = annotations::parse_annotation_set(cursor, handler)?; Ok(IxdtfParseRecord { date: Some(date), @@ -141,7 +137,6 @@ pub(crate) fn parse_annotated_year_month<'a>( offset: None, tz: annotation_set.tz, calendar: annotation_set.calendar, - annotations: annotation_set.annotations, }) } diff --git a/utils/ixdtf/src/parsers/mod.rs b/utils/ixdtf/src/parsers/mod.rs index f8f32775a17..a54aa668aeb 100644 --- a/utils/ixdtf/src/parsers/mod.rs +++ b/utils/ixdtf/src/parsers/mod.rs @@ -6,12 +6,12 @@ use crate::{ParserError, ParserResult}; -extern crate alloc; - #[cfg(feature = "duration")] use records::DurationParseRecord; use records::IxdtfParseRecord; +use self::records::Annotation; + pub mod records; mod annotations; @@ -62,22 +62,94 @@ impl<'a> IxdtfParser<'a> { /// /// [temporal-dt]: 
https://tc39.es/proposal-temporal/#prod-TemporalDateTimeString pub fn parse(&mut self) -> ParserResult> { - datetime::parse_annotated_date_time(&mut self.cursor) + self.parse_with_annotation_handler(Some) + } + + /// Parses the source as an annotated DateTime string with an Annotation handler. + /// + /// # Annotation Handling + /// + /// The annotation handler provides a parsed annotation to the callback and expects a return + /// of an annotation or None. `ixdtf` performs baseline annotation checks once the handler + /// returns. Returning None will ignore the standard checks for that annotation. + /// + /// Unless the user's application has a specific reason to bypass action on an annotation, such + /// as, not throwing an error on an unknown key's criticality or superseding a calendar based on + /// its critical flag, it is recommended to return the annotation value. + pub fn parse_with_annotation_handler( + &mut self, + handler: impl FnMut(Annotation<'a>) -> Option>, + ) -> ParserResult> { + datetime::parse_annotated_date_time(&mut self.cursor, handler) } /// Parses the source as an annotated YearMonth string. pub fn parse_year_month(&mut self) -> ParserResult> { - datetime::parse_annotated_year_month(&mut self.cursor) + self.parse_year_month_with_annotation_handler(Some) + } + + /// Parses the source as an annotated YearMonth string with an Annotation handler. + /// + /// # Annotation Handling + /// + /// The annotation handler provides a parsed annotation to the callback and expects a return + /// of an annotation or None. `ixdtf` performs baseline annotation checks once the handler + /// returns. Returning None will ignore the standard checks for that annotation. + /// + /// Unless the user's application has a specific use case to bypass action on an annotation, such + /// as, not throwing an error on an unknown key's criticality or superseding a calendar based on + /// its critical flag, it is recommended to return the annotation value. 
+ pub fn parse_year_month_with_annotation_handler( + &mut self, + handler: impl FnMut(Annotation<'a>) -> Option>, + ) -> ParserResult> { + datetime::parse_annotated_year_month(&mut self.cursor, handler) } /// Parses the source as an annotated MonthDay string. pub fn parse_month_day(&mut self) -> ParserResult> { - datetime::parse_annotated_month_day(&mut self.cursor) + self.parse_month_day_with_annotation_handler(Some) + } + + /// Parses the source as an annotated MonthDay string with an Annotation handler. + /// + /// # Annotation Handling + /// + /// The annotation handler provides a parsed annotation to the callback and expects a return + /// of an annotation or None. `ixdtf` performs baseline annotation checks once the handler + /// returns. Returning None will ignore the standard checks for that annotation. + /// + /// Unless the user's application has a specific reason to bypass action on an annotation, such + /// as, not throwing an error on an unknown key's criticality or superseding a calendar based on + /// its critical flag, it is recommended to return the annotation value. + pub fn parse_month_day_with_annotation_handler( + &mut self, + handler: impl FnMut(Annotation<'a>) -> Option>, + ) -> ParserResult> { + datetime::parse_annotated_month_day(&mut self.cursor, handler) } /// Parses the source as an annotated Time string. pub fn parse_time(&mut self) -> ParserResult> { - time::parse_annotated_time_record(&mut self.cursor) + self.parse_time_with_annotation_handler(Some) + } + + /// Parses the source as an annotated Time string with an Annotation handler. + /// + /// # Annotation Handling + /// + /// The annotation handler provides a parsed annotation to the callback and expects a return + /// of an annotation or None. `ixdtf` performs baseline annotation checks once the handler + /// returns. Returning None will ignore the standard checks for that annotation. 
+ /// + /// Unless the user's application has a specific reason to bypass action on an annotation, such + /// as, not throwing an error on an unknown key's criticality or superseding a calendar based on + /// its critical flag, it is recommended to return the annotation value. + pub fn parse_time_with_annotation_handler( + &mut self, + handler: impl FnMut(Annotation<'a>) -> Option>, + ) -> ParserResult> { + time::parse_annotated_time_record(&mut self.cursor, handler) } } diff --git a/utils/ixdtf/src/parsers/records.rs b/utils/ixdtf/src/parsers/records.rs index 72ca37b3068..656031d3862 100644 --- a/utils/ixdtf/src/parsers/records.rs +++ b/utils/ixdtf/src/parsers/records.rs @@ -4,8 +4,6 @@ //! The records that `ixdtf`'s contain the resulting values of parsing. -use alloc::vec::Vec; - /// An `IxdtfParseRecord` is an intermediary record returned by `IxdtfParser`. #[non_exhaustive] #[derive(Default, Debug, PartialEq)] @@ -20,8 +18,6 @@ pub struct IxdtfParseRecord<'a> { pub tz: Option>, /// The parsed calendar value. pub calendar: Option<&'a str>, - /// A collection of annotations provided on an IXDTF string. - pub annotations: Vec>, } #[non_exhaustive] diff --git a/utils/ixdtf/src/parsers/tests.rs b/utils/ixdtf/src/parsers/tests.rs index e05efa15fd9..76a2a5e356f 100644 --- a/utils/ixdtf/src/parsers/tests.rs +++ b/utils/ixdtf/src/parsers/tests.rs @@ -2,11 +2,15 @@ // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). 
+extern crate alloc; use alloc::vec::Vec; use crate::{ parsers::{ - records::{DateRecord, IxdtfParseRecord, TimeRecord, TimeZoneAnnotation, TimeZoneRecord}, + records::{ + Annotation, DateRecord, IxdtfParseRecord, TimeRecord, TimeZoneAnnotation, + TimeZoneRecord, + }, IxdtfParser, }, ParserError, @@ -175,7 +179,7 @@ fn bad_extended_year() { fn good_annotations_date_time() { let mut basic = IxdtfParser::new("2020-11-08[!America/Argentina/ComodRivadavia][u-ca=iso8601][foo=bar]"); - let mut omitted = IxdtfParser::new("+0020201108[u-ca=iso8601][f-1a2b=a0sa-2l4s]"); + let mut omitted = IxdtfParser::new("+0020201108[!u-ca=iso8601][f-1a2b=a0sa-2l4s]"); let result = basic.parse().unwrap(); @@ -279,6 +283,121 @@ fn invalid_annotations() { Err(ParserError::InvalidAnnotation), "Invalid annotation parsing: \"{bad_value}\" should fail to parse." ); + + let bad_value = "2021-01-29 02:12:48+01:00:00[u-ca=iso8601][!foo=bar]"; + let err = IxdtfParser::new(bad_value).parse(); + assert_eq!( + err, + Err(ParserError::UnrecognizedCritical), + "Invalid annotation parsing: \"{bad_value}\" should fail to parse." + ); +} + +#[test] +fn invalid_calendar_annotations() { + let bad_value = "2021-01-29 02:12:48+01:00:00[!u-ca=iso8601][u-ca=japanese]"; + let err = IxdtfParser::new(bad_value).parse(); + assert_eq!( + err, + Err(ParserError::CriticalDuplicateCalendar), + "Invalid annotation parsing: \"{bad_value}\" should fail to parse." + ); + + let bad_value = "2021-01-29 02:12:48+01:00:00[u-ca=japanese][u-ca=iso8601][!u-ca=gregorian]"; + let err = IxdtfParser::new(bad_value).parse(); + assert_eq!( + err, + Err(ParserError::CriticalDuplicateCalendar), + "Invalid annotation parsing: \"{bad_value}\" should fail to parse." 
+ ); +} + +#[test] +fn duplicate_same_calendar() { + let duplicate_calendars = [ + "2020-11-11[!u-ca=iso8601][u-ca=iso8601]", + "2020-11-11[u-ca=iso8601][!u-ca=iso8601]", + ]; + + for duplicate in duplicate_calendars { + let result = IxdtfParser::new(duplicate).parse().unwrap(); + let calendar = result.calendar.unwrap(); + assert_eq!( + calendar, "iso8601", + "Invalid Ixdtf parsing: \"{duplicate}\" should fail parsing." + ); + } +} + +#[test] +fn valid_calendar_annotations() { + let value = "2021-01-29 02:12:48+01:00:00[u-ca=japanese][u-ca=iso8601][u-ca=gregorian]"; + let mut annotations = Vec::default(); + let result = IxdtfParser::new(value) + .parse_with_annotation_handler(|annotation| { + annotations.push(annotation.clone()); + Some(annotation) + }) + .unwrap(); + assert_eq!( + result.calendar, + Some("japanese"), + "Valid annotation parsing: \"{value}\" should parse calendar as 'japanese'." + ); + + assert_eq!( + annotations[1], + Annotation { + critical: false, + key: "u-ca", + value: "iso8601" + }, + "Valid annotation parsing: \"{value}\" should parse first annotation as 'iso8601'." + ); + + assert_eq!( + annotations[2], + Annotation { + critical: false, + key: "u-ca", + value: "gregorian" + }, + "Valid annotation parsing: \"{value}\" should parse second annotation as 'gregorian'." + ); + + let value = "2021-01-29 02:12:48+01:00:00[u-ca=gregorian][u-ca=iso8601][u-ca=japanese]"; + let mut annotations = Vec::default(); + let result = IxdtfParser::new(value) + .parse_with_annotation_handler(|annotation| { + annotations.push(annotation.clone()); + Some(annotation) + }) + .unwrap(); + assert_eq!( + result.calendar, + Some("gregorian"), + "Valid annotation parsing: \"{value}\" should parse calendar as 'gregorian'." + ); + + assert_eq!( + annotations[1], + Annotation { + critical: false, + key: "u-ca", + value: "iso8601" + }, + "Valid annotation parsing: \"{value}\" should parse first annotation as 'iso8601'." 
+ ); + + assert_eq!( + annotations[2], + Annotation { + critical: false, + key: "u-ca", + value: "japanese" + }, + "Valid annotation parsing: \"{value}\" should parse second annotation as 'japanese'." + ); } #[test] @@ -401,22 +520,6 @@ fn invalid_time() { ); } -#[test] -fn temporal_invalid_annotations() { - let invalid_annotations = [ - "2020-11-11[!u-ca=iso8601][u-ca=iso8601]", - "2020-11-11[u-ca=iso8601][!u-ca=iso8601]", - ]; - - for invalid in invalid_annotations { - let err_result = IxdtfParser::new(invalid).parse(); - assert!( - err_result.is_err(), - "Invalid ISO annotation parsing: \"{invalid}\" should fail parsing." - ); - } -} - #[test] fn temporal_valid_instant_strings() { let instants = [ @@ -605,7 +708,6 @@ fn test_correct_datetime() { offset: None, tz: None, calendar: None, - annotations: Vec::default(), }) ); @@ -623,7 +725,6 @@ fn test_correct_datetime() { time: None, tz: None, calendar: None, - annotations: Vec::default(), }) ); @@ -646,7 +747,6 @@ fn test_correct_datetime() { offset: None, tz: None, calendar: None, - annotations: Vec::default(), }) ); @@ -669,7 +769,6 @@ fn test_correct_datetime() { offset: None, tz: None, calendar: None, - annotations: Vec::default(), }) ); @@ -692,7 +791,6 @@ fn test_correct_datetime() { offset: None, tz: None, calendar: None, - annotations: Vec::default(), }) ); @@ -715,7 +813,6 @@ fn test_correct_datetime() { offset: None, tz: None, calendar: None, - annotations: Vec::default(), }) ); @@ -738,7 +835,6 @@ fn test_correct_datetime() { offset: None, tz: None, calendar: None, - annotations: Vec::default(), }) ); } diff --git a/utils/ixdtf/src/parsers/time.rs b/utils/ixdtf/src/parsers/time.rs index ee218fd9014..6c7c63e7aa8 100644 --- a/utils/ixdtf/src/parsers/time.rs +++ b/utils/ixdtf/src/parsers/time.rs @@ -4,8 +4,6 @@ //! 
Parsing of Time Values -use alloc::vec::Vec; - use crate::{ assert_syntax, parsers::{ @@ -13,7 +11,7 @@ use crate::{ is_annotation_open, is_decimal_separator, is_sign, is_time_designator, is_time_separator, is_utc_designator, }, - records::TimeRecord, + records::{Annotation, TimeRecord}, timezone::parse_date_time_utc, Cursor, }, @@ -26,6 +24,7 @@ use super::{annotations, records::IxdtfParseRecord}; /// value does not align pub(crate) fn parse_annotated_time_record<'a>( cursor: &mut Cursor<'a>, + handler: impl FnMut(Annotation<'a>) -> Option>, ) -> ParserResult> { let designator = cursor.check_or(false, is_time_designator); cursor.advance_if(designator); @@ -51,11 +50,10 @@ pub(crate) fn parse_annotated_time_record<'a>( offset, tz: None, calendar: None, - annotations: Vec::default(), }); } - let annotations = annotations::parse_annotation_set(cursor)?; + let annotations = annotations::parse_annotation_set(cursor, handler)?; cursor.close()?; @@ -65,7 +63,6 @@ pub(crate) fn parse_annotated_time_record<'a>( offset, tz: annotations.tz, calendar: annotations.calendar, - annotations: annotations.annotations, }) }