Skip to content

Commit

Permalink
[YouTube] Fix parsing short relative date formats (English only) (#1068)
Browse files Browse the repository at this point in the history
  • Loading branch information
Theta-Dev authored Jun 18, 2023
1 parent d294ccb commit ad97f08
Show file tree
Hide file tree
Showing 5 changed files with 195 additions and 27 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,8 @@ private boolean textualDateMatches(final String textualDate, final String agoPhr
final String escapedSeparator = patternsHolder.wordSeparator().equals(" ")
// From JDK8 → \h - Treat horizontal spaces as a normal one
// (non-breaking space, thin space, etc.)
? "[ \\t\\xA0\\u1680\\u180e\\u2000-\\u200a\\u202f\\u205f\\u3000]"
// Also split the string on numbers to be able to parse strings like "2wk"
? "[ \\t\\xA0\\u1680\\u180e\\u2000-\\u200a\\u202f\\u205f\\u3000\\d]"
: Pattern.quote(patternsHolder.wordSeparator());

// (^|separator)pattern($|separator)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
package org.schabi.newpipe.extractor.utils;

import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.localization.Localization;
import org.schabi.newpipe.extractor.localization.TimeAgoParser;
import org.schabi.newpipe.extractor.localization.TimeAgoPatternsManager;

import java.time.OffsetDateTime;
import java.time.ZoneOffset;
import java.time.temporal.ChronoUnit;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertTrue;

/**
 * Tests parsing of English relative dates ("time ago" strings) with {@link TimeAgoParser}.
 *
 * <p>{@link #parseTimeago()} covers the long unit names ("3 weeks ago") and
 * {@link #parseTimeagoShort()} covers the abbreviated ones ("3 wk ago").</p>
 *
 * <p>Both tests share the parser and the reference instant created once in {@link #setUp()}.
 * Second, minute and hour inputs are checked with a one second tolerance; day and larger
 * inputs are compared against the reference instant truncated to the hour.</p>
 *
 * <p>NOTE(review): the truncated comparisons presume that the parser itself truncates its
 * result to the hour for day-or-larger units and uses its own notion of "now". If the
 * wall-clock hour changes between {@link #setUp()} and the call to {@code parse}, these
 * assertions could fail spuriously - confirm against {@link TimeAgoParser}.</p>
 */
public class TimeagoTest {
    private static TimeAgoParser parser;
    private static OffsetDateTime now;

    /**
     * Creates the parser for {@link Localization#DEFAULT} and captures the reference instant
     * (in UTC) that every expected value is derived from.
     */
    @BeforeAll
    public static void setUp() {
        parser = TimeAgoPatternsManager.getTimeAgoParserFor(Localization.DEFAULT);
        now = OffsetDateTime.now(ZoneOffset.UTC);
    }

    /**
     * Parses relative dates written with the full unit names, singular and plural.
     *
     * @throws ParsingException if the parser rejects one of the inputs
     */
    @Test
    void parseTimeago() throws ParsingException {
        assertParsedWithin1s(now.minus(1, ChronoUnit.SECONDS), "1 second ago");
        assertParsedWithin1s(now.minus(12, ChronoUnit.SECONDS), "12 seconds ago");
        assertParsedWithin1s(now.minus(1, ChronoUnit.MINUTES), "1 minute ago");
        assertParsedWithin1s(now.minus(23, ChronoUnit.MINUTES), "23 minutes ago");
        assertParsedWithin1s(now.minus(1, ChronoUnit.HOURS), "1 hour ago");
        assertParsedWithin1s(now.minus(8, ChronoUnit.HOURS), "8 hours ago");

        assertParsedToHour(now.minus(1, ChronoUnit.DAYS), "1 day ago");
        assertParsedToHour(now.minus(3, ChronoUnit.DAYS), "3 days ago");
        assertParsedToHour(now.minus(1, ChronoUnit.WEEKS), "1 week ago");
        assertParsedToHour(now.minus(3, ChronoUnit.WEEKS), "3 weeks ago");
        assertParsedToHour(now.minus(1, ChronoUnit.MONTHS), "1 month ago");
        assertParsedToHour(now.minus(3, ChronoUnit.MONTHS), "3 months ago");

        // NOTE(review): year inputs are expected to land one extra day in the past;
        // presumably this mirrors how the parser handles years - confirm.
        assertParsedToHour(now.minus(1, ChronoUnit.YEARS).minusDays(1), "1 year ago");
        assertParsedToHour(now.minus(3, ChronoUnit.YEARS).minusDays(1), "3 years ago");
    }

    /**
     * Parses relative dates written with abbreviated unit names ("sec", "min", "hr", "wk",
     * "mo", "yr"); days have no abbreviation here and use the full name.
     *
     * @throws ParsingException if the parser rejects one of the inputs
     */
    @Test
    void parseTimeagoShort() throws ParsingException {
        assertParsedWithin1s(now.minus(1, ChronoUnit.SECONDS), "1 sec ago");
        assertParsedWithin1s(now.minus(12, ChronoUnit.SECONDS), "12 sec ago");
        assertParsedWithin1s(now.minus(1, ChronoUnit.MINUTES), "1 min ago");
        assertParsedWithin1s(now.minus(23, ChronoUnit.MINUTES), "23 min ago");
        assertParsedWithin1s(now.minus(1, ChronoUnit.HOURS), "1 hr ago");
        assertParsedWithin1s(now.minus(8, ChronoUnit.HOURS), "8 hr ago");

        assertParsedToHour(now.minus(1, ChronoUnit.DAYS), "1 day ago");
        assertParsedToHour(now.minus(3, ChronoUnit.DAYS), "3 days ago");
        assertParsedToHour(now.minus(1, ChronoUnit.WEEKS), "1 wk ago");
        assertParsedToHour(now.minus(3, ChronoUnit.WEEKS), "3 wk ago");
        assertParsedToHour(now.minus(1, ChronoUnit.MONTHS), "1 mo ago");
        assertParsedToHour(now.minus(3, ChronoUnit.MONTHS), "3 mo ago");

        // Same extra-day expectation for years as in parseTimeago().
        assertParsedToHour(now.minus(1, ChronoUnit.YEARS).minusDays(1), "1 yr ago");
        assertParsedToHour(now.minus(3, ChronoUnit.YEARS).minusDays(1), "3 yr ago");
    }

    /**
     * Parses {@code textualDate} with the shared parser and checks that the result is at most
     * one second away from {@code expected}.
     *
     * @param expected    the instant the input should resolve to
     * @param textualDate the relative date string handed to the parser
     * @throws ParsingException if the parser rejects the input
     */
    private static void assertParsedWithin1s(final OffsetDateTime expected,
                                             final String textualDate)
            throws ParsingException {
        assertTimeWithin1s(expected, parser.parse(textualDate).offsetDateTime());
    }

    /**
     * Parses {@code textualDate} with the shared parser and checks that the result equals
     * {@code expected} truncated to the hour.
     *
     * @param expected    the (not yet truncated) instant the input should resolve to
     * @param textualDate the relative date string handed to the parser
     * @throws ParsingException if the parser rejects the input
     */
    private static void assertParsedToHour(final OffsetDateTime expected,
                                           final String textualDate)
            throws ParsingException {
        assertEquals(
                expected.truncatedTo(ChronoUnit.HOURS),
                parser.parse(textualDate).offsetDateTime()
        );
    }

    /**
     * Asserts that two instants are no more than one second apart, comparing their epoch
     * seconds.
     *
     * @param expected the instant that was expected
     * @param actual   the instant that was produced
     */
    static void assertTimeWithin1s(final OffsetDateTime expected, final OffsetDateTime actual) {
        final long delta = Math.abs(expected.toEpochSecond() - actual.toEpochSecond());
        // Build the message lazily: it is only needed when the assertion fails.
        assertTrue(delta <= 1,
                () -> String.format("Expected: %s\nActual: %s", expected, actual));
    }
}
39 changes: 26 additions & 13 deletions timeago-parser/raw/unique_patterns.json
Original file line number Diff line number Diff line change
Expand Up @@ -415,62 +415,75 @@
"word_separator": " ",
"seconds": [
"second",
"seconds"
"seconds",
"sec"
],
"minutes": [
"minute",
"minutes"
"minutes",
"min"
],
"hours": [
"hour",
"hours"
"hours",
"h"
],
"days": [
"day",
"days"
"days",
"d"
],
"weeks": [
"week",
"weeks"
"weeks",
"w"
],
"months": [
"month",
"months"
"months",
"mo"
],
"years": [
"year",
"years"
"years",
"y"
]
},
"en-GB": {
"word_separator": " ",
"seconds": [
"second",
"seconds"
"seconds",
"sec"
],
"minutes": [
"minute",
"minutes"
"minutes",
"min"
],
"hours": [
"hour",
"hours"
"hours",
"hr"
],
"days": [
"day",
"days"
],
"weeks": [
"week",
"weeks"
"weeks",
"wk"
],
"months": [
"month",
"months"
"months",
"mo"
],
"years": [
"year",
"years"
"years",
"yr"
]
},
"es": {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,13 @@
public class en extends PatternsHolder {
private static final String WORD_SEPARATOR = " ";
private static final String[]
SECONDS /**/ = {"second", "seconds"},
MINUTES /**/ = {"minute", "minutes"},
HOURS /**/ = {"hour", "hours"},
DAYS /**/ = {"day", "days"},
WEEKS /**/ = {"week", "weeks"},
MONTHS /**/ = {"month", "months"},
YEARS /**/ = {"year", "years"};
SECONDS /**/ = {"second", "seconds", "sec"},
MINUTES /**/ = {"minute", "minutes", "min"},
HOURS /**/ = {"hour", "hours", "h"},
DAYS /**/ = {"day", "days", "d"},
WEEKS /**/ = {"week", "weeks", "w"},
MONTHS /**/ = {"month", "months", "mo"},
YEARS /**/ = {"year", "years", "y"};

private static final en INSTANCE = new en();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,13 @@
public class en_GB extends PatternsHolder {
private static final String WORD_SEPARATOR = " ";
private static final String[]
SECONDS /**/ = {"second", "seconds"},
MINUTES /**/ = {"minute", "minutes"},
HOURS /**/ = {"hour", "hours"},
SECONDS /**/ = {"second", "seconds", "sec"},
MINUTES /**/ = {"minute", "minutes", "min"},
HOURS /**/ = {"hour", "hours", "hr"},
DAYS /**/ = {"day", "days"},
WEEKS /**/ = {"week", "weeks"},
MONTHS /**/ = {"month", "months"},
YEARS /**/ = {"year", "years"};
WEEKS /**/ = {"week", "weeks", "wk"},
MONTHS /**/ = {"month", "months", "mo"},
YEARS /**/ = {"year", "years", "yr"};

private static final en_GB INSTANCE = new en_GB();

Expand Down

0 comments on commit ad97f08

Please sign in to comment.