Skip to content

Commit

Permalink
Add a PREFER_MONTH_OF_YEAR setting (#1146)
Browse files Browse the repository at this point in the history
  • Loading branch information
adnan-awan authored May 16, 2023
1 parent 3173d73 commit 0056c88
Show file tree
Hide file tree
Showing 8 changed files with 156 additions and 6 deletions.
2 changes: 2 additions & 0 deletions dateparser/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ class Settings:
* `TIMEZONE`
* `TO_TIMEZONE`
* `RETURN_AS_TIMEZONE_AWARE`
* `PREFER_MONTH_OF_YEAR`
* `PREFER_DAY_OF_MONTH`
* `PREFER_DATES_FROM`
* `RELATIVE_BASE`
Expand Down Expand Up @@ -191,6 +192,7 @@ def check_settings(settings):
# It defaults to 'default', but it's not allowed to use it directly
"type": bool
},
"PREFER_MONTH_OF_YEAR": {"values": ("current", "first", "last"), "type": str},
"PREFER_DAY_OF_MONTH": {"values": ("current", "first", "last"), "type": str},
"PREFER_DATES_FROM": {
"values": ("current_period", "past", "future"),
Expand Down
14 changes: 13 additions & 1 deletion dateparser/date.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
apply_timezone_from_settings,
get_timezone_from_tz_string,
set_correct_day_from_settings,
set_correct_month_from_settings,
)

APOSTROPHE_LOOK_ALIKE_CHARS = [
Expand Down Expand Up @@ -186,7 +187,18 @@ def parse_with_formats(date_string, date_formats, settings):
except ValueError:
continue
else:
if "%d" not in date_format:
missing_month = not any(m in date_format for m in ["%m", "%b", "%B"])
missing_day = "%d" not in date_format
if missing_month and missing_day:
period = "year"
date_obj = set_correct_month_from_settings(date_obj, settings)
date_obj = set_correct_day_from_settings(date_obj, settings)

elif missing_month:
period = "year"
date_obj = set_correct_month_from_settings(date_obj, settings)

elif missing_day:
period = "month"
date_obj = set_correct_day_from_settings(date_obj, settings)

Expand Down
15 changes: 15 additions & 0 deletions dateparser/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
get_previous_leap_year,
get_timezone_from_tz_string,
set_correct_day_from_settings,
set_correct_month_from_settings,
)
from dateparser.utils.strptime import strptime

Expand Down Expand Up @@ -595,6 +596,17 @@ def _correct_for_day(self, dateobj):
)
return dateobj

def _correct_for_month(self, dateobj):
    """Apply the ``PREFER_MONTH_OF_YEAR`` preference to ``dateobj`` if needed.

    ``dateobj`` is returned unchanged when ``self._token_month`` is truthy,
    or when the ``RELATIVE_BASE`` setting (its ``.month`` if it has one,
    otherwise the setting value itself) is truthy. In every other case the
    month is rewritten by ``set_correct_month_from_settings``.
    """
    base = getattr(self.settings, "RELATIVE_BASE", None)
    # A base exposing ``.month`` contributes its month; anything else (for
    # example a falsy placeholder) is used as-is for the truthiness check.
    base_month = base.month if hasattr(base, "month") else base

    month_already_known = getattr(self, "_token_month", None) or base_month
    if not month_already_known:
        return set_correct_month_from_settings(dateobj, self.settings)
    return dateobj

@classmethod
def parse(cls, datestring, settings, tz=None):
tokens = tokenizer(datestring)
Expand All @@ -606,6 +618,9 @@ def parse(cls, datestring, settings, tz=None):

# correction for preference of day: beginning, current, end
dateobj = po._correct_for_day(dateobj)

# correction for preference of month: beginning, current, end
dateobj = po._correct_for_month(dateobj)
period = po._get_period()

return dateobj, period
Expand Down
10 changes: 10 additions & 0 deletions dateparser/utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,16 @@ def set_correct_day_from_settings(date_obj, settings, current_day=None):
return date_obj.replace(day=options["last"])


def set_correct_month_from_settings(date_obj, settings, current_month=None):
    """Return ``date_obj`` with its month chosen by ``PREFER_MONTH_OF_YEAR``.

    ``settings.PREFER_MONTH_OF_YEAR`` selects the month: ``"first"`` maps to
    1, ``"last"`` maps to 12 and ``"current"`` maps to ``current_month`` or,
    when that is falsy, to the month of ``datetime.now()``.

    If replacing the month raises ``ValueError`` (for instance because the
    day of ``date_obj`` does not exist in the chosen month), month 12 is
    used instead.
    """
    if not current_month:
        current_month = datetime.now().month
    month_by_preference = {"first": 1, "last": 12, "current": current_month}

    try:
        preferred_month = month_by_preference[settings.PREFER_MONTH_OF_YEAR]
        return date_obj.replace(month=preferred_month)
    except ValueError:
        # The chosen month cannot hold this day; fall back to the last month.
        return date_obj.replace(month=month_by_preference["last"])


def registry(cls):
def choose(creator):
def constructor(cls, *args, **kwargs):
Expand Down
1 change: 1 addition & 0 deletions dateparser_data/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
"RETURN_AS_TIMEZONE_AWARE": "default",
# Incomplete dates
"PREFER_DAY_OF_MONTH": "current",
"PREFER_MONTH_OF_YEAR": "current",
"PREFER_DATES_FROM": "current_period",
"RELATIVE_BASE": False,
"STRICT_PARSING": False,
Expand Down
10 changes: 10 additions & 0 deletions docs/introduction.rst
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,16 @@ Incomplete Dates
>>> parse('August', settings={'PREFER_DATES_FROM': 'past'})
datetime.datetime(2015, 8, 15, 0, 0)

>>> import dateparser
>>> dateparser.parse("2015") # default behavior
datetime.datetime(2015, 3, 27, 0, 0)
>>> dateparser.parse("2015", settings={"PREFER_MONTH_OF_YEAR": "last"})
datetime.datetime(2015, 12, 27, 0, 0)
>>> dateparser.parse("2015", settings={"PREFER_MONTH_OF_YEAR": "first"})
datetime.datetime(2015, 1, 27, 0, 0)
>>> dateparser.parse("2015", settings={"PREFER_MONTH_OF_YEAR": "current"})
datetime.datetime(2015, 3, 27, 0, 0)

You can also ignore parsing incomplete dates altogether by setting `STRICT_PARSING` flag as follows:

>>> parse('December 2015', settings={'STRICT_PARSING': True})
Expand Down
12 changes: 12 additions & 0 deletions docs/settings.rst
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,18 @@ Handling Incomplete Dates
>>> parse('December 2015', settings={'PREFER_DAY_OF_MONTH': 'first'})
datetime.datetime(2015, 12, 1, 0, 0)

``PREFER_MONTH_OF_YEAR``: Similarly, this option is useful when the date string is missing the month part. It defaults to ``current`` and can also be ``first`` or ``last``, denoting the first and last month of the year respectively. For example, assuming the current date is March 27:

>>> from dateparser import parse
>>> parse("2015") # default behavior
datetime.datetime(2015, 3, 27, 0, 0)
>>> parse("2015", settings={"PREFER_MONTH_OF_YEAR": "last"})
datetime.datetime(2015, 12, 27, 0, 0)
>>> parse("2015", settings={"PREFER_MONTH_OF_YEAR": "first"})
datetime.datetime(2015, 1, 27, 0, 0)
>>> parse("2015", settings={"PREFER_MONTH_OF_YEAR": "current"}) # behaves exactly like the default
datetime.datetime(2015, 3, 27, 0, 0)

``PREFER_DATES_FROM``: defaults to ``current_period`` and can have ``past`` and ``future`` as values.

If date string is missing some part, this option ensures consistent results depending on the ``past`` or ``future`` preference, for example, assuming current date is `June 16, 2015`:
Expand Down
98 changes: 93 additions & 5 deletions tests/test_date.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
import os
import unittest
from collections import OrderedDict
from copy import copy
from datetime import datetime, timedelta
from datetime import timezone as dttz
from itertools import product
Expand All @@ -14,7 +13,7 @@

import dateparser
from dateparser import date
from dateparser.conf import settings
from dateparser.conf import Settings
from dateparser.date import DateData
from tests import BaseTestCase

Expand Down Expand Up @@ -435,7 +434,7 @@ def test_should_use_correct_day_from_settings_for_dates_without_day(
expected_day,
):
self.given_now(2014, 8, today_day)
settings_mod = copy(settings)
settings_mod = Settings()
settings_mod.PREFER_DAY_OF_MONTH = prefer_day_of_month
self.when_date_is_parsed_with_formats(date_string, date_formats, settings_mod)
self.then_date_was_parsed()
Expand All @@ -444,6 +443,95 @@ def test_should_use_correct_day_from_settings_for_dates_without_day(
datetime(year=expected_year, month=expected_month, day=expected_day)
)

@parameterized.expand(
    [
        param(
            date_string="2014",
            date_formats=["%Y"],
            expected_year=2014,
            prefer_month_of_year=preference,
            current_month=7,
            expected_month=month,
            expected_day=1,
        )
        # (PREFER_MONTH_OF_YEAR value, month expected while "now" is in July)
        for preference, month in (("first", 1), ("current", 7), ("last", 12))
    ]
)
def test_should_use_correct_month_from_settings_for_dates_without_month(
    self,
    date_string,
    date_formats,
    expected_year,
    prefer_month_of_year,
    current_month,
    expected_month,
    expected_day,
):
    # A year-only date must take its month from PREFER_MONTH_OF_YEAR and be
    # reported with a "year" period.
    self.given_now(2014, current_month, 1)
    custom_settings = Settings()
    custom_settings.PREFER_MONTH_OF_YEAR = prefer_month_of_year

    self.when_date_is_parsed_with_formats(date_string, date_formats, custom_settings)

    self.then_date_was_parsed()
    self.then_parsed_period_is("year")
    expected_date = datetime(
        year=expected_year, month=expected_month, day=expected_day
    )
    self.then_parsed_date_is(expected_date)

@parameterized.expand(
    [
        # Year-only input on 2014-04-15 with both preferences set to "last"
        # is expected to resolve to 31 December.
        param(
            date_string="2014",
            date_formats=["%Y"],
            current_day=15,
            current_month=4,
            prefer_day_of_month="last",
            prefer_month_of_year="last",
            expected_year=2014,
            expected_month=12,
            expected_day=31,
        ),
    ]
)
def test_should_use_correct_day_n_month_from_settings_for_dates_without_day_n_month(
    self,
    date_string,
    date_formats,
    current_day,
    current_month,
    prefer_day_of_month,
    prefer_month_of_year,
    expected_year,
    expected_month,
    expected_day,
):
    # When both day and month are missing, both preferences are applied and
    # the reported period is "year".
    self.given_now(2014, current_month, current_day)
    custom_settings = Settings()
    custom_settings.PREFER_DAY_OF_MONTH = prefer_day_of_month
    custom_settings.PREFER_MONTH_OF_YEAR = prefer_month_of_year

    self.when_date_is_parsed_with_formats(date_string, date_formats, custom_settings)

    self.then_date_was_parsed()
    self.then_parsed_period_is("year")
    expected_date = datetime(
        year=expected_year, month=expected_month, day=expected_day
    )
    self.then_parsed_date_is(expected_date)

def given_now(self, year, month, day, **time):
now = datetime(year, month, day, **time)
datetime_mock = Mock(wraps=datetime)
Expand All @@ -457,7 +545,7 @@ def when_date_is_parsed_with_formats(
self, date_string, date_formats, custom_settings=None
):
self.result = date.parse_with_formats(
date_string, date_formats, custom_settings or settings
date_string, date_formats, custom_settings or Settings()
)

def then_date_was_not_parsed(self):
Expand Down Expand Up @@ -984,7 +1072,7 @@ def test_is_valid_date_data(self, date_data):
language=["en"],
date_string="10 jan 2000",
date_formats=None,
settings=settings,
settings=Settings(),
)
self.when_date_object_is_validated(date_data)
self.then_date_object_is_invalid()
Expand Down

0 comments on commit 0056c88

Please sign in to comment.