-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathdate_parse.go
200 lines (177 loc) · 5.23 KB
/
date_parse.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
package fuzzytime
import (
"regexp"
"strconv"
"strings"
)
// dateCrackers is the ordered list of regexps used to recognise dates.
//
// ExtractDate tries the patterns from first to last and stops at the first
// one whose captured values validate, so order matters(ish): patterns that
// consume more of the string (day name + day + month + year) come before
// shorter ones (month + day only).
//
// Every pattern is case-insensitive ((?i)). The named groups are the
// contract with ExtractDate:
//   - year, month, day: validated and stored in the resulting Date
//   - x1, x2, x3:       ambiguous numeric components, resolved later
//   - dayname:          matched but not otherwise inspected by ExtractDate
//
// \p{L}{3,} matches a run of three or more letters (month/day names) and
// \p{Z} matches Unicode separator characters in addition to \s.
var dateCrackers = []*regexp.Regexp{
// Day name, day, month name, 2- or 4-digit year:
//   "Tuesday 16 December 2008"
//   "Tue 29 Jan 08"
//   "Monday, 22 October 2007"
//   "Tuesday, 21st January, 2003"
regexp.MustCompile(`(?i)(?P<dayname>\p{L}{3,})[.,\s\p{Z}]+(?P<day>\d{1,2})(?:st|nd|rd|th)?[\s\p{Z}]+(?P<month>\p{L}{3,})[.,\s\p{Z}]+(?P<year>(\d{4})|(\d{2}))`),
// Day name, month name, day, 2- or 4-digit year:
//   "Friday August 11, 2006"
//   "Tuesday October 14 2008"
//   "Thursday August 21 2008"
//   "Monday, May. 17, 2010"
regexp.MustCompile(`(?i)(?P<dayname>\p{L}{3,})[.,\s\p{Z}]+(?P<month>\p{L}{3,})[.,\s\p{Z}]+(?P<day>\d{1,2})(?:st|nd|rd|th)?[.,\s\p{Z}]+(?P<year>(\d{4})|(\d{2}))`),
// Day, month name, year:
//   "9 Sep 2009", "09 Sep, 2009", "01 May 10"
//   "23rd November 2007", "22nd May 2008"
regexp.MustCompile(`(?i)(?P<day>\d{1,2})(?:st|nd|rd|th)?[\s\p{Z}]+(?P<month>\p{L}{3,})[.,\s\p{Z}]+(?P<year>(\d{4})|(\d{2}))`),
// Month name, day, year:
//   "Mar 3, 2007", "Jul 21, 08", "May 25 2010", "May 25th 2010", "February 10 2008"
regexp.MustCompile(`(?i)(?P<month>\p{L}{3,})[.,\s\p{Z}]+(?P<day>\d{1,2})(?:st|nd|rd|th)?[.,\s\p{Z}]+(?P<year>(\d{4})|(\d{2}))`),
// Numeric year-month-day, dash separated: "2010-04-02"
regexp.MustCompile(`(?i)(?P<year>\d{4})-(?P<month>\d{1,2})-(?P<day>\d{1,2})`),
// Numeric year/month/day, slash separated: "2007/03/18"
regexp.MustCompile(`(?i)(?P<year>\d{4})/(?P<month>\d{1,2})/(?P<day>\d{1,2})`),
// Day-monthname-year, dash separated: "09-Apr-2007", "09-Apr-07"
regexp.MustCompile(`(?i)(?P<day>\d{1,2})-(?P<month>\p{L}{3,})-(?P<year>(\d{4})|(\d{2}))`),
// Month name and 4-digit year only (no day): "May 2011"
regexp.MustCompile(`(?i)(?P<month>\p{L}{3,})[\s\p{Z}]+(?P<year>\d{4})`),
// Ambiguous formats: the year is unmistakable (4 digits) but the order of
// the other two numbers is not, so they are captured as x1/x2.
//   "11/02/2008"
//   "11-02-2008"
//   "11.02.2008"
regexp.MustCompile(`(?i)(?P<x1>\d{1,2})[/.-](?P<x2>\d{1,2})[/.-](?P<year>\d{4})`),
// Even more ambiguous: any of the three numbers could be the year
// (eg: japan uses yy/mm/dd), so all three are captured as x1/x2/x3.
//   "11/2/10"
//   "11-02-10"
//   "11.02.10"
regexp.MustCompile(`(?i)(?P<x1>\d{1,2})[/.-](?P<x2>\d{1,2})[/.-](?P<x3>\d{2})`),
// TODO: year/month only.
// "May/June 2011" (common for publications) - just use the second month.
// Not yet enabled; the sketch below is not valid Go as written
// (NOTE(review): the r'...' raw-string form suggests it was carried over
// from a Python version - confirm before porting):
//   r'(?P<cruftmonth>\p{L}{3,})/(?P<month>\p{L}{3,})[\s\p{Z}]+(?P<year>\d{4})',
// ExtractDate already handles a "cruftmonth" group, but no active pattern
// in this list defines one.

// Missing year, with a leading day name, eg:
//   "Thu April 24th"
regexp.MustCompile(`(?i)(?P<dayname>\p{L}{3,})[.,\s\p{Z}]+(?P<month>\p{L}{3,})[.,\s\p{Z}]+(?P<day>\d{1,2})(?:st|nd|rd|th)?`),
// Missing year, month name and day only, eg:
//   "April 24th"
regexp.MustCompile(`(?i)(?P<month>\p{L}{3,})[.,\s\p{Z}]+(?P<day>\d{1,2})(?:st|nd|rd|th)?`),
}
// ExtendYear expands a 2-digit year into a full 4-digit year, using a
// pivot at 70:
//
//	 0..69 => 2000..2069
//	70..99 => 1970..1999
//
// Values of 100 or more are returned unchanged. No lower bound is checked:
// any value below 70, negative ones included, simply has 2000 added to it.
func ExtendYear(year int) int {
	switch {
	case year < 70:
		return year + 2000
	case year < 100:
		return year + 1900
	default:
		// already a full year
		return year
	}
}
// ExtractDate scans s for the first recognisable date.
//
// Each pattern in dateCrackers is tried in order against s (leftmost match
// of that pattern only). The named groups of a match are validated and
// collected into a Date:
//   - "year" must be numeric; it is passed through ExtendYear
//   - "month" is either a number in 1..12 or a key of monthLookup
//   - "cruftmonth" must be a key of monthLookup but is otherwise ignored
//   - "day" must be a number in 1..31
//   - "x1", "x2", "x3" are ambiguous numbers handed to ctx.DateResolver
//
// If any group fails validation, or the assembled Date is not sane, the
// next pattern is tried.
//
// On success it returns the Date together with a Span holding the byte
// offsets of the whole match within s. If ctx.DateResolver reports an
// error, that error is returned with an empty Date and Span. If no pattern
// yields a usable date, an empty Date and Span are returned with a nil
// error.
func (ctx *Context) ExtractDate(s string) (Date, Span, error) {
	for _, re := range dateCrackers {
		loc := re.FindStringSubmatchIndex(s)
		if loc == nil {
			// this pattern does not occur anywhere in s
			continue
		}

		parsed := Date{}
		valid := true
		ambiguous := make([]int, 0, 3) // components that may be day, month or year

		for idx, group := range re.SubexpNames() {
			// Lower-cased text of this group; empty if the group did not
			// take part in the match.
			text := ""
			if lo, hi := loc[2*idx], loc[2*idx+1]; lo >= 0 && hi >= 0 {
				text = strings.ToLower(s[lo:hi])
			}

			switch group {
			case "year":
				if num, err := strconv.Atoi(text); err == nil {
					parsed.SetYear(ExtendYear(num))
				} else {
					valid = false
				}

			case "month":
				if num, err := strconv.Atoi(text); err == nil {
					// numeric month
					if num >= 1 && num <= 12 {
						parsed.SetMonth(num)
					} else {
						valid = false // month out of range
					}
				} else if named, found := monthLookup[text]; found {
					// month given by name
					parsed.SetMonth(named)
				} else {
					valid = false
				}

			case "cruftmonth":
				// Special case for "Jan/Feb 2010": the first month only has
				// to be a real month name, its value is discarded.
				if _, found := monthLookup[text]; !found {
					valid = false
				}

			case "day":
				num, err := strconv.Atoi(text)
				if err != nil || num < 1 || num > 31 {
					valid = false
				} else {
					parsed.SetDay(num)
				}

			case "x1", "x2", "x3":
				// could be day, month or year - decided further down
				if num, err := strconv.Atoi(text); err == nil {
					ambiguous = append(ambiguous, num)
				} else {
					valid = false
				}
			}
		}

		if !valid {
			// the regexp matched, but the captured values were unusable
			continue
		}

		// Enough unambiguous fields? (month plus at least one of year/day)
		if parsed.HasMonth() && (parsed.HasYear() || parsed.HasDay()) {
			if parsed.sane() {
				return parsed, Span{Begin: loc[0], End: loc[1]}, nil
			}
			continue
		}

		// Otherwise see whether there are ambiguous components to resolve.
		// With only two of them, a known year serves as the third.
		if len(ambiguous) == 2 && parsed.HasYear() {
			ambiguous = append(ambiguous, parsed.Year())
		}
		if len(ambiguous) != 3 {
			continue
		}

		var (
			resolved Date
			err      error
		)
		resolved, err = ctx.DateResolver(ambiguous[0], ambiguous[1], ambiguous[2])
		if err != nil {
			return Date{}, Span{}, err
		}
		if resolved.HasYear() && resolved.HasMonth() && resolved.HasDay() && resolved.sane() {
			// resolved.
			return resolved, Span{Begin: loc[0], End: loc[1]}, nil
		}
	}

	// nothing matched: empty date, empty span, no error
	return Date{}, Span{}, nil
}