We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent 78cbbbb, commit d0f786a (Copy full SHA for d0f786a)
5 files changed
src/undate/converters/calendars/hebrew/transformer.py
@@ -21,14 +21,24 @@ def hebrew_date(self, items):
21
if child.data in ["year", "month", "day"]:
22
# in each case we expect one integer value;
23
# anonymous tokens convert to their value and cast as int
24
- value = int(child.children[0])
+ try:
25
+ value = int(child.children[0])
26
+ except ValueError:
27
+ # if missing digits are present, leave as a string
28
+ value = child.children[0]
29
+
30
parts[str(child.data)] = value
31
32
# initialize and return an undate with year, month, day and
33
# configured calendar (hebrew by default)
34
# NOTE: use self.calendar so Seleucid can extend more easily
35
return Undate(**parts, calendar=self.calendar)
36
37
+ def UNKNOWN_DIGITS(self, token):
38
+ """Convert unknown digits into undate missing digit character."""
39
+ unknown_digits = token.strip("[]").replace(".", Undate.MISSING_DIGIT)
40
+ return token.update(value=unknown_digits)
41
42
def year(self, items):
43
# combine multiple parts into a single string
44
value = "".join([str(i) for i in items])
src/undate/converters/grammars/hebrew.lark
@@ -1,8 +1,8 @@
1
-%import common.WS
+%import common (WS, DIGIT)
2
%ignore WS
3
4
// Ignore periods and commas in dates
5
-%import .undate_common.DATE_PUNCTUATION
+%import .undate_common (DATE_PUNCTUATION, UNKNOWN_DIGITS)
6
%ignore DATE_PUNCTUATION
7
8
// only support day month year format for now
@@ -16,7 +16,7 @@ hebrew_date: weekday? day month year | month year | year
16
// "first third of", seasons (can look for more examples)
17
18
// Hebrew calendar starts with year 1 in 3761 BCE
19
-year: /\d+/
+year: /\d+/ | DIGIT* UNKNOWN_DIGITS DIGIT*
20
// months
month: month_1
src/undate/converters/grammars/undate_common.lark
@@ -1,3 +1,8 @@
// Some abbreviations use periods; some default date formats
// include commas. Ignore both
DATE_PUNCTUATION: "." | ","
+// In some sources like PGP, unknown digits are represented by
+// brackets and periods, where the periods indicate the number of
+// unknown digits, e.g. 18[..] or 14[.]3
+UNKNOWN_DIGITS: /\[\.+\]/
tests/test_converters/test_calendars/test_hebrew/test_hebrew_parser.py
@@ -33,6 +33,9 @@
"536",
"53",
"3",
+ # years with missing digit
+ "53[.]2",
+ "5[..]2",
]
tests/test_converters/test_calendars/test_hebrew/test_hebrew_transformer.py
@@ -32,6 +32,9 @@ def test_hebrew_undate():
("Thursday 12 Sivan 4795", HebrewUndate(4795, 3, 12), DatePrecision.DAY),
# huh, current parsing completely ignores whitespace; do we want that?
("Thursday12Sivan4795", HebrewUndate(4795, 3, 12), DatePrecision.DAY),
+ # years with missing digits
+ ("53[.]2", HebrewUndate("53X2"), DatePrecision.YEAR),
+ ("5[..]2", HebrewUndate("5XX2"), DatePrecision.YEAR),
@@ -41,7 +44,7 @@ def test_transform(date_string, expected, expected_precision):
# parse the input string, then transform to undate object
45
parsetree = hebrew_parser.parse(date_string)
46
transformed_date = transformer.transform(parsetree)
- assert transformed_date == expected
47
+ assert repr(transformed_date) == repr(expected)
48
# currently only undates have date precision
49
if isinstance(transformed_date, Undate):
50
assert transformed_date.precision == expected_precision
0 commit comments