Preliminary support for years with unknown digits

rlskoeser · rlskoeser · commit d0f786a3e6d1 · 2026-03-27T10:36:27.000-04:00
diff --git a/src/undate/converters/calendars/hebrew/transformer.py b/src/undate/converters/calendars/hebrew/transformer.py
@@ -21,14 +21,24 @@ def hebrew_date(self, items):
             if child.data in ["year", "month", "day"]:
                 # in each case we expect one integer value;
                 # anonymous tokens convert to their value and cast as int
-                value = int(child.children[0])
+                try:
+                    value = int(child.children[0])
+                except ValueError:
+                    # if missing digits are present, leave as a string
+                    value = child.children[0]
+
                 parts[str(child.data)] = value
 
         # initialize and return an undate with year, month, day and
         # configured calendar (hebrew by default)
         # NOTE: use self.calendar so Seleucid can extend more easily
         return Undate(**parts, calendar=self.calendar)
 
+    def UNKNOWN_DIGITS(self, token):
+        """Convert unknown digits into undate missing digit character."""
+        unknown_digits = token.strip("[]").replace(".", Undate.MISSING_DIGIT)
+        return token.update(value=unknown_digits)
+
     def year(self, items):
         # combine multiple parts into a single string
         value = "".join([str(i) for i in items])
diff --git a/src/undate/converters/grammars/hebrew.lark b/src/undate/converters/grammars/hebrew.lark
@@ -1,8 +1,8 @@
-%import common.WS
+%import common (WS, DIGIT)
 %ignore WS
 
 // Ignore periods and commas in dates
-%import .undate_common.DATE_PUNCTUATION
+%import .undate_common (DATE_PUNCTUATION, UNKNOWN_DIGITS)
 %ignore DATE_PUNCTUATION
 
 // only support day month year format for now
@@ -16,7 +16,7 @@ hebrew_date: weekday? day month year | month year | year
 // "first third of", seasons (can look for more examples)
 
 // Hebrew calendar starts with year 1 in 3761 BCE
-year: /\d+/
+year: /\d+/ | DIGIT* UNKNOWN_DIGITS DIGIT*
 
 // months
 month: month_1
diff --git a/src/undate/converters/grammars/undate_common.lark b/src/undate/converters/grammars/undate_common.lark
@@ -1,3 +1,8 @@
 // Some abbreviations use periods; some default date formats
 // include commas. Ignore both
 DATE_PUNCTUATION: "." | ","
+
+// In some sources like PGP, unknown digits are represented by
+// brackets and periods, where the periods indicate the number of
+// unknown digits, e.g. 18[..] or 14[.]3
+UNKNOWN_DIGITS: /\[\.+\]/
diff --git a/tests/test_converters/test_calendars/test_hebrew/test_hebrew_parser.py b/tests/test_converters/test_calendars/test_hebrew/test_hebrew_parser.py
@@ -33,6 +33,9 @@
     "536",
     "53",
     "3",
+    # years with missing digits
+    "53[.]2",
+    "5[..]2",
 ]
 
 
diff --git a/tests/test_converters/test_calendars/test_hebrew/test_hebrew_transformer.py b/tests/test_converters/test_calendars/test_hebrew/test_hebrew_transformer.py
@@ -32,6 +32,9 @@ def test_hebrew_undate():
     ("Thursday 12 Sivan 4795", HebrewUndate(4795, 3, 12), DatePrecision.DAY),
     # huh, current parsing completely ignores whitespace; do we want that?
     ("Thursday12Sivan4795", HebrewUndate(4795, 3, 12), DatePrecision.DAY),
+    # years with missing digits
+    ("53[.]2", HebrewUndate("53X2"), DatePrecision.YEAR),
+    ("5[..]2", HebrewUndate("5XX2"), DatePrecision.YEAR),
 ]
 
 
@@ -41,7 +44,7 @@ def test_transform(date_string, expected, expected_precision):
     # parse the input string, then transform to undate object
     parsetree = hebrew_parser.parse(date_string)
     transformed_date = transformer.transform(parsetree)
-    assert transformed_date == expected
+    assert repr(transformed_date) == repr(expected)
     # currently only undates have date precision
     if isinstance(transformed_date, Undate):
         assert transformed_date.precision == expected_precision

Original file line number	Diff line number	Diff line change
`@@ -33,6 +33,9 @@`
`33`	`33`	`"536",`
`34`	`34`	`"53",`
`35`	`35`	`"3",`
	`36`	`+ # years with missing digits`
	`37`	`+ "53[.]2",`
	`38`	`+ "5[..]2",`
`36`	`39`	`]`
`37`	`40`
`38`	`41`