Skip to content

Commit dc1ee7a

Browse files
feat: Add Transform's builtin functions
1 parent fd98619 commit dc1ee7a

4 files changed

Lines changed: 257 additions & 5 deletions

File tree

magicparse/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
Builder,
1515
builtins as builtins_composite_processors,
1616
)
17-
from .transform import ParsingTransform, Transform
17+
from .transform import ParsingTransform, Transform, TransformError
1818
from .type_converters import TypeConverter, builtins as builtins_type_converters
1919
from typing import Any
2020
from .validators import Validator, builtins as builtins_validators
@@ -31,6 +31,7 @@
3131
"RowSkipped",
3232
"RowFailed",
3333
"Transform",
34+
"TransformError",
3435
"Validator",
3536
]
3637

magicparse/transform.py

Lines changed: 131 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
11
from abc import ABC, abstractmethod
2+
from collections.abc import Callable, Collection, Sequence
23
from dataclasses import dataclass
4+
from decimal import Decimal
35
from enum import StrEnum
4-
from typing import Any, Self
6+
from typing import Any, NoReturn, Self
57
from jsonata import Jsonata # pyright: ignore[reportMissingTypeStubs]
68

79

@@ -48,7 +50,132 @@ def register(cls, transform: type[Self]) -> None:
4850
cls.registry[transform.key()] = transform
4951

5052

53+
class TransformError(Exception):
54+
def __init__(self, message: str, params: Sequence[Any] | dict[str, Any]) -> None:
55+
super().__init__(message)
56+
match params:
57+
case dict():
58+
for param, value in params.items():
59+
self.add_note(f"param({param}) = {value}")
60+
case _:
61+
for i, value in enumerate(params):
62+
self.add_note(f"param({i}) = {value}")
63+
64+
65+
def coalesce_numbers[T: int | float | Decimal | None](*args: T | None) -> T:
66+
for arg in args:
67+
if arg:
68+
return arg
69+
70+
raise TransformError("No non-zero value to coalesce into", params=args)
71+
72+
73+
def divide[T: int | Decimal](numerator: T, denominator: T) -> float | Decimal:
74+
"JSONata native x / y operator can only divide int and float, but not Decimal"
75+
try:
76+
return numerator / denominator
77+
except Exception as error:
78+
raise TransformError("Cannot divide", params={"numerator": numerator, "denominator": denominator}) from error
79+
80+
81+
def is_positive[T: int | float | Decimal](value: T) -> T:
82+
if value <= 0:
83+
raise TransformError("Value is not positive", params={"value": value})
84+
return value
85+
86+
87+
def left_pad_zeroes(value: str, width: int) -> str:
    """Left-pad ``value`` with ``0`` characters up to ``width`` using ``str.zfill``.

    Raises:
        TransformError: when ``zfill`` cannot be applied (e.g. ``value`` is
            not a string); the original exception is chained as the cause.
    """
    try:
        padded = value.zfill(width)
    except Exception as cause:
        raise TransformError("Cannot left pad zeroes", params={"value": value, "width": width}) from cause
    return padded
92+
93+
94+
def length(value: Collection[Any]) -> int:
    """Return ``len(value)``.

    Raises:
        TransformError: when ``len`` fails on ``value`` (e.g. a number); the
            original exception is chained as the cause.
    """
    try:
        size = len(value)
    except Exception as cause:
        raise TransformError("Cannot get length", params={"value": value}) from cause
    return size
99+
100+
101+
def map_to[K, V](key: K, mapping: dict[K, V]) -> V:
102+
try:
103+
return mapping[key]
104+
except Exception as error:
105+
raise TransformError("Cannot map to", params={"key": key, "mapping": mapping}) from error
106+
107+
108+
class SkippedRow(Exception):
    """Exception raised by the ``skip_row`` builtin; its message is the given reason."""
110+
111+
112+
def skip_row(reason: str | None) -> NoReturn:
    """Unconditionally raise ``SkippedRow``.

    Args:
        reason: Message for the exception; a falsy value (``None`` or ``""``)
            becomes the empty string.
    """
    message = reason if reason else ""
    raise SkippedRow(message)
114+
115+
116+
def strip_whitespaces(value: str) -> str:
    """Return ``value`` with leading and trailing whitespace removed (``str.strip``).

    Raises:
        TransformError: when ``strip`` cannot be applied (e.g. ``value`` is
            not a string); the original exception is chained as the cause.
    """
    try:
        stripped = value.strip()
    except Exception as cause:
        raise TransformError("Cannot strip whitespaces", params={"value": value}) from cause
    return stripped
121+
122+
123+
def to_decimal(value: str | float | int) -> Decimal:
124+
try:
125+
if isinstance(value, str):
126+
return Decimal(value.strip().replace(",", "."))
127+
else:
128+
return Decimal(value)
129+
except Exception as error:
130+
raise TransformError("Cannot convert to decimal", params={"value": value}) from error
131+
132+
133+
def to_int(value: str) -> int:
    """Parse ``value`` as an integer after stripping surrounding whitespace.

    Raises:
        TransformError: when stripping or parsing fails; the original
            exception is chained as the cause.
    """
    try:
        parsed = int(value.strip())
    except Exception as cause:
        raise TransformError("Cannot convert to int", params={"value": value}) from cause
    return parsed
138+
139+
140+
def type_of(value: Any) -> str:
141+
match value:
142+
case int():
143+
return "int"
144+
case float():
145+
return "float"
146+
case Decimal():
147+
return "decimal"
148+
case str():
149+
return "string"
150+
case _:
151+
return str(type(value)) # pyright: ignore[reportUnknownArgumentType]
152+
153+
51154
class Transform(Jsonata):
    """JSONata expression that exposes this module's builtin functions."""

    def __init__(self, expression: str) -> None:
        """Compile ``expression`` and turn off Jsonata's input validation."""
        super().__init__(expression)
        # NOTE(review): presumably disabled so non-JSON values such as Decimal
        # can be passed to evaluate() - confirm against jsonata-python.
        self.validate_input = False

    @staticmethod
    def get_builtin_functions() -> dict[str, Callable[..., Any]]:
        """Return the builtin functions keyed by the name they are bound under."""
        # Every builtin is registered under its own Python function name.
        builtin_functions: tuple[Callable[..., Any], ...] = (
            coalesce_numbers,
            divide,
            is_positive,
            left_pad_zeroes,
            length,
            map_to,
            skip_row,
            strip_whitespaces,
            to_decimal,
            to_int,
            type_of,
        )
        return {function.__name__: function for function in builtin_functions}
174+
175+
176+
def _register_builtin_functions() -> None:
    """Bind every builtin function into ``Jsonata.static_frame`` as a ``JLambda``."""
    frame = Jsonata.static_frame
    for name, builtin in Transform.get_builtin_functions().items():
        frame.bind(name, Jsonata.JLambda(builtin))


# Runs at import time so the builtins are bound as soon as this module is loaded.
_register_builtin_functions()

tests/test_transform.py

Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,109 @@
1+
from decimal import Decimal
2+
from magicparse import Transform
3+
import pytest
4+
5+
from magicparse.transform import SkippedRow, TransformError
6+
7+
8+
def test_coalesce_numbers():
    """The first non-zero argument is returned; all-zero input raises."""
    expected_by_expression = {
        "$coalesce_numbers(1, 2, 3)": 1,
        "$coalesce_numbers(0, 2, 3)": 2,
        "$coalesce_numbers(0, 0, 3)": 3,
    }
    for expression, expected in expected_by_expression.items():
        assert Transform(expression).evaluate({}) == expected

    with pytest.raises(TransformError, match="No non-zero value to coalesce into"):
        Transform("$coalesce_numbers(0, 0, 0)").evaluate({})
15+
16+
17+
def test_divide():
    """Integers divide to a float, Decimals to a Decimal; dividing by zero raises."""
    int_quotient = Transform("$divide(1, 2)").evaluate({})
    assert int_quotient == 0.5

    decimal_operands = {"a": Decimal(1), "b": Decimal(2)}
    decimal_quotient = Transform("$divide(a, b)").evaluate(decimal_operands)
    assert decimal_quotient == Decimal("0.5")

    with pytest.raises(TransformError, match="Cannot divide"):
        Transform("$divide(1, 0)").evaluate({})
24+
25+
26+
def test_is_positive():
    """A positive value passes through; zero and negatives raise."""
    assert Transform("$is_positive(1)").evaluate({})

    for expression in ("$is_positive(0)", "$is_positive(-1)"):
        with pytest.raises(TransformError, match="Value is not positive"):
            Transform(expression).evaluate({})
34+
35+
36+
def test_left_pad_zeroes():
    """Strings are padded up to the width; longer strings are left untouched."""
    expected_by_expression = {
        '$left_pad_zeroes("", 5)': "00000",
        '$left_pad_zeroes("111", 5)': "00111",
        '$left_pad_zeroes("11111", 5)': "11111",
        '$left_pad_zeroes("11111111", 5)': "11111111",
    }
    for expression, expected in expected_by_expression.items():
        assert Transform(expression).evaluate({}) == expected

    with pytest.raises(TransformError, match="Cannot left pad zeroes"):
        Transform("$left_pad_zeroes(-1, 5)").evaluate({})
44+
45+
46+
def test_length():
    """Arrays, strings and objects have a length; a number does not."""
    cases = [
        ('$length(["A", "B", "C"])', {}, 3),
        ('$length("ABCD")', {}, 4),
        ("$length($)", {"a": "a", "b": "b"}, 2),
    ]
    for expression, data, expected in cases:
        assert Transform(expression).evaluate(data) == expected

    with pytest.raises(TransformError, match="Cannot get length"):
        Transform("$length(5)").evaluate({})
53+
54+
55+
def test_map_to():
    """Known keys map to their value; an unknown key raises."""
    expression = """
    (
        $values := {
            "A": 1,
            "B": 2
        };
        input ~> $map_to($values)
    )
    """
    for key, expected in (("A", 1), ("B", 2)):
        assert Transform(expression).evaluate({"input": key}) == expected

    with pytest.raises(TransformError, match="Cannot map to"):
        Transform(expression).evaluate({"input": "C"})
70+
71+
72+
def test_skip_row():
    """$skip_row raises SkippedRow carrying the given reason."""
    expression = '$skip_row("some reason")'
    with pytest.raises(SkippedRow, match="some reason"):
        Transform(expression).evaluate({})
75+
76+
77+
def test_strip_whitespaces():
    """Surrounding spaces are removed; a non-string argument raises."""
    for expression in ('$strip_whitespaces("ABC")', '$strip_whitespaces(" ABC ")'):
        assert Transform(expression).evaluate({}) == "ABC"

    with pytest.raises(TransformError, match="Cannot strip whitespaces"):
        Transform("$strip_whitespaces(5)").evaluate({})
83+
84+
85+
def test_to_decimal():
    """Numbers and numeric strings (dot or comma, padded) convert; garbage raises."""
    expected_by_expression = {
        "$to_decimal(1)": Decimal(1),
        "$to_decimal(1.5)": Decimal("1.5"),
        '$to_decimal("1.5")': Decimal("1.5"),
        '$to_decimal("1,5")': Decimal("1.5"),
        '$to_decimal(" 1.5 ")': Decimal("1.5"),
    }
    for expression, expected in expected_by_expression.items():
        assert Transform(expression).evaluate({}) == expected

    with pytest.raises(TransformError, match="Cannot convert to decimal"):
        Transform('$to_decimal("abc")').evaluate({})
94+
95+
96+
def test_to_int():
    """Integer strings (optionally padded with spaces) convert; garbage raises."""
    for expression in ('$to_int("15")', '$to_int(" 15 ")'):
        assert Transform(expression).evaluate({}) == 15

    with pytest.raises(TransformError, match="Cannot convert to int"):
        Transform('$to_int("abc")').evaluate({})
102+
103+
104+
def test_type_of():
    """Known scalar types get a short name; other types fall back to str(type)."""
    cases = [
        ('$type_of("abc")', {}, "string"),
        ("$type_of(1)", {}, "int"),
        ("$type_of(1.5)", {}, "float"),
        ("$type_of(input)", {"input": Decimal("1.5")}, "decimal"),
        ("$type_of({})", {}, "<class 'dict'>"),
    ]
    for expression, data, expected in cases:
        assert Transform(expression).evaluate(data) == expected

typings/jsonata/__init__.pyi

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
from collections.abc import Callable
2+
from typing import Any, ClassVar
3+
4+
class Frame:
    # Minimal stub: only the `bind` method is declared here.
    def bind(self, name: str, val: Any) -> None:
        ...
6+
7+
class Jsonata:
    # Class-level frame shared by all expressions.
    static_frame: ClassVar[Frame]
    validate_input: bool

    def __init__(self, expr: str) -> None:
        ...

    def evaluate(self, input: Any) -> Any:
        ...

    # NOTE(review): declared as a nested class because callers reference it
    # as `Jsonata.JLambda` - confirm against the jsonata package.
    class JLambda:
        def __init__(self, function: Callable[..., Any]) -> None:
            ...

0 commit comments

Comments
 (0)