From b2c623deba934528453046ad2843acf23bb5d25a Mon Sep 17 00:00:00 2001 From: Juniper Tyree Date: Thu, 27 Mar 2025 13:32:14 +0200 Subject: [PATCH 1/5] Add a Jpeg2000 compressor --- pyproject.toml | 4 ++- .../compressor/compressors/jpeg2000.py | 31 +++++++++++++++++++ 2 files changed, 34 insertions(+), 1 deletion(-) create mode 100644 src/climatebenchpress/compressor/compressors/jpeg2000.py diff --git a/pyproject.toml b/pyproject.toml index 79a2f1d..26bc770 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,9 +12,11 @@ dependencies = [ "numcodecs-observers~=0.1.1", "numcodecs-wasm~=0.1.1", "numcodecs-wasm-bit-round~=0.2.0", + "numcodecs-wasm-fixed-offset-scale~=0.2.1", + "numcodecs-wasm-jpeg2000~=0.1.0", "numcodecs-wasm-pco~=0.1.0", "numcodecs-wasm-round~=0.2.0", - "numcodecs-wasm-sz3~=0.4.0", + "numcodecs-wasm-sz3~=0.5.0", "numcodecs-wasm-tthresh~=0.1.0", "numcodecs-wasm-uniform-noise~=0.2.0", "numcodecs-wasm-zfp~=0.4.0", diff --git a/src/climatebenchpress/compressor/compressors/jpeg2000.py b/src/climatebenchpress/compressor/compressors/jpeg2000.py new file mode 100644 index 0000000..835c5ea --- /dev/null +++ b/src/climatebenchpress/compressor/compressors/jpeg2000.py @@ -0,0 +1,31 @@ +__all__ = ["Jpeg2000"] + +import numcodecs.astype +import numcodecs_wasm_fixed_offset_scale +import numcodecs_wasm_jpeg2000 +import numcodecs_wasm_round +from numcodecs.abc import Codec +from numcodecs_combinators.stack import CodecStack + +from .abc import Compressor + + +class Jpeg2000(Compressor): + name = "jpeg2000" + description = "JPEG 2000" + + @staticmethod + def build() -> Codec: + precision = 0.01 + rate = 10.0 # x10 factor compression + + return CodecStack( + numcodecs_wasm_fixed_offset_scale.FixedOffsetScale( + offset=0, scale=precision, + ), + numcodecs_wasm_round.Round(precision=1), + numcodecs.astype.AsType( + encode_dtype="int32", decode_dtype="float32", + ), + numcodecs_wasm_jpeg2000.Jpeg2000(mode="rate", rate=rate), + ) From aef36b0169240eb2a0cf8750b6e68e0af1167e0a Mon Sep 17 00:00:00 2001 From: Juniper Tyree Date: Thu, 27 Mar 2025 13:34:00 +0200 Subject: [PATCH 2/5] Fix formatting --- src/climatebenchpress/compressor/compressors/jpeg2000.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/climatebenchpress/compressor/compressors/jpeg2000.py b/src/climatebenchpress/compressor/compressors/jpeg2000.py index 835c5ea..9e0e950 100644 --- a/src/climatebenchpress/compressor/compressors/jpeg2000.py +++ b/src/climatebenchpress/compressor/compressors/jpeg2000.py @@ -21,11 +21,13 @@ def build() -> Codec: return CodecStack( numcodecs_wasm_fixed_offset_scale.FixedOffsetScale( - offset=0, scale=precision, + offset=0, + scale=precision, ), numcodecs_wasm_round.Round(precision=1), numcodecs.astype.AsType( - encode_dtype="int32", decode_dtype="float32", + encode_dtype="int32", + decode_dtype="float32", ), numcodecs_wasm_jpeg2000.Jpeg2000(mode="rate", rate=rate), ) From 58de24340e9002b4a26a8cea5b7f8f953dd07172 Mon Sep 17 00:00:00 2001 From: Juniper Tyree <50025784+juntyr@users.noreply.github.com> Date: Thu, 27 Mar 2025 21:19:14 +0200 Subject: [PATCH 3/5] Update numcodecs-wasm-jpeg2000 to v0.1.1 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 26bc770..ba35b02 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -13,7 +13,7 @@ dependencies = [ "numcodecs-wasm~=0.1.1", "numcodecs-wasm-bit-round~=0.2.0", "numcodecs-wasm-fixed-offset-scale~=0.2.1", - "numcodecs-wasm-jpeg2000~=0.1.0", + "numcodecs-wasm-jpeg2000~=0.1.1", "numcodecs-wasm-pco~=0.1.0", "numcodecs-wasm-round~=0.2.0", "numcodecs-wasm-sz3~=0.5.0", From 18313b5abec1bb3e0bdf2d23fe0706c5efd836f1 Mon Sep 17 00:00:00 2001 From: Juniper Tyree Date: Fri, 28 Mar 2025 16:15:49 +0200 Subject: [PATCH 4/5] Ducktape over deadlock --- scripts/compress.py | 5 ++++- src/climatebenchpress/compressor/compressors/__init__.py | 3 ++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/scripts/compress.py b/scripts/compress.py index 5f7c365..3d47eec 100644 --- a/scripts/compress.py +++ b/scripts/compress.py @@ -2,6 +2,7 @@ import json from pathlib import Path +import dask import numcodecs_observers import xarray as xr from climatebenchpress.compressor.compressors.abc import Compressor @@ -80,7 +81,9 @@ def compress_decompress(codec: Codec, ds: xr.Dataset) -> tuple[xr.Dataset, dict] timing, ], ) as codec_: - variables[v] = codec_.encode_decode_data_array(ds[v]).compute() + # FIXME: allow compressing chunks in parallel + with dask.config.set(scheduler="synchronous"): + variables[v] = codec_.encode_decode_data_array(ds[v]).compute() measurements[v] = { "encoded_bytes": sum( diff --git a/src/climatebenchpress/compressor/compressors/__init__.py b/src/climatebenchpress/compressor/compressors/__init__.py index 9ecd244..f3e899c 100644 --- a/src/climatebenchpress/compressor/compressors/__init__.py +++ b/src/climatebenchpress/compressor/compressors/__init__.py @@ -1,8 +1,9 @@ -__all__ = ["BitRound", "BitRoundPco", "StochRound", "Sz3", "Tthresh", "Zfp"] +__all__ = ["BitRound", "BitRoundPco", "Jpeg2000", "StochRound", "Sz3", "Tthresh", "Zfp"] from . import abc as abc from .bitround import BitRound from .bitround_pco import BitRoundPco +from .jpeg2000 import Jpeg2000 from .stochround import StochRound from .sz3 import Sz3 from .tthresh import Tthresh From f100b0e5446a688d3911f86288dd891b8500629b Mon Sep 17 00:00:00 2001 From: Juniper Tyree Date: Mon, 31 Mar 2025 10:06:23 +0300 Subject: [PATCH 5/5] Bump numcodecs-wasm to v0.1.3 to fix deadlocks --- pyproject.toml | 2 +- scripts/compress.py | 5 +---- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index ba35b02..6992b06 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -10,7 +10,7 @@ dependencies = [ "numcodecs>=0.13.0,<0.16", "numcodecs-combinators[xarray]~=0.2.4", "numcodecs-observers~=0.1.1", - "numcodecs-wasm~=0.1.1", + "numcodecs-wasm~=0.1.3", "numcodecs-wasm-bit-round~=0.2.0", "numcodecs-wasm-fixed-offset-scale~=0.2.1", "numcodecs-wasm-jpeg2000~=0.1.1", diff --git a/scripts/compress.py b/scripts/compress.py index 3d47eec..5f7c365 100644 --- a/scripts/compress.py +++ b/scripts/compress.py @@ -2,7 +2,6 @@ import json from pathlib import Path -import dask import numcodecs_observers import xarray as xr from climatebenchpress.compressor.compressors.abc import Compressor @@ -81,9 +80,7 @@ def compress_decompress(codec: Codec, ds: xr.Dataset) -> tuple[xr.Dataset, dict] timing, ], ) as codec_: - # FIXME: allow compressing chunks in parallel - with dask.config.set(scheduler="synchronous"): - variables[v] = codec_.encode_decode_data_array(ds[v]).compute() + variables[v] = codec_.encode_decode_data_array(ds[v]).compute() measurements[v] = { "encoded_bytes": sum(