From 17ddc18521faeac3a16cc967cca1b9187a654faf Mon Sep 17 00:00:00 2001 From: Jayaram Kancherla Date: Mon, 30 Mar 2026 11:45:25 -0700 Subject: [PATCH 1/8] use the latest versions for the rds2cpp library --- lib/CMakeLists.txt | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/lib/CMakeLists.txt b/lib/CMakeLists.txt index 96a7421..14470b7 100644 --- a/lib/CMakeLists.txt +++ b/lib/CMakeLists.txt @@ -11,16 +11,23 @@ include(FetchContent) FetchContent_Declare( rds2cpp GIT_REPOSITORY https://github.com/LTLA/rds2cpp - GIT_TAG v1.1.0 + GIT_TAG master ) FetchContent_Declare( byteme GIT_REPOSITORY https://github.com/LTLA/byteme - GIT_TAG v1.2.2 + GIT_TAG master +) + +FetchContent_Declare( + sanisizer + GIT_REPOSITORY https://github.com/LTLA/sanisizer + GIT_TAG master ) FetchContent_MakeAvailable(byteme) +FetchContent_MakeAvailable(sanisizer) FetchContent_MakeAvailable(rds2cpp) # Defining the targets. From 9a4c9244b0d4069e95de933a18582fff4446762c Mon Sep 17 00:00:00 2001 From: Jayaram Kancherla Date: Mon, 30 Mar 2026 11:50:45 -0700 Subject: [PATCH 2/8] generate the test files in Rdata format --- tests/data/dataframe.RData | Bin 0 -> 225 bytes tests/data/generate_rdata.R | 36 +++++++++++++++++++++++++++++++++ tests/data/list.RData | Bin 0 -> 164 bytes tests/data/matrix.RData | Bin 0 -> 117 bytes tests/data/mixed.RData | Bin 0 -> 222 bytes tests/data/simple.RData | Bin 0 -> 161 bytes tests/data/single_object.RData | Bin 0 -> 121 bytes 7 files changed, 36 insertions(+) create mode 100644 tests/data/dataframe.RData create mode 100644 tests/data/generate_rdata.R create mode 100644 tests/data/list.RData create mode 100644 tests/data/matrix.RData create mode 100644 tests/data/mixed.RData create mode 100644 tests/data/simple.RData create mode 100644 tests/data/single_object.RData diff --git a/tests/data/dataframe.RData b/tests/data/dataframe.RData new file mode 100644 index 0000000000000000000000000000000000000000..58ea28401a3b6ca0dc1fcc5484330ecf4a3e910d 
GIT binary patch literal 225 zcmV<703QDziwFP!000000}FDAFy@NjVqjokW?*4uVqj(kG8tGyL)>&N7#LWXfE-2! z76wir&0dmPToRv>22?5xR?ooj4v3MJ@FnNx79=K@#AoJ}6sH!V$+9FR7N{JS!Wc;~Mly_% z0%N42`It2?F*mgsY7#R{RRxSu31d{D>r2i_EQaZW338<*mL%$>6#&N7#LWXfE-2! z76wir%~_IKToRv?SzN-vz$6UT!NBkih>;cXCFkcBBqo={XXcd@rxv2gvLq!Ir$WVe z!Qv45A4n(2LQcjU*$So$r+BN=QV^XpnJZ6JzsJI2$`D zDn@Aj?1-Q#kp~YPIKnORh|!zbGU2boSB1R}XV{LsaX76 T+LPh-hdGg<TWXaZjJ+Gv06f~m(XLX+{J#u`#iVMllNs$W) z_yZ;B^vj^Nh3TcdvQ#FzEx$Qga$9$cQ)@P1mYm$rsaVN{ YW2daRX6!cym(296KijhKimL$t0IDcuQvd(} literal 0 HcmV?d00001 diff --git a/tests/data/simple.RData b/tests/data/simple.RData new file mode 100644 index 0000000000000000000000000000000000000000..b93171641fffa1bbba5a86d877d5c18d89236e02 GIT binary patch literal 161 zcmV;S0ABweiwFP!000000}FDAFy@NjVqjokW?*4uVqj(kG8tGyL)>&N7#LWXfE-2! z76wir&7PT85?_{@3}o_x)PSWJ7?{9%fodUQs9I8za-drHK+^UfXM(^i2SzB(l?Db5 zf>6E~0~DYdQ(RI6HAVp9Qm`;QcjU*$So$r+BN=QV^XpnJZ6JzsJI2$@= z=Ja`!rRL9{!un({b5cs=z9)a~G#z;|>BpHplOA1hGKrkF Date: Mon, 30 Mar 2026 11:51:40 -0700 Subject: [PATCH 3/8] include the new options for parse rds --- lib/src/rdswrapper.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/src/rdswrapper.cpp b/lib/src/rdswrapper.cpp index 1b52da2..3438fb6 100644 --- a/lib/src/rdswrapper.cpp +++ b/lib/src/rdswrapper.cpp @@ -148,7 +148,8 @@ class RdsObject { public: RdsObject(const std::string& file) { try { - parsed = std::make_unique(rds2cpp::parse_rds(file)); + rds2cpp::ParseRdsOptions options; + parsed = std::make_unique(rds2cpp::parse_rds(file, options)); if (!parsed || !parsed->object) { throw std::runtime_error("Failed to parse RDS file"); } From 45894a489cebd98aecaa8eed3e1df419d9295050 Mon Sep 17 00:00:00 2001 From: Jayaram Kancherla Date: Mon, 30 Mar 2026 12:14:47 -0700 Subject: [PATCH 4/8] basic read object --- lib/src/rdswrapper.cpp | 33 +++++++++++++++++++++++++ src/rds2py/PyRdaReader.py | 51 +++++++++++++++++++++++++++++++++++++++ tests/test_rdata.py | 14 +++++++++++ 3 files 
changed, 98 insertions(+) create mode 100644 src/rds2py/PyRdaReader.py create mode 100644 tests/test_rdata.py diff --git a/lib/src/rdswrapper.cpp b/lib/src/rdswrapper.cpp index 3438fb6..0550938 100644 --- a/lib/src/rdswrapper.cpp +++ b/lib/src/rdswrapper.cpp @@ -165,6 +165,35 @@ class RdsObject { } }; +class RdaObject { +private: + std::unique_ptr parsed; + +public: + RdaObject(const std::string& file) { + try { + rds2cpp::ParseRdaOptions options; + parsed = std::make_unique(rds2cpp::parse_rda(file, options)); + } catch (const std::exception& e) { + throw std::runtime_error(std::string("Error in 'RdaObject' constructor: ") + e.what()); + } + } + + py::list get_object_names() const { + if (!parsed) throw std::runtime_error("Null parsed in 'get_object_names'"); + const auto& pairlist = parsed->contents; + py::list names; + for (size_t i = 0; i < pairlist.tag_names.size(); ++i) { + if (pairlist.has_tag[i]) { + names.append(pairlist.tag_names[i]); + } else { + names.append(py::none()); + } + } + return names; + } +}; + PYBIND11_MODULE(lib_rds_parser, m) { py::register_exception(m, "RdsParserError"); @@ -172,6 +201,10 @@ PYBIND11_MODULE(lib_rds_parser, m) { .def(py::init()) .def("get_robject", &RdsObject::get_robject, py::return_value_policy::reference_internal); + py::class_(m, "RdaObject") + .def(py::init()) + .def("get_object_names", &RdaObject::get_object_names); + py::class_(m, "RdsReader") .def(py::init()) .def("get_rtype", &RdsReader::get_rtype) diff --git a/src/rds2py/PyRdaReader.py b/src/rds2py/PyRdaReader.py new file mode 100644 index 0000000..48976ac --- /dev/null +++ b/src/rds2py/PyRdaReader.py @@ -0,0 +1,51 @@ +"""Low-level interface for reading RData files. + +This module provides the core functionality for parsing RData (.RData/.rda) files +and converting them into dictionary representations that can be further processed +by higher-level functions. 
+""" + +from .lib_rds_parser import RdaObject + +__author__ = "jkanche" +__copyright__ = "jkanche" +__license__ = "MIT" + + +class PyRdaParserError(Exception): + """Exception raised for errors during RData parsing.""" + + pass + + +class PyRdaParser: + """Parser for reading RData files. + + This class provides low-level access to RData file contents, handling the binary + format and converting it into Python data structures. It reuses the same + ``RdsReader``-based object processing from :py:class:`~.PyRdsParser`. + + Attributes: + rda_object: + Internal representation of the RData file. + """ + + def __init__(self, file_path: str): + """Initialize the parser. + + Args: + file_path: + Path to the RData file to be read. + """ + try: + self.rda_object = RdaObject(file_path) + except Exception as e: + raise PyRdaParserError(f"Error initializing 'PyRdaParser': {str(e)}") + + def get_object_names(self): + """Get the names of all objects stored in the RData file. + + Returns: + A list of object names (strings). 
+ """ + return list(self.rda_object.get_object_names()) diff --git a/tests/test_rdata.py b/tests/test_rdata.py new file mode 100644 index 0000000..309da00 --- /dev/null +++ b/tests/test_rdata.py @@ -0,0 +1,14 @@ +"""Tests for RData (.RData/.rda) file reading.""" + +from rds2py.PyRdaReader import PyRdaParser + +__author__ = "jkanche" +__copyright__ = "jkanche" +__license__ = "MIT" + + +class TestPyRdaParser: + def test_object_names(self): + parser = PyRdaParser("tests/data/simple.RData") + names = parser.get_object_names() + assert set(names) == {"int_vec", "dbl_vec", "str_vec", "bool_vec"} From 931e305c0f7cc9df971b4f62f06c730323edaa0c Mon Sep 17 00:00:00 2001 From: Jayaram Kancherla Date: Mon, 30 Mar 2026 19:00:20 -0700 Subject: [PATCH 5/8] finally able to parse rdata files --- lib/src/rdswrapper.cpp | 30 +++++++++- src/rds2py/PyRdaReader.py | 62 +++++++++++++++++++- src/rds2py/__init__.py | 5 +- src/rds2py/generics.py | 31 +++++++++- src/rds2py/rdsutils.py | 35 ++++++++++- tests/test_rdata.py | 119 +++++++++++++++++++++++++++++++++++++- 6 files changed, 272 insertions(+), 10 deletions(-) diff --git a/lib/src/rdswrapper.cpp b/lib/src/rdswrapper.cpp index 0550938..2ed6aa2 100644 --- a/lib/src/rdswrapper.cpp +++ b/lib/src/rdswrapper.cpp @@ -192,6 +192,31 @@ class RdaObject { } return names; } + + int get_object_count() const { + if (!parsed) throw std::runtime_error("Null parsed in 'get_object_count'"); + return static_cast(parsed->contents.data.size()); + } + + RdsReader* get_object_by_index(int index) const { + if (!parsed) throw std::runtime_error("Null parsed in 'get_object_by_index'"); + const auto& data = parsed->contents.data; + if (index < 0 || static_cast(index) >= data.size()) { + throw std::out_of_range("Object index out of range"); + } + return new RdsReader(data[index].get()); + } + + RdsReader* get_object_by_name(const std::string& name) const { + if (!parsed) throw std::runtime_error("Null parsed in 'get_object_by_name'"); + const auto& pairlist = 
parsed->contents; + for (size_t i = 0; i < pairlist.tag_names.size(); ++i) { + if (pairlist.has_tag[i] && pairlist.tag_names[i] == name) { + return new RdsReader(pairlist.data[i].get()); + } + } + throw std::runtime_error("Object not found: " + name); + } }; PYBIND11_MODULE(lib_rds_parser, m) { @@ -203,7 +228,10 @@ PYBIND11_MODULE(lib_rds_parser, m) { py::class_(m, "RdaObject") .def(py::init()) - .def("get_object_names", &RdaObject::get_object_names); + .def("get_object_names", &RdaObject::get_object_names) + .def("get_object_count", &RdaObject::get_object_count) + .def("get_object_by_index", &RdaObject::get_object_by_index, py::return_value_policy::take_ownership, py::keep_alive<0, 1>()) + .def("get_object_by_name", &RdaObject::get_object_by_name, py::return_value_policy::take_ownership, py::keep_alive<0, 1>()); py::class_(m, "RdsReader") .def(py::init()) diff --git a/src/rds2py/PyRdaReader.py b/src/rds2py/PyRdaReader.py index 48976ac..ad8d266 100644 --- a/src/rds2py/PyRdaReader.py +++ b/src/rds2py/PyRdaReader.py @@ -5,7 +5,10 @@ by higher-level functions. """ -from .lib_rds_parser import RdaObject +from typing import Any, Dict + +from .lib_rds_parser import RdaObject, RdsReader +from .PyRdsReader import PyRdsParser __author__ = "jkanche" __copyright__ = "jkanche" @@ -49,3 +52,60 @@ def get_object_names(self): A list of object names (strings). """ return list(self.rda_object.get_object_names()) + + def get_object_count(self) -> int: + """Get the number of objects stored in the RData file. + + Returns: + Number of objects. + """ + return self.rda_object.get_object_count() + + def parse(self) -> Dict[str, Dict[str, Any]]: + """Parse all objects in the RData file. + + Returns: + A dictionary mapping object names to their parsed representations. + Each value has the same structure as the output of + :py:meth:`~rds2py.PyRdsReader.PyRdsParser.parse`. 
+ """ + try: + helper = _RdsProcessorHelper() + + result = {} + names = self.get_object_names() + for i, name in enumerate(names): + reader = self.rda_object.get_object_by_index(i) + key = name if name is not None else f"__unnamed_{i}" + result[key] = helper._process_object(reader) + + return result + except Exception as e: + raise PyRdaParserError(f"Error parsing RData file: {str(e)}") + + def parse_object(self, name: str) -> Dict[str, Any]: + """Parse a single named object from the RData file. + + Args: + name: + Name of the object to parse. + + Returns: + A dictionary containing the parsed data for the requested object. + """ + try: + helper = _RdsProcessorHelper() + reader = self.rda_object.get_object_by_name(name) + return helper._process_object(reader) + except Exception as e: + raise PyRdaParserError(f"Error parsing object '{name}': {str(e)}") + + +class _RdsProcessorHelper(PyRdsParser): + """Helper that reuses PyRdsParser's object processing without requiring a file.""" + + def __init__(self): + self.R_MIN = -2147483648 + + def _process_object(self, obj: RdsReader) -> Dict[str, Any]: + return super()._process_object(obj) diff --git a/src/rds2py/__init__.py b/src/rds2py/__init__.py index f64e9e8..73136dc 100644 --- a/src/rds2py/__init__.py +++ b/src/rds2py/__init__.py @@ -15,5 +15,6 @@ finally: del version, PackageNotFoundError -from .generics import read_rds -from .rdsutils import parse_rds + +from .generics import read_rds, read_rda +from .rdsutils import parse_rds, parse_rda \ No newline at end of file diff --git a/src/rds2py/generics.py b/src/rds2py/generics.py index 50501e5..a448f26 100644 --- a/src/rds2py/generics.py +++ b/src/rds2py/generics.py @@ -16,9 +16,10 @@ """ from importlib import import_module +from typing import List, Optional from warnings import warn -from .rdsutils import get_class, parse_rds +from .rdsutils import get_class, parse_rda, parse_rds __author__ = "jkanche" __copyright__ = "jkanche" @@ -105,6 +106,34 @@ def read_rds(path: str, 
**kwargs): return _dispatcher(_robj, **kwargs) +def read_rda(path: str, objects: Optional[List[str]] = None, **kwargs) -> dict: + """Read an RData file and convert each object to an appropriate Python type. + + This function parses all (or selected) objects and dispatches each one + through the same type registry used by :py:func:`~.read_rds`. + + Args: + path: + Path to the RData (.RData/.rda) file to be read. + + objects: + Optional list of object names to read. If ``None``, + all objects in the file are read. + + **kwargs: + Additional arguments passed to specific parser functions. + + Returns: + A dictionary mapping object names to their converted Python + representations. + """ + parsed = parse_rda(path=path, objects=objects) + result = {} + for name, robj in parsed.items(): + result[name] = _dispatcher(robj, **kwargs) + return result + + def _dispatcher(robject: dict, **kwargs): """Internal function to dispatch R objects to appropriate parser functions. diff --git a/src/rds2py/rdsutils.py b/src/rds2py/rdsutils.py index 82f52a3..2359b43 100644 --- a/src/rds2py/rdsutils.py +++ b/src/rds2py/rdsutils.py @@ -1,9 +1,12 @@ -"""Utility functions for RDS file parsing and class inference. +"""Utility functions for RDS/RData file parsing and class inference. -This module provides helper functions for parsing RDS files and inferring the appropriate R class information from -parsed objects. +This module provides helper functions for parsing RDS and RData files and inferring the appropriate R class +information from parsed objects. """ +from typing import Dict, List, Optional + +from .PyRdaReader import PyRdaParser from .PyRdsReader import PyRdsParser __author__ = "jkanche" @@ -28,6 +31,32 @@ def parse_rds(path: str) -> dict: return realized +def parse_rda(path: str, objects: Optional[List[str]] = None) -> Dict[str, dict]: + """Parse an RData file into a dictionary of named objects. + + Args: + path: + Path to the RData (.RData/.rda) file to be parsed. 
+ + objects: + Optional list of object names to parse. If ``None``, + all objects in the file are parsed. + + Returns: + A dictionary mapping object names to their parsed representations. + Each value has the same structure as the output of :py:func:`~.parse_rds`. + """ + parser = PyRdaParser(path) + + if objects is None: + return parser.parse() + + result = {} + for name in objects: + result[name] = parser.parse_object(name) + return result + + def get_class(robj: dict) -> str: """Infer the R class name from a parsed RDS object. diff --git a/tests/test_rdata.py b/tests/test_rdata.py index 309da00..3077866 100644 --- a/tests/test_rdata.py +++ b/tests/test_rdata.py @@ -1,6 +1,8 @@ -"""Tests for RData (.RData/.rda) file reading.""" +import numpy as np +import pytest -from rds2py.PyRdaReader import PyRdaParser +from rds2py import parse_rda, read_rda +from rds2py.PyRdaReader import PyRdaParser, PyRdaParserError __author__ = "jkanche" __copyright__ = "jkanche" @@ -12,3 +14,116 @@ def test_object_names(self): parser = PyRdaParser("tests/data/simple.RData") names = parser.get_object_names() assert set(names) == {"int_vec", "dbl_vec", "str_vec", "bool_vec"} + + def test_object_count(self): + parser = PyRdaParser("tests/data/simple.RData") + assert parser.get_object_count() == 4 + + def test_parse_all(self): + parser = PyRdaParser("tests/data/simple.RData") + result = parser.parse() + assert isinstance(result, dict) + assert set(result.keys()) == {"int_vec", "dbl_vec", "str_vec", "bool_vec"} + + def test_parse_single_object(self): + parser = PyRdaParser("tests/data/simple.RData") + obj = parser.parse_object("int_vec") + assert obj["type"] == "integer" + + def test_parse_missing_object(self): + parser = PyRdaParser("tests/data/simple.RData") + with pytest.raises(PyRdaParserError): + parser.parse_object("nonexistent") + + def test_invalid_file(self): + with pytest.raises(PyRdaParserError): + PyRdaParser("tests/data/nonexistent.RData") + + def test_single_object_file(self): + 
parser = PyRdaParser("tests/data/single_object.RData") + names = parser.get_object_names() + assert names == ["single_obj"] + assert parser.get_object_count() == 1 + + +class TestParseRda: + def test_parse_all_objects(self): + result = parse_rda("tests/data/simple.RData") + assert isinstance(result, dict) + assert "int_vec" in result + assert "dbl_vec" in result + assert "str_vec" in result + assert "bool_vec" in result + + def test_parse_selected_objects(self): + result = parse_rda("tests/data/simple.RData", objects=["int_vec", "dbl_vec"]) + assert set(result.keys()) == {"int_vec", "dbl_vec"} + + def test_integer_data(self): + result = parse_rda("tests/data/simple.RData", objects=["int_vec"]) + obj = result["int_vec"] + assert obj["type"] == "integer" + data = obj["data"] + np.testing.assert_array_equal(data, [1, 2, 3, 4, 5]) + + def test_double_data(self): + result = parse_rda("tests/data/simple.RData", objects=["dbl_vec"]) + obj = result["dbl_vec"] + assert obj["type"] == "double" + data = obj["data"] + np.testing.assert_array_almost_equal(data, [1.1, 2.2, 3.3, 4.4, 5.5]) + + def test_string_data(self): + result = parse_rda("tests/data/simple.RData", objects=["str_vec"]) + obj = result["str_vec"] + assert obj["type"] == "string" + assert list(obj["data"]) == ["hello", "world", "foo"] + + def test_boolean_data(self): + result = parse_rda("tests/data/simple.RData", objects=["bool_vec"]) + obj = result["bool_vec"] + assert obj["type"] == "boolean" + + def test_single_object_file(self): + result = parse_rda("tests/data/single_object.RData") + assert "single_obj" in result + obj = result["single_obj"] + assert obj["type"] == "integer" + + def test_mixed_types(self): + result = parse_rda("tests/data/mixed.RData") + assert "nums" in result + assert "chars" in result + assert "ints" in result + assert "nested_list" in result + + assert result["nums"]["type"] == "double" + assert result["chars"]["type"] == "string" + assert result["ints"]["type"] == "integer" + assert 
result["nested_list"]["type"] == "vector" + + +class TestReadRda: + def test_read_all(self): + result = read_rda("tests/data/simple.RData") + assert isinstance(result, dict) + assert len(result) == 4 + + def test_read_selected(self): + result = read_rda("tests/data/simple.RData", objects=["int_vec"]) + assert set(result.keys()) == {"int_vec"} + + def test_read_list(self): + result = read_rda("tests/data/list.RData") + assert "test_list" in result + + def test_read_mixed(self): + result = read_rda("tests/data/mixed.RData") + assert "nums" in result + assert "chars" in result + assert "ints" in result + assert "nested_list" in result + + def test_read_single_object(self): + result = read_rda("tests/data/single_object.RData") + assert "single_obj" in result From 08527baf305bd1f33cb4335fb5f8faf8e8366648 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 31 Mar 2026 02:02:25 +0000 Subject: [PATCH 6/8] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/rds2py/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rds2py/__init__.py b/src/rds2py/__init__.py index 73136dc..58dee0f 100644 --- a/src/rds2py/__init__.py +++ b/src/rds2py/__init__.py @@ -17,4 +17,4 @@ from .generics import read_rds, read_rda -from .rdsutils import parse_rds, parse_rda \ No newline at end of file +from .rdsutils import parse_rds, parse_rda From 09996fa32578955ba3f2379e75c1d037238838bb Mon Sep 17 00:00:00 2001 From: Jayaram Kancherla Date: Mon, 30 Mar 2026 19:07:50 -0700 Subject: [PATCH 7/8] Update changelog --- CHANGELOG.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6bf6c55..5723201 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,10 @@ # Changelog +## Version 0.9.0 + +- Added support to parse .Rdata/.rda files. +- Bump the version of rds2cpp library. 
+ ## Version 0.8.0 - 0.8.1 - Implement parsers for compressed list objects. From 9b01b58a9b0acddf039d3dc1020a838362c2eeee Mon Sep 17 00:00:00 2001 From: Jayaram Kancherla Date: Mon, 30 Mar 2026 19:14:45 -0700 Subject: [PATCH 8/8] update readme, clean up setup config --- README.md | 38 ++++++++++---------------------------- setup.cfg | 2 +- 2 files changed, 11 insertions(+), 29 deletions(-) diff --git a/README.md b/README.md index 44c3f66..74f419a 100644 --- a/README.md +++ b/README.md @@ -4,18 +4,7 @@ # rds2py -Parse and construct Python representations for datasets stored in RDS files. `rds2py` supports various base classes from R, and Bioconductor's `SummarizedExperiment` and `SingleCellExperiment` S4 classes. **_For more details, check out [rds2cpp library](https://github.com/LTLA/rds2cpp)._** - ---- - -**Version 0.5.0** brings major changes to the package, - -- Complete overhaul of the codebase using pybind11 -- Streamlined readers for R data types -- Updated API for all classes and methods - -Please refer to the [documentation](https://biocpy.github.io/rds2py/) for the latest usage guidelines. Previous versions may have incompatible APIs. - +Parse and construct Python representations for datasets stored in **RDS or RData** files. `rds2py` supports various base classes from R, and Bioconductor's `SummarizedExperiment` and `SingleCellExperiment` S4 classes. **_For more details, check out [rds2cpp library](https://github.com/LTLA/rds2cpp)._** ## Installation @@ -32,36 +21,29 @@ By default, the package does not install packages to convert python representati ## Usage -If you do not have an RDS object handy, feel free to download one from [single-cell-test-files](https://github.com/jkanche/random-test-files/releases). +> [!NOTE] +> +> If you do not have an RDS object handy, feel free to download one from [single-cell-test-files](https://github.com/jkanche/random-test-files/releases). 
```python -from rds2py import read_rds -r_obj = read_rds("path/to/file.rds") +from rds2py import read_rds, read_rda +r_obj = read_rds("path/to/file.rds") # or read_rda("path/to/file.rda") ``` The returned `r_obj` either returns an appropriate Python class if a parser is already implemented or returns the dictionary containing the data from the RDS file. -To just get the parsed dictionary representation of the RDS file, - -```python -from rds2py import parse_rds - -robject_dict = parse_rds("path/to/file.rds") -print(robject_dict) -``` - ### Write-your-own-reader -Reading RDS files as dictionary representations allows users to write their own custom readers into appropriate Python representations. +Reading RDS or RData files as dictionary representations allows users to write their own custom readers into appropriate Python representations. ```python -from rds2py import parse_rds +from rds2py import parse_rds, parse_rda -robject = parse_rds("path/to/file.rds") +robject = parse_rds("path/to/file.rds") # or use parse_rda for RData files print(robject) ``` -if you know this RDS file contains an `GenomicRanges` object, you can use the built-in reader or write your own reader to convert this dictionary. +If you know this RDS file contains a `GenomicRanges` object, you can use the built-in reader or write your own reader to convert this dictionary. ```python from rds2py.read_granges import read_genomic_ranges diff --git a/setup.cfg b/setup.cfg index b853bae..c7dbc2a 100644 --- a/setup.cfg +++ b/setup.cfg @@ -62,7 +62,6 @@ exclude = # `pip install rds2py[PDF]` like: # PDF = ReportLab; RXP optional = - pandas hdf5array scipy biocframe @@ -72,6 +71,7 @@ optional = multiassayexperiment>=0.6.0 compressed_lists>=0.4.4 biocutils>=0.3.4 + compressed_lists # Add here test requirements (semicolon/line-separated) testing =