diff --git a/docs/reference/parsers/empaia.md b/docs/reference/parsers/empaia.md new file mode 100644 index 0000000..7734d36 --- /dev/null +++ b/docs/reference/parsers/empaia.md @@ -0,0 +1,4 @@ +# ratiopath.parsers.EMPAIAParser + +::: ratiopath.parsers.EMPAIAParser + diff --git a/pyproject.toml b/pyproject.toml index db35120..4b6e514 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,11 +1,12 @@ [project] name = "ratiopath" -version = "1.3.0" +version = "1.3.1" description = "A library for efficient processing and analysis of whole-slide pathology images." authors = [ { name = "Matěj Pekár", email = "matejpekar@mail.muni.cz" }, { name = "Jakub Pekár", email = "jakubpekar@mail.muni.cz" }, { name = "Adam Kukučka", email = "adamkukucka@mail.muni.cz" }, + { name = "Vít Musil", email = "musil@fi.muni.cz" }, ] readme = "README.md" license = "MIT" diff --git a/ratiopath/parsers/__init__.py b/ratiopath/parsers/__init__.py index 9aa5761..4d3e091 100644 --- a/ratiopath/parsers/__init__.py +++ b/ratiopath/parsers/__init__.py @@ -1,6 +1,7 @@ from ratiopath.parsers.asap_parser import ASAPParser from ratiopath.parsers.darwin7_json_parser import Darwin7JSONParser +from ratiopath.parsers.empaia_parser import EMPAIAParser from ratiopath.parsers.geojson_parser import GeoJSONParser -__all__ = ["ASAPParser", "Darwin7JSONParser", "GeoJSONParser"] +__all__ = ["ASAPParser", "Darwin7JSONParser", "EMPAIAParser", "GeoJSONParser"] diff --git a/ratiopath/parsers/asap_parser.py b/ratiopath/parsers/asap_parser.py index 601b70e..06081a5 100644 --- a/ratiopath/parsers/asap_parser.py +++ b/ratiopath/parsers/asap_parser.py @@ -6,7 +6,7 @@ from pathlib import Path from typing import TextIO -from shapely.geometry import Point, Polygon +from shapely import Point, Polygon class ASAPParser: @@ -16,7 +16,7 @@ class ASAPParser: This parser supports both polygon and point annotations. """ - def __init__(self, file_path: Path | str | TextIO): + def __init__(self, file_path: Path | str | TextIO) -> None: self.tree = ET.parse(file_path) self.root = self.tree.getroot() diff --git a/ratiopath/parsers/empaia_parser.py b/ratiopath/parsers/empaia_parser.py new file mode 100644 index 0000000..e426bc3 --- /dev/null +++ b/ratiopath/parsers/empaia_parser.py @@ -0,0 +1,78 @@ +import json +import re +from collections.abc import Iterable +from pathlib import Path +from typing import TextIO + +from shapely import Point, Polygon + + +class EMPAIAParser: + """Parser for EMPAIA format annotation files. + + EMPAIA uses JSON format for storing annotations. This parser supports + both polygon and point geometry features from the EMPAIA standardized schema. + """ + + def __init__(self, file_path: Path | str | TextIO) -> None: + """Initialize the EMPAIA parser. + + Args: + file_path: Path to the EMPAIA JSON annotation file or a file-like object. + """ + if isinstance(file_path, Path | str): + with open(file_path) as f: + self.annotations = json.load(f) + else: + self.annotations = json.load(file_path) + + def _get_filtered_annotations( + self, name: str, annotation_type: str + ) -> Iterable[dict]: + """Get annotations that match the provided regex filters. + + Args: + name: Regex pattern to match annotation names. + annotation_type: Type of annotation to match (e.g., 'polygon', 'point'). + + Yields: + Dictionary annotation elements that match the filters. + """ + name_regex = re.compile(name) + for annotation in self.annotations["items"]: + if ( + name_regex.match(annotation["name"]) + and annotation["type"] == annotation_type + ): + yield annotation + + def get_polygons(self, name: str = ".*") -> Iterable[Polygon]: + """Get polygon annotations that match the given name pattern. + + Args: + name: Regex pattern to match annotation names. Default is ".*" (all). + + Yields: + Polygon representations of the matching annotations. + """ + for annotation in self._get_filtered_annotations(name, "polygon"): + yield Polygon( + [ + (float(coordinate[0]), float(coordinate[1])) + for coordinate in annotation["coordinates"] + ] + ) + + def get_points(self, name: str = ".*") -> Iterable[Point]: + """Get point annotations that match the given name pattern. + + Args: + name: Regex pattern to match annotation names. Default is ".*" (all). + + Yields: + Point representations of the matching annotations. + """ + for annotation in self._get_filtered_annotations(name, "point"): + yield Point( + float(annotation["coordinates"][0]), float(annotation["coordinates"][1]) + ) diff --git a/tests/test_parsers.py b/tests/test_parsers.py index fdecfb1..b5860d2 100644 --- a/tests/test_parsers.py +++ b/tests/test_parsers.py @@ -6,7 +6,7 @@ import pandas as pd import pytest -from ratiopath.parsers import ASAPParser, Darwin7JSONParser, GeoJSONParser +from ratiopath.parsers import ASAPParser, Darwin7JSONParser, EMPAIAParser, GeoJSONParser class TestASAPParser: @@ -307,6 +307,83 @@ def test_extract_nested(self): ) +class TestEMPAIAParser: + """Test the EMPAIA parser.""" + + @pytest.fixture + def empaia_json_content(self): + """Sample EMPAIA JSON content.""" + return { + "items_count": 2, + "items": [ + { + "name": "Annotation 1", + "type": "polygon", + "coordinates": [[100.0, 200.0], [150.0, 200.0], [125.0, 250.0]], + }, + { + "name": "Annotation 2", + "type": "point", + "coordinates": [300.0, 400.0], + }, + ], + } + + def test_get_polygons(self, empaia_json_content): + """Test parsing polygons from EMPAIA JSON.""" + f = io.StringIO(json.dumps(empaia_json_content)) + + parser = EMPAIAParser(f) + polygons = list(parser.get_polygons()) + + assert len(polygons) == 1 + # Check that we have a polygon-like object + polygon = polygons[0] + assert hasattr(polygon, "exterior") + assert list(polygon.exterior.coords) == [ + (100.0, 200.0), + (150.0, 200.0), + (125.0, 250.0), + (100.0, 200.0), + ] + + def test_get_points(self, empaia_json_content): + """Test parsing points from EMPAIA JSON.""" + f = io.StringIO(json.dumps(empaia_json_content)) + + parser = EMPAIAParser(f) + points = list(parser.get_points()) + + assert len(points) == 1 + # Check that we have a point-like object + point = points[0] + assert hasattr(point, "x") and hasattr(point, "y") + assert point.x == 300.0 + assert point.y == 400.0 + + def test_get_polygons_with_filters(self, empaia_json_content): + """Test parsing polygons with filters.""" + f = io.StringIO(json.dumps(empaia_json_content)) + parser = EMPAIAParser(f) + + polygons = list(parser.get_polygons(name="Annotation 1")) + assert len(polygons) == 1 + + polygons = list(parser.get_polygons(name="Nonexistent")) + assert len(polygons) == 0 + + def test_get_points_with_filters(self, empaia_json_content): + """Test parsing points with filters.""" + f = io.StringIO(json.dumps(empaia_json_content)) + parser = EMPAIAParser(f) + + points = list(parser.get_points(name="Annotation 2")) + assert len(points) == 1 + + points = list(parser.get_points(name="Nonexistent")) + assert len(points) == 0 + + def test_safe_to_dict(): """Test the safe_to_dict utility function.""" from ratiopath.parsers.geojson_parser import safe_to_dict diff --git a/uv.lock b/uv.lock index 072407b..6e32abc 100644 --- a/uv.lock +++ b/uv.lock @@ -1390,7 +1390,7 @@ wheels = [ [[package]] name = "ratiopath" -version = "1.3.0" +version = "1.3.1" source = { virtual = "." } dependencies = [ { name = "albumentations" },