From 125b2c3e7b38a0ee02eaa16e3b93f3371cb7b6ab Mon Sep 17 00:00:00 2001 From: Maxim Zhiltsov Date: Wed, 26 Jan 2022 22:11:12 +0300 Subject: [PATCH 01/11] Improve simple merge --- datumaro/components/errors.py | 29 +++++- datumaro/components/media.py | 13 ++- datumaro/components/operations.py | 111 +++++++++++++++++------ datumaro/plugins/voc_format/extractor.py | 23 +++-- tests/test_dataset.py | 46 +++++++++- tests/test_images.py | 7 +- 6 files changed, 184 insertions(+), 45 deletions(-) diff --git a/datumaro/components/errors.py b/datumaro/components/errors.py index 627ebe2997..6cbf486fde 100644 --- a/datumaro/components/errors.py +++ b/datumaro/components/errors.py @@ -2,6 +2,8 @@ # # SPDX-License-Identifier: MIT +from typing import Any, Optional, Tuple + from attr import attrib, attrs @@ -223,14 +225,35 @@ def _my__init__(self, msg=None, *, sources=None): @attrs class MismatchingImageInfoError(DatasetMergeError): - item_id = attrib() - a = attrib() - b = attrib() + item_id: Optional[Tuple[str, str]] = attrib() + a: int = attrib() + b: int = attrib() def __str__(self): return "Item %s: mismatching image size info: %s vs %s" % \ (self.item_id, self.a, self.b) +@attrs +class MismatchingImagePathError(DatasetMergeError): + item_id: Optional[Tuple[str, str]] = attrib() + a: str = attrib() + b: str = attrib() + + def __str__(self): + return "Item %s: mismatching image path info: %s vs %s" % \ + (self.item_id, self.a, self.b) + +@attrs +class MismatchingAttributesError(DatasetMergeError): + item_id: Optional[Tuple[str, str]] = attrib() + key: str = attrib() + a: Any = attrib() + b: Any = attrib() + + def __str__(self): + return "Item %s: mismatching image attribute %s: %s vs %s" % \ + (self.item_id or '', self.key, self.a, self.b) + class ConflictingCategoriesError(DatasetMergeError): pass diff --git a/datumaro/components/media.py b/datumaro/components/media.py index e72afe35a4..1e5c53a126 100644 --- a/datumaro/components/media.py +++ b/datumaro/components/media.py @@ -49,9 +49,14 @@ def __init__(self, if size is not None: assert len(size) == 2 and 0 < size[0] and 0 < size[1], size size = tuple(map(int, size)) + self._size = size # (H, W) - if not self._size and isinstance(data, np.ndarray): - self._size = data.shape[:2] + + if isinstance(data, np.ndarray): + if not self._size: + self._size = data.shape[:2] + else: + assert self._size == data.shape[:2] assert path is None or isinstance(path, str), path if path is None: @@ -72,8 +77,8 @@ def __init__(self, self._ext = ext if not isinstance(data, np.ndarray): - assert path or callable(data), "Image can not be empty" - assert data is None or callable(data) + assert path or callable(data) or size, "Image can not be empty" + assert data is None or callable(data) or size if path and osp.isfile(path) or data: data = lazy_image(path, loader=data) self._data = data diff --git a/datumaro/components/operations.py b/datumaro/components/operations.py index ff8ef4fdbf..04c963df7a 100644 --- a/datumaro/components/operations.py +++ b/datumaro/components/operations.py @@ -4,7 +4,7 @@ from collections import OrderedDict from copy import deepcopy -from typing import Callable, Dict, Optional, Set, Tuple +from typing import Callable, Dict, Iterable, List, Optional, Set, Tuple from unittest import TestCase import hashlib import logging as log @@ -15,17 +15,19 @@ import numpy as np from datumaro.components.annotation import ( - AnnotationType, Bbox, Label, LabelCategories, MaskCategories, + Annotation, AnnotationType, Bbox, Label, LabelCategories, MaskCategories, 
PointsCategories, ) from datumaro.components.cli_plugin import CliPlugin from datumaro.components.dataset import Dataset, DatasetItemStorage, IDataset from datumaro.components.errors import ( AnnotationsTooCloseError, ConflictingCategoriesError, DatasetMergeError, - FailedAttrVotingError, FailedLabelVotingError, MismatchingImageInfoError, - NoMatchingAnnError, NoMatchingItemError, WrongGroupError, + FailedAttrVotingError, FailedLabelVotingError, MismatchingAttributesError, + MismatchingImageInfoError, MismatchingImagePathError, NoMatchingAnnError, + NoMatchingItemError, WrongGroupError, ) from datumaro.components.extractor import CategoriesInfo, DatasetItem +from datumaro.components.media import Image from datumaro.util import filter_dict, find from datumaro.util.annotation_util import ( OKS, approximate_line, bbox_iou, find_instances, max_bbox, mean_bbox, @@ -106,40 +108,95 @@ def merge(cls, *sources): def merge_items(cls, existing_item, current_item): return existing_item.wrap( image=cls.merge_images(existing_item, current_item), + attributes=cls.merge_attrs( + existing_item.attributes, current_item.attributes, + item_id=(existing_item.id, existing_item.subset)), annotations=cls.merge_anno( existing_item.annotations, current_item.annotations)) @staticmethod - def merge_images(existing_item, current_item): - image = None - if existing_item.has_image and current_item.has_image: - if existing_item.image.has_data: - image = existing_item.image + def merge_attrs(a: Dict, b: Dict, + item_id: Optional[Tuple[str, str]] = None) -> Dict: + merged = {} + + for name in set(a) | set(b): + a_val = a.get(name, None) + b_val = b.get(name, None) + + if name not in a: + m_val = b_val + elif name not in b: + m_val = a_val + elif a_val != b_val: + raise MismatchingAttributesError(item_id, name, a_val, b_val) else: - image = current_item.image - - if existing_item.image.path != current_item.image.path: - if not existing_item.image.path: - image._path = current_item.image.path - - if all([existing_item.image._size, current_item.image._size]): - if existing_item.image._size != current_item.image._size: - raise MismatchingImageInfoError( - (existing_item.id, existing_item.subset), - existing_item.image._size, current_item.image._size) - elif existing_item.image._size: - image._size = existing_item.image._size + m_val = a_val + + merged[name] = m_val + + return merged + + @staticmethod + def merge_images(item_a: DatasetItem, item_b: DatasetItem) -> Image: + image = None + + if item_a.has_image and item_b.has_image: + if item_a.image.path and item_b.image.path and \ + item_a.image.path != item_b.image.path and \ + item_a.image.has_data is item_b.image.has_data: + # We use has_data as a replacement for path existence check + # - If only one image has data, we'll use it. The other + # one is just a path metainfo, which is not significant + # in this case. + # - If both images have data or both don't, we need + # to compare paths. 
+ # + # Different paths can aclually point to the same file, + # but it's not the case we'd like to allow here to be + # a "simple" merging strategy used for extractor joining + raise MismatchingImagePathError( + (item_a.id, item_a.subset), + item_a.image.path, item_b.image.path) + + if item_a.image.has_size and item_b.image.has_size and \ + item_a.image.size != item_b.image.size: + raise MismatchingImageInfoError( + (item_a.id, item_a.subset), + item_a.image.size, item_b.image.size) + + # Avoid direct comparison here for better performance + # If there are 2 "data-only" images, they won't be compared and + # we just use the first one + if item_a.image.has_data: + image = item_a.image + elif item_b.image.has_data: + image = item_b.image + elif item_a.image.path: + image = item_a.image + elif item_b.image.path: + image = item_b.image + elif item_a.image.has_size: + image = item_a.image + elif item_b.image.has_size: + image = item_b.image else: - image._size = current_item.image._size - elif existing_item.has_image: - image = existing_item.image + image = item_a.image + + if not image.has_data or not image.has_size: + if item_a.image._size: + image._size = item_a.image._size + elif item_b.image._size: + image._size = item_b.image._size + elif item_a.has_image: + image = item_a.image else: - image = current_item.image + image = item_b.image return image @staticmethod - def merge_anno(a, b): + def merge_anno(a: Iterable[Annotation], + b: Iterable[Annotation]) -> List[Annotation]: return merge_annotations_equal(a, b) @staticmethod diff --git a/datumaro/plugins/voc_format/extractor.py b/datumaro/plugins/voc_format/extractor.py index d57b2e25a4..97b60d3d81 100644 --- a/datumaro/plugins/voc_format/extractor.py +++ b/datumaro/plugins/voc_format/extractor.py @@ -130,12 +130,20 @@ def __init__(self, path, task): super().__init__(path, task) def __iter__(self): + image_dir = osp.join(self._dataset_dir, VocPath.IMAGES_DIR) + if osp.isdir(image_dir): + images = { + osp.splitext(osp.relpath(p, image_dir))[0].replace('\\', '/'): p + for p in find_images(image_dir, recursive=True) + } + else: + images = {} + anno_dir = osp.join(self._dataset_dir, VocPath.ANNOTATIONS_DIR) for item_id in self._items: log.debug("Reading item '%s'" % item_id) - image = item_id + VocPath.IMAGE_EXT - height, width = 0, 0 + size = None anns = [] ann_file = osp.join(anno_dir, item_id + '.xml') @@ -147,14 +155,17 @@ def __iter__(self): width = root_elem.find('size/width') if width is not None: width = int(width.text) + if height and width: + size = (height, width) filename_elem = root_elem.find('filename') if filename_elem is not None: - image = filename_elem.text + image = osp.join(image_dir, filename_elem.text) anns = self._parse_annotations(root_elem) + else: + image = images.pop(item_id, None) - image = osp.join(self._dataset_dir, VocPath.IMAGES_DIR, image) - if height and width: - image = Image(path=image, size=(height, width)) + if image or size: + image = Image(path=image, size=size) yield DatasetItem(id=item_id, subset=self._subset, image=image, annotations=anns) diff --git a/tests/test_dataset.py b/tests/test_dataset.py index f69f1216b4..f399fab35d 100644 --- a/tests/test_dataset.py +++ b/tests/test_dataset.py @@ -17,7 +17,9 @@ ) from datumaro.components.environment import Environment from datumaro.components.errors import ( - ConflictingCategoriesError, DatasetNotFoundError, MultipleFormatsMatchError, + ConflictingCategoriesError, DatasetNotFoundError, + MismatchingAttributesError, MismatchingImageInfoError, + 
MismatchingImagePathError, MultipleFormatsMatchError, NoMatchingFormatsError, RepeatedItemError, UnknownFormatError, ) from datumaro.components.extractor import ( @@ -390,14 +392,14 @@ def test_can_join_annotations(self): DatasetItem(id=1, subset='train', annotations=[ Label(1, id=3), Label(2, attributes={ 'x': 1 }), - ]) + ], attributes={'x': 1, 'y': 2}) ], categories=['a', 'b', 'c', 'd']) b = Dataset.from_iterable([ DatasetItem(id=1, subset='train', annotations=[ Label(2, attributes={ 'x': 1 }), Label(3, id=4), - ]) + ], attributes={'z': 3, 'y': 2}) ], categories=['a', 'b', 'c', 'd']) expected = Dataset.from_iterable([ @@ -405,7 +407,7 @@ def test_can_join_annotations(self): Label(1, id=3), Label(2, attributes={ 'x': 1 }), Label(3, id=4), - ]) + ], attributes={'x': 1, 'y': 2, 'z': 3}) ], categories=['a', 'b', 'c', 'd']) merged = Dataset.from_extractors(a, b) @@ -420,6 +422,42 @@ def test_cant_join_different_categories(self): with self.assertRaises(ConflictingCategoriesError): Dataset.from_extractors(s1, s2) + @mark_requirement(Requirements.DATUM_GENERAL_REQ) + def test_cant_join_different_image_info(self): + s1 = Dataset.from_iterable([ + DatasetItem(1, image=Image(path='1.png', size=(2, 4))) + ]) + s2 = Dataset.from_iterable([ + DatasetItem(1, image=Image(path='1.png', size=(4, 2))) + ]) + + with self.assertRaises(MismatchingImageInfoError): + Dataset.from_extractors(s1, s2) + + @mark_requirement(Requirements.DATUM_GENERAL_REQ) + def test_cant_join_different_images(self): + s1 = Dataset.from_iterable([ + DatasetItem(1, image=Image(path='1.png')) + ]) + s2 = Dataset.from_iterable([ + DatasetItem(1, image=Image(path='2.png')) + ]) + + with self.assertRaises(MismatchingImagePathError): + Dataset.from_extractors(s1, s2) + + @mark_requirement(Requirements.DATUM_GENERAL_REQ) + def test_cant_join_different_attrs(self): + s1 = Dataset.from_iterable([ + DatasetItem(1, attributes={'x': 1}) + ]) + s2 = Dataset.from_iterable([ + DatasetItem(1, attributes={'x': 2}) + ]) + + with self.assertRaises(MismatchingAttributesError): + Dataset.from_extractors(s1, s2) + @mark_requirement(Requirements.DATUM_GENERAL_REQ) def test_can_join_datasets(self): s1 = Dataset.from_iterable([ DatasetItem(0), DatasetItem(1) ]) diff --git a/tests/test_images.py b/tests/test_images.py index 829ff0dcbb..717ab5ca7e 100644 --- a/tests/test_images.py +++ b/tests/test_images.py @@ -98,11 +98,15 @@ def test_ctors(self): np.testing.assert_array_equal(img.data, image) self.assertEqual(img.size, tuple(image.shape[:2])) + with self.subTest(): + img = Image(size=(2, 4)) + self.assertEqual(img.size, (2, 4)) + @mark_requirement(Requirements.DATUM_GENERAL_REQ) def test_ctor_errors(self): with self.subTest('no data specified'): with self.assertRaisesRegex(Exception, "can not be empty"): - Image(ext='jpg', size=(1, 2)) + Image(ext='jpg') with self.subTest('either path or ext'): with self.assertRaisesRegex(Exception, "both 'path' and 'ext'"): @@ -135,6 +139,7 @@ def test_ctors(self): { 'data': image_bytes, 'path': path, 'size': (2, 4) }, { 'path': path }, { 'path': path, 'size': (2, 4) }, + { 'path': path, 'size': (2, 4) }, ]: with self.subTest(**args): img = ByteImage(**args) From 9249983e30166a06e9910cb954b2b09a250b51b4 Mon Sep 17 00:00:00 2001 From: Maxim Zhiltsov Date: Thu, 27 Jan 2022 10:56:08 +0300 Subject: [PATCH 02/11] Update changelog --- CHANGELOG.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index ce2db9e22c..54b8daabff 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -42,6 +42,12 @@ and 
this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 () - OpenVINO telemetry library 2022.1.0 from PyPI. () +- Allowed `Image` creation from just `size` info + () +- Added image search in VOC XML-based subformats + () +- Added image path equality checks in simple merge, when applicable + () ### Deprecated - TBD @@ -70,6 +76,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Numeric warnings that sometimes occurred in `stats` command (e.g. ) () +- Added missing item attribute merging in simple merge + () ### Security - TBD From 41a19eb4cea958a2f74ae2a1782d89ec2f36f5ab Mon Sep 17 00:00:00 2001 From: Maxim Zhiltsov Date: Thu, 27 Jan 2022 11:16:33 +0300 Subject: [PATCH 03/11] Add image docs --- datumaro/components/media.py | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/datumaro/components/media.py b/datumaro/components/media.py index 1e5c53a126..31242d6a67 100644 --- a/datumaro/components/media.py +++ b/datumaro/components/media.py @@ -44,7 +44,25 @@ def __init__(self, *, path: Optional[str] = None, ext: Optional[str] = None, - size: Optional[Tuple[int, int]] = None): + size: Optional[Tuple[int, int]] = None) -> None: + """ + Creates an image. + + Any combinations of the `data`, `path` and `size` arguments are + possible, but at least one of them must be provided. + The `ext` parameter cannot be used as a single argument for + construction. + + Args: + data - Image pixels or a function to retrieve them. The expected + image shape is (H, W [, C]). If a function is provided, + it must accept image path as the first argument. + path - Image path + ext - Image extension. Cannot be used together with `path`. It is + useful for saving with a custom extension. + size - A pair (H, W), which represents image size. 
+ """ + assert size is None or len(size) == 2, size if size is not None: assert len(size) == 2 and 0 < size[0] and 0 < size[1], size @@ -102,6 +120,7 @@ def has_data(self) -> bool: @property def has_size(self) -> bool: + """Indicates that size info is cached and won't require image loading""" return self._size is not None or isinstance(self._data, np.ndarray) @property From e6545ed87d3b265c52cd78c4e5eb5bd9baf4c853 Mon Sep 17 00:00:00 2001 From: Maxim Zhiltsov Date: Thu, 27 Jan 2022 11:28:31 +0300 Subject: [PATCH 04/11] Fix tests --- tests/cli/test_image_zip_format.py | 2 +- tests/test_images.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/cli/test_image_zip_format.py b/tests/cli/test_image_zip_format.py index 7dde4707a1..ca64cd8ab3 100644 --- a/tests/cli/test_image_zip_format.py +++ b/tests/cli/test_image_zip_format.py @@ -49,7 +49,7 @@ def test_can_save_and_load(self): def test_can_export_zip_images_from_coco_dataset(self): with TestDir() as test_dir: coco_dir = osp.join(__file__[:__file__.rfind(osp.join('tests', ''))], - 'tests', 'assets', 'coco_dataset') + 'tests', 'assets', 'coco_dataset', 'coco') run(self, 'create', '-o', test_dir) run(self, 'import', '-p', test_dir, '-f', 'coco', coco_dir) diff --git a/tests/test_images.py b/tests/test_images.py index 717ab5ca7e..fc0b651c8e 100644 --- a/tests/test_images.py +++ b/tests/test_images.py @@ -65,8 +65,8 @@ class ImageTest(TestCase): def test_lazy_image_shape(self): data = np.ones((5, 6, 3)) - image_lazy = Image(data=data, size=(2, 4)) - image_eager = Image(data=data) + image_lazy = Image(data=lambda _: data, size=(2, 4)) + image_eager = Image(data=lambda _: data) self.assertEqual((2, 4), image_lazy.size) self.assertEqual((5, 6), image_eager.size) From 022d40eb5b7cc3ff9060746fed060f8e56501033 Mon Sep 17 00:00:00 2001 From: Maxim Zhiltsov Date: Tue, 1 Feb 2022 17:57:30 +0300 Subject: [PATCH 05/11] Make secondary methods of ExactMatcher private --- datumaro/components/operations.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/datumaro/components/operations.py b/datumaro/components/operations.py index e7ac8f0248..023a061194 100644 --- a/datumaro/components/operations.py +++ b/datumaro/components/operations.py @@ -4,7 +4,7 @@ from collections import OrderedDict from copy import deepcopy -from typing import Callable, Dict, Iterable, List, Optional, Set, Tuple +from typing import Any, Callable, Dict, Iterable, List, Optional, Set, Tuple from unittest import TestCase import hashlib import logging as log @@ -107,7 +107,7 @@ def merge(cls, *sources: IDataset) -> DatasetItemStorage: existing_item = items.get(item.id, item.subset) if existing_item is not None: try: - item = cls.merge_items(existing_item, item) + item = cls._merge_items(existing_item, item) except DatasetMergeError as e: e.sources = set(range(source_idx)) raise e @@ -116,19 +116,19 @@ def merge(cls, *sources: IDataset) -> DatasetItemStorage: return items @classmethod - def merge_items(cls, existing_item: DatasetItem, + def _merge_items(cls, existing_item: DatasetItem, current_item: DatasetItem) -> DatasetItem: return existing_item.wrap( - image=cls.merge_images(existing_item, current_item), - attributes=cls.merge_attrs( + image=cls._merge_images(existing_item, current_item), + attributes=cls._merge_attrs( existing_item.attributes, current_item.attributes, item_id=(existing_item.id, existing_item.subset)), - annotations=cls.merge_anno( + annotations=cls._merge_anno( existing_item.annotations, 
current_item.annotations)) @staticmethod - def merge_attrs(a: Dict, b: Dict, - item_id: Optional[Tuple[str, str]] = None) -> Dict: + def _merge_attrs(a: Dict[str, Any], b: Dict[str, Any], + item_id: Tuple[str, str]) -> Dict: merged = {} for name in set(a) | set(b): @@ -149,7 +149,7 @@ def merge_attrs(a: Dict, b: Dict, return merged @staticmethod - def merge_images(item_a: DatasetItem, item_b: DatasetItem) -> Image: + def _merge_images(item_a: DatasetItem, item_b: DatasetItem) -> Image: image = None if item_a.has_image and item_b.has_image: @@ -207,7 +207,7 @@ def merge_images(item_a: DatasetItem, item_b: DatasetItem) -> Image: return image @staticmethod - def merge_anno(a: Iterable[Annotation], b: Iterable[Annotation]) \ + def _merge_anno(a: Iterable[Annotation], b: Iterable[Annotation]) \ -> List[Annotation]: return merge_annotations_equal(a, b) From 166062297263ca9673e7e1cee376a044c081ae33 Mon Sep 17 00:00:00 2001 From: Maxim Zhiltsov Date: Tue, 1 Feb 2022 17:57:59 +0300 Subject: [PATCH 06/11] Use define and field in errors --- datumaro/components/errors.py | 268 +++++++++++++++++----------------- 1 file changed, 134 insertions(+), 134 deletions(-) diff --git a/datumaro/components/errors.py b/datumaro/components/errors.py index 6cbf486fde..a6afba9471 100644 --- a/datumaro/components/errors.py +++ b/datumaro/components/errors.py @@ -1,10 +1,10 @@ -# Copyright (C) 2020-2021 Intel Corporation +# Copyright (C) 2020-2022 Intel Corporation # # SPDX-License-Identifier: MIT -from typing import Any, Optional, Tuple +from typing import Any, Tuple -from attr import attrib, attrs +from attrs import define, field class ImmutableObjectError(Exception): @@ -26,9 +26,9 @@ class ReadonlyProjectError(VcsError): def __str__(self): return "Can't change a read-only project" -@attrs +@define class UnknownRefError(VcsError): - ref = attrib() + ref = field() def __str__(self): return f"Can't parse ref '{self.ref}'" @@ -39,9 +39,9 @@ class MissingObjectError(VcsError): class MismatchingObjectError(VcsError): pass -@attrs +@define class UnsavedChangesError(VcsError): - paths = attrib() + paths = field() def __str__(self): return "There are some uncommitted changes: %s" % ', '.join(self.paths) @@ -99,47 +99,47 @@ def __str__(self): """ -@attrs +@define class ProjectNotFoundError(DatumaroError): - path = attrib() + path = field() def __str__(self): return f"Can't find project at '{self.path}'" -@attrs +@define class ProjectAlreadyExists(DatumaroError): - path = attrib() + path = field() def __str__(self): return f"Can't create project: a project already exists " \ f"at '{self.path}'" -@attrs +@define class UnknownSourceError(DatumaroError): - name = attrib() + name = field() def __str__(self): return f"Unknown source '{self.name}'" -@attrs +@define class UnknownTargetError(DatumaroError): - name = attrib() + name = field() def __str__(self): return f"Unknown target '{self.name}'" -@attrs +@define class UnknownFormatError(DatumaroError): - format = attrib() + format = field() def __str__(self): return f"Unknown source format '{self.format}'. 
To make it " \ "available, add the corresponding Extractor implementation " \ "to the environment" -@attrs +@define class SourceExistsError(DatumaroError): - name = attrib() + name = field() def __str__(self): return f"Source '{self.name}' already exists" @@ -148,16 +148,16 @@ def __str__(self): class DatasetImportError(DatumaroError): pass -@attrs +@define class DatasetNotFoundError(DatasetImportError): - path = attrib() + path = field() def __str__(self): return f"Failed to find dataset at '{self.path}'" -@attrs +@define class MultipleFormatsMatchError(DatasetImportError): - formats = attrib() + formats = field() def __str__(self): return "Failed to detect dataset format automatically:" \ @@ -176,9 +176,9 @@ class CategoriesRedefinedError(DatasetError): def __str__(self): return "Categories can only be set once for a dataset" -@attrs +@define class RepeatedItemError(DatasetError): - item_id = attrib() + item_id = field() def __str__(self): return f"Item {self.item_id} is repeated in the source sequence." @@ -187,23 +187,23 @@ def __str__(self): class DatasetQualityError(DatasetError): pass -@attrs +@define class AnnotationsTooCloseError(DatasetQualityError): - item_id = attrib() - a = attrib() - b = attrib() - distance = attrib() + item_id = field() + a = field() + b = field() + distance = field() def __str__(self): return "Item %s: annotations are too close: %s, %s, distance = %s" % \ (self.item_id, self.a, self.b, self.distance) -@attrs +@define class WrongGroupError(DatasetQualityError): - item_id = attrib() - found = attrib(converter=set) - expected = attrib(converter=set) - group = attrib(converter=list) + item_id = field() + found = field(converter=set) + expected = field(converter=set) + group = field(converter=list) def __str__(self): return "Item %s: annotation group has wrong labels: " \ @@ -211,9 +211,9 @@ def __str__(self): (self.item_id, self.found, self.expected, self.group) -@attrs(init=False) +@define(init=False) class DatasetMergeError(DatasetError): - sources = attrib(converter=set, factory=set, kw_only=True) + sources = field(converter=set, factory=set, kw_only=True) def _my__init__(self, msg=None, *, sources=None): super().__init__(msg) @@ -223,84 +223,84 @@ def _my__init__(self, msg=None, *, sources=None): # when __init__ is defined directly setattr(DatasetMergeError, '__init__', DatasetMergeError._my__init__) -@attrs +@define class MismatchingImageInfoError(DatasetMergeError): - item_id: Optional[Tuple[str, str]] = attrib() - a: int = attrib() - b: int = attrib() + item_id: Tuple[str, str] + a: Tuple[int, int] + b: Tuple[int, int] def __str__(self): return "Item %s: mismatching image size info: %s vs %s" % \ (self.item_id, self.a, self.b) -@attrs +@define class MismatchingImagePathError(DatasetMergeError): - item_id: Optional[Tuple[str, str]] = attrib() - a: str = attrib() - b: str = attrib() + item_id: Tuple[str, str] + a: str + b: str def __str__(self): return "Item %s: mismatching image path info: %s vs %s" % \ (self.item_id, self.a, self.b) -@attrs +@define class MismatchingAttributesError(DatasetMergeError): - item_id: Optional[Tuple[str, str]] = attrib() - key: str = attrib() - a: Any = attrib() - b: Any = attrib() + item_id: Tuple[str, str] + key: str + a: Any + b: Any def __str__(self): return "Item %s: mismatching image attribute %s: %s vs %s" % \ - (self.item_id or '', self.key, self.a, self.b) + (self.item_id, self.key, self.a, self.b) class ConflictingCategoriesError(DatasetMergeError): pass -@attrs +@define class NoMatchingAnnError(DatasetMergeError): - 
item_id = attrib() - ann = attrib() + item_id = field() + ann = field() def __str__(self): return "Item %s: can't find matching annotation " \ "in sources %s, annotation is %s" % \ (self.item_id, self.sources, self.ann) -@attrs +@define class NoMatchingItemError(DatasetMergeError): - item_id = attrib() + item_id = field() def __str__(self): return "Item %s: can't find matching item in sources %s" % \ (self.item_id, self.sources) -@attrs +@define class FailedLabelVotingError(DatasetMergeError): - item_id = attrib() - votes = attrib() - ann = attrib(default=None) + item_id = field() + votes = field() + ann = field(default=None) def __str__(self): return "Item %s: label voting failed%s, votes %s, sources %s" % \ (self.item_id, 'for ann %s' % self.ann if self.ann else '', self.votes, self.sources) -@attrs +@define class FailedAttrVotingError(DatasetMergeError): - item_id = attrib() - attr = attrib() - votes = attrib() - ann = attrib() + item_id = field() + attr = field() + votes = field() + ann = field() def __str__(self): return "Item %s: attribute voting failed " \ "for ann %s, votes %s, sources %s" % \ (self.item_id, self.ann, self.votes, self.sources) -@attrs +@define class DatasetValidationError(DatumaroError): - severity = attrib() + severity = field() def to_dict(self): return { @@ -310,10 +310,10 @@ def to_dict(self): } -@attrs +@define class DatasetItemValidationError(DatasetValidationError): - item_id = attrib() - subset = attrib() + item_id = field() + subset = field() def to_dict(self): dict_repr = super().to_dict() @@ -321,103 +321,103 @@ def to_dict(self): dict_repr['subset'] = self.subset return dict_repr -@attrs +@define class MissingLabelCategories(DatasetValidationError): def __str__(self): return "Metadata (ex. LabelCategories) should be defined" \ " to validate a dataset." -@attrs +@define class MissingAnnotation(DatasetItemValidationError): - ann_type = attrib() + ann_type = field() def __str__(self): return f"Item needs '{self.ann_type}' annotation(s), " \ "but not found." -@attrs +@define class MultiLabelAnnotations(DatasetItemValidationError): def __str__(self): return 'Item needs a single label but multiple labels are found.' -@attrs +@define class MissingAttribute(DatasetItemValidationError): - label_name = attrib() - attr_name = attrib() + label_name = field() + attr_name = field() def __str__(self): return f"Item needs the attribute '{self.attr_name}' " \ f"for the label '{self.label_name}'." -@attrs +@define class UndefinedLabel(DatasetItemValidationError): - label_name = attrib() + label_name = field() def __str__(self): return f"Item has the label '{self.label_name}' which " \ "is not defined in metadata." -@attrs +@define class UndefinedAttribute(DatasetItemValidationError): - label_name = attrib() - attr_name = attrib() + label_name = field() + attr_name = field() def __str__(self): return f"Item has the attribute '{self.attr_name}' for the " \ f"label '{self.label_name}' which is not defined in metadata." -@attrs +@define class LabelDefinedButNotFound(DatasetValidationError): - label_name = attrib() + label_name = field() def __str__(self): return f"The label '{self.label_name}' is defined in " \ "metadata, but not found in the dataset." -@attrs +@define class AttributeDefinedButNotFound(DatasetValidationError): - label_name = attrib() - attr_name = attrib() + label_name = field() + attr_name = field() def __str__(self): return f"The attribute '{self.attr_name}' for the label " \ f"'{self.label_name}' is defined in metadata, but not " \ "found in the dataset." 
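
This commit is largely a mechanical rename from the legacy attr API (@attrs/attrib) to the modern attrs one (@define/field). A minimal sketch of how the two spellings line up (the class and field names below are invented for illustration, they are not taken from the patch):

    from attr import attrs, attrib
    from attrs import define, field

    @attrs                  # legacy API, as used before this commit
    class PointOld:
        x = attrib()
        y = attrib(default=0)

    @define                 # modern replacement used throughout this commit
    class PointNew:
        x = field()
        y = field(default=0)

    # Both variants generate __init__, __repr__ and a field-based __eq__
    assert PointOld(1) == PointOld(1, 0)
    assert PointNew(1) == PointNew(1, 0)

One behavioural difference worth keeping in mind: @define produces slotted classes by default, while the legacy @attrs does not.
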
-@attrs +@define class OnlyOneLabel(DatasetValidationError): - label_name = attrib() + label_name = field() def __str__(self): return f"The dataset has only one label '{self.label_name}'." -@attrs +@define class OnlyOneAttributeValue(DatasetValidationError): - label_name = attrib() - attr_name = attrib() - value = attrib() + label_name = field() + attr_name = field() + value = field() def __str__(self): return "The dataset has the only attribute value " \ f"'{self.value}' for the attribute '{self.attr_name}' for the " \ f"label '{self.label_name}'." -@attrs +@define class FewSamplesInLabel(DatasetValidationError): - label_name = attrib() - count = attrib() + label_name = field() + count = field() def __str__(self): return f"The number of samples in the label '{self.label_name}'" \ f" might be too low. Found '{self.count}' samples." -@attrs +@define class FewSamplesInAttribute(DatasetValidationError): - label_name = attrib() - attr_name = attrib() - attr_value = attrib() - count = attrib() + label_name = field() + attr_name = field() + attr_value = field() + count = field() def __str__(self): return "The number of samples for attribute = value " \ @@ -425,69 +425,69 @@ def __str__(self): f"'{self.label_name}' might be too low. " \ f"Found '{self.count}' samples." -@attrs +@define class ImbalancedLabels(DatasetValidationError): def __str__(self): return 'There is an imbalance in the label distribution.' -@attrs +@define class ImbalancedAttribute(DatasetValidationError): - label_name = attrib() - attr_name = attrib() + label_name = field() + attr_name = field() def __str__(self): return "There is an imbalance in the distribution of attribute" \ f" '{self. attr_name}' for the label '{self.label_name}'." -@attrs +@define class ImbalancedDistInLabel(DatasetValidationError): - label_name = attrib() - prop = attrib() + label_name = field() + prop = field() def __str__(self): return f"Values of '{self.prop}' are not evenly " \ f"distributed for '{self.label_name}' label." -@attrs +@define class ImbalancedDistInAttribute(DatasetValidationError): - label_name = attrib() - attr_name = attrib() - attr_value = attrib() - prop = attrib() + label_name = field() + attr_name = field() + attr_value = field() + prop = field() def __str__(self): return f"Values of '{self.prop}' are not evenly " \ f"distributed for '{self.attr_name}' = '{self.attr_value}' for " \ f"the '{self.label_name}' label." -@attrs +@define class NegativeLength(DatasetItemValidationError): - ann_id = attrib() - prop = attrib() - val = attrib() + ann_id = field() + prop = field() + val = field() def __str__(self): return f"Annotation '{self.ann_id}' in " \ "the item should have a positive value of " \ f"'{self.prop}' but got '{self.val}'." -@attrs +@define class InvalidValue(DatasetItemValidationError): - ann_id = attrib() - prop = attrib() + ann_id = field() + prop = field() def __str__(self): return f"Annotation '{self.ann_id}' in " \ 'the item has an inf or a NaN value of ' \ f"'{self.prop}'." -@attrs +@define class FarFromLabelMean(DatasetItemValidationError): - label_name = attrib() - ann_id = attrib() - prop = attrib() - mean = attrib() - val = attrib() + label_name = field() + ann_id = field() + prop = field() + mean = field() + val = field() def __str__(self): return f"Annotation '{self.ann_id}' in " \ @@ -495,15 +495,15 @@ def __str__(self): "is too far from the label average. (mean of " \ f"'{self.label_name}' label: {self.mean}, got '{self.val}')." 
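
The merge-related errors earlier in this file's diff (MismatchingImageInfoError and friends) go one step further and drop field() entirely: under @define, bare type annotations are collected as fields in declaration order. A small stand-in example (the class below is hypothetical, not part of the patch):

    from typing import Tuple

    from attrs import define

    @define
    class SizeMismatchDemo:
        item_id: Tuple[str, str]
        a: Tuple[int, int]
        b: Tuple[int, int]

    err = SizeMismatchDemo(('0001', 'train'), (2, 4), (4, 2))
    print(err.item_id, err.a, err.b)    # ('0001', 'train') (2, 4) (4, 2)
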
-@attrs +@define class FarFromAttrMean(DatasetItemValidationError): - label_name = attrib() - ann_id = attrib() - attr_name = attrib() - attr_value = attrib() - prop = attrib() - mean = attrib() - val = attrib() + label_name = field() + ann_id = field() + attr_name = field() + attr_value = field() + prop = field() + mean = field() + val = field() def __str__(self): return f"Annotation '{self.ann_id}' in the " \ From f2bc5ab2962d3eed8c799e0e8b53d0b8a9eece96 Mon Sep 17 00:00:00 2001 From: Maxim Zhiltsov Date: Tue, 1 Feb 2022 18:13:55 +0300 Subject: [PATCH 07/11] Fix image tests --- datumaro/components/media.py | 7 ------- tests/test_images.py | 9 +++------ 2 files changed, 3 insertions(+), 13 deletions(-) diff --git a/datumaro/components/media.py b/datumaro/components/media.py index 31242d6a67..d024ee7d51 100644 --- a/datumaro/components/media.py +++ b/datumaro/components/media.py @@ -67,15 +67,8 @@ def __init__(self, if size is not None: assert len(size) == 2 and 0 < size[0] and 0 < size[1], size size = tuple(map(int, size)) - self._size = size # (H, W) - if isinstance(data, np.ndarray): - if not self._size: - self._size = data.shape[:2] - else: - assert self._size == data.shape[:2] - assert path is None or isinstance(path, str), path if path is None: path = '' diff --git a/tests/test_images.py b/tests/test_images.py index fc0b651c8e..4de3662ec1 100644 --- a/tests/test_images.py +++ b/tests/test_images.py @@ -62,14 +62,12 @@ def tearDown(self) -> None: class ImageTest(TestCase): @mark_requirement(Requirements.DATUM_GENERAL_REQ) - def test_lazy_image_shape(self): + def test_can_report_cached_size(self): data = np.ones((5, 6, 3)) - image_lazy = Image(data=lambda _: data, size=(2, 4)) - image_eager = Image(data=lambda _: data) + image = Image(data=lambda _: data, size=(2, 4)) - self.assertEqual((2, 4), image_lazy.size) - self.assertEqual((5, 6), image_eager.size) + self.assertEqual((2, 4), image.size) @mark_requirement(Requirements.DATUM_GENERAL_REQ) def test_ctors(self): @@ -139,7 +137,6 @@ def test_ctors(self): { 'data': image_bytes, 'path': path, 'size': (2, 4) }, { 'path': path }, { 'path': path, 'size': (2, 4) }, - { 'path': path, 'size': (2, 4) }, ]: with self.subTest(**args): img = ByteImage(**args) From 807d0ccd2c4a72f7c6a21947d7d7f11c9dc5bf87 Mon Sep 17 00:00:00 2001 From: Maxim Zhiltsov Date: Tue, 1 Feb 2022 18:31:07 +0300 Subject: [PATCH 08/11] Fix wording in Image doc --- datumaro/components/media.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/datumaro/components/media.py b/datumaro/components/media.py index b4b450e957..587a5ba7f3 100644 --- a/datumaro/components/media.py +++ b/datumaro/components/media.py @@ -48,8 +48,8 @@ def __init__(self, """ Creates an image. - Any combinations of the `data`, `path` and `size` arguments are - possible, but at least one of them must be provided. + Any combination of the `data`, `path` and `size` is possible, + but at least one of these arguments must be provided. The `ext` parameter cannot be used as a single argument for construction. @@ -58,8 +58,9 @@ def __init__(self, image shape is (H, W [, C]). If a function is provided, it must accept image path as the first argument. path - Image path - ext - Image extension. Cannot be used together with `path`. It is - useful for saving with a custom extension. + ext - Image extension. Cannot be used together with `path`. It can + be used for saving with a custom extension - in that case, + the image need to have the `data` and `ext` fields defined. 
size - A pair (H, W), which represents image size. """ From d0e39ed7e43de51d997d9d855cca4713b2a10741 Mon Sep 17 00:00:00 2001 From: Maxim Zhiltsov Date: Tue, 1 Feb 2022 18:32:49 +0300 Subject: [PATCH 09/11] Throw an error --- datumaro/components/operations.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datumaro/components/operations.py b/datumaro/components/operations.py index d5b95b3ae4..1fd73e28a7 100644 --- a/datumaro/components/operations.py +++ b/datumaro/components/operations.py @@ -189,7 +189,7 @@ def _merge_images(item_a: DatasetItem, item_b: DatasetItem) -> Image: elif item_b.image.has_size: image = item_b.image else: - image = item_a.image + assert False, "Unknown image field combination" if not image.has_data or not image.has_size: if item_a.image._size: From f88fcf76b7412c2764dc155252085ef55d627ed8 Mon Sep 17 00:00:00 2001 From: Maxim Zhiltsov Date: Tue, 1 Feb 2022 19:28:32 +0300 Subject: [PATCH 10/11] Fix test error --- datumaro/components/errors.py | 86 +++++++++++++++++------------------ 1 file changed, 43 insertions(+), 43 deletions(-) diff --git a/datumaro/components/errors.py b/datumaro/components/errors.py index a6afba9471..2b555cc26a 100644 --- a/datumaro/components/errors.py +++ b/datumaro/components/errors.py @@ -26,7 +26,7 @@ class ReadonlyProjectError(VcsError): def __str__(self): return "Can't change a read-only project" -@define +@define(auto_exc=False) class UnknownRefError(VcsError): ref = field() @@ -39,7 +39,7 @@ class MissingObjectError(VcsError): class MismatchingObjectError(VcsError): pass -@define +@define(auto_exc=False) class UnsavedChangesError(VcsError): paths = field() @@ -99,14 +99,14 @@ def __str__(self): """ -@define +@define(auto_exc=False) class ProjectNotFoundError(DatumaroError): path = field() def __str__(self): return f"Can't find project at '{self.path}'" -@define +@define(auto_exc=False) class ProjectAlreadyExists(DatumaroError): path = field() @@ -114,21 +114,21 @@ def __str__(self): return f"Can't create project: a project already exists " \ f"at '{self.path}'" -@define +@define(auto_exc=False) class UnknownSourceError(DatumaroError): name = field() def __str__(self): return f"Unknown source '{self.name}'" -@define +@define(auto_exc=False) class UnknownTargetError(DatumaroError): name = field() def __str__(self): return f"Unknown target '{self.name}'" -@define +@define(auto_exc=False) class UnknownFormatError(DatumaroError): format = field() @@ -137,7 +137,7 @@ def __str__(self): "available, add the corresponding Extractor implementation " \ "to the environment" -@define +@define(auto_exc=False) class SourceExistsError(DatumaroError): name = field() @@ -148,14 +148,14 @@ def __str__(self): class DatasetImportError(DatumaroError): pass -@define +@define(auto_exc=False) class DatasetNotFoundError(DatasetImportError): path = field() def __str__(self): return f"Failed to find dataset at '{self.path}'" -@define +@define(auto_exc=False) class MultipleFormatsMatchError(DatasetImportError): formats = field() @@ -176,7 +176,7 @@ class CategoriesRedefinedError(DatasetError): def __str__(self): return "Categories can only be set once for a dataset" -@define +@define(auto_exc=False) class RepeatedItemError(DatasetError): item_id = field() @@ -187,7 +187,7 @@ def __str__(self): class DatasetQualityError(DatasetError): pass -@define +@define(auto_exc=False) class AnnotationsTooCloseError(DatasetQualityError): item_id = field() a = field() @@ -198,7 +198,7 @@ def __str__(self): return "Item %s: annotations are too close: %s, %s, 
distance = %s" % \ (self.item_id, self.a, self.b, self.distance) -@define +@define(auto_exc=False) class WrongGroupError(DatasetQualityError): item_id = field() found = field(converter=set) @@ -211,7 +211,7 @@ def __str__(self): (self.item_id, self.found, self.expected, self.group) -@define(init=False) +@define(auto_exc=False, init=False) class DatasetMergeError(DatasetError): sources = field(converter=set, factory=set, kw_only=True) @@ -223,7 +223,7 @@ def _my__init__(self, msg=None, *, sources=None): # when __init__ is defined directly setattr(DatasetMergeError, '__init__', DatasetMergeError._my__init__) -@define +@define(auto_exc=False) class MismatchingImageInfoError(DatasetMergeError): item_id: Tuple[str, str] a: Tuple[int, int] @@ -233,7 +233,7 @@ def __str__(self): return "Item %s: mismatching image size info: %s vs %s" % \ (self.item_id, self.a, self.b) -@define +@define(auto_exc=False) class MismatchingImagePathError(DatasetMergeError): item_id: Tuple[str, str] a: str @@ -243,7 +243,7 @@ def __str__(self): return "Item %s: mismatching image path info: %s vs %s" % \ (self.item_id, self.a, self.b) -@define +@define(auto_exc=False) class MismatchingAttributesError(DatasetMergeError): item_id: Tuple[str, str] key: str @@ -257,7 +257,7 @@ def __str__(self): class ConflictingCategoriesError(DatasetMergeError): pass -@define +@define(auto_exc=False) class NoMatchingAnnError(DatasetMergeError): item_id = field() ann = field() @@ -267,7 +267,7 @@ def __str__(self): "in sources %s, annotation is %s" % \ (self.item_id, self.sources, self.ann) -@define +@define(auto_exc=False) class NoMatchingItemError(DatasetMergeError): item_id = field() @@ -275,7 +275,7 @@ def __str__(self): return "Item %s: can't find matching item in sources %s" % \ (self.item_id, self.sources) -@define +@define(auto_exc=False) class FailedLabelVotingError(DatasetMergeError): item_id = field() votes = field() @@ -286,7 +286,7 @@ def __str__(self): (self.item_id, 'for ann %s' % self.ann if self.ann else '', self.votes, self.sources) -@define +@define(auto_exc=False) class FailedAttrVotingError(DatasetMergeError): item_id = field() attr = field() @@ -298,7 +298,7 @@ def __str__(self): "for ann %s, votes %s, sources %s" % \ (self.item_id, self.ann, self.votes, self.sources) -@define +@define(auto_exc=False) class DatasetValidationError(DatumaroError): severity = field() @@ -310,7 +310,7 @@ def to_dict(self): } -@define +@define(auto_exc=False) class DatasetItemValidationError(DatasetValidationError): item_id = field() subset = field() @@ -321,14 +321,14 @@ def to_dict(self): dict_repr['subset'] = self.subset return dict_repr -@define +@define(auto_exc=False) class MissingLabelCategories(DatasetValidationError): def __str__(self): return "Metadata (ex. LabelCategories) should be defined" \ " to validate a dataset." -@define +@define(auto_exc=False) class MissingAnnotation(DatasetItemValidationError): ann_type = field() @@ -336,12 +336,12 @@ def __str__(self): return f"Item needs '{self.ann_type}' annotation(s), " \ "but not found." -@define +@define(auto_exc=False) class MultiLabelAnnotations(DatasetItemValidationError): def __str__(self): return 'Item needs a single label but multiple labels are found.' -@define +@define(auto_exc=False) class MissingAttribute(DatasetItemValidationError): label_name = field() attr_name = field() @@ -350,7 +350,7 @@ def __str__(self): return f"Item needs the attribute '{self.attr_name}' " \ f"for the label '{self.label_name}'." 
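
The auto_exc=False flag added throughout this commit is what actually fixes the failing test: with the default auto_exc=True, attrs treats Exception subclasses like ordinary exceptions and does not generate __eq__/__hash__ for them, so two error objects with identical fields compare unequal. Presumably the tests compare error instances by value, which only works with the field-based equality restored here. A minimal sketch of the difference (the demo classes are invented, not taken from the patch):

    from attrs import define

    @define
    class DemoError(Exception):
        code: int

    @define(auto_exc=False)
    class ComparableDemoError(Exception):
        code: int

    # Default behaviour for exception subclasses: identity comparison only
    assert DemoError(1) != DemoError(1)

    # auto_exc=False keeps the generated, field-based __eq__
    assert ComparableDemoError(1) == ComparableDemoError(1)
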
-@define +@define(auto_exc=False) class UndefinedLabel(DatasetItemValidationError): label_name = field() @@ -358,7 +358,7 @@ def __str__(self): return f"Item has the label '{self.label_name}' which " \ "is not defined in metadata." -@define +@define(auto_exc=False) class UndefinedAttribute(DatasetItemValidationError): label_name = field() attr_name = field() @@ -367,7 +367,7 @@ def __str__(self): return f"Item has the attribute '{self.attr_name}' for the " \ f"label '{self.label_name}' which is not defined in metadata." -@define +@define(auto_exc=False) class LabelDefinedButNotFound(DatasetValidationError): label_name = field() @@ -375,7 +375,7 @@ def __str__(self): return f"The label '{self.label_name}' is defined in " \ "metadata, but not found in the dataset." -@define +@define(auto_exc=False) class AttributeDefinedButNotFound(DatasetValidationError): label_name = field() attr_name = field() @@ -385,14 +385,14 @@ def __str__(self): f"'{self.label_name}' is defined in metadata, but not " \ "found in the dataset." -@define +@define(auto_exc=False) class OnlyOneLabel(DatasetValidationError): label_name = field() def __str__(self): return f"The dataset has only one label '{self.label_name}'." -@define +@define(auto_exc=False) class OnlyOneAttributeValue(DatasetValidationError): label_name = field() attr_name = field() @@ -403,7 +403,7 @@ def __str__(self): f"'{self.value}' for the attribute '{self.attr_name}' for the " \ f"label '{self.label_name}'." -@define +@define(auto_exc=False) class FewSamplesInLabel(DatasetValidationError): label_name = field() count = field() @@ -412,7 +412,7 @@ def __str__(self): return f"The number of samples in the label '{self.label_name}'" \ f" might be too low. Found '{self.count}' samples." -@define +@define(auto_exc=False) class FewSamplesInAttribute(DatasetValidationError): label_name = field() attr_name = field() @@ -425,12 +425,12 @@ def __str__(self): f"'{self.label_name}' might be too low. " \ f"Found '{self.count}' samples." -@define +@define(auto_exc=False) class ImbalancedLabels(DatasetValidationError): def __str__(self): return 'There is an imbalance in the label distribution.' -@define +@define(auto_exc=False) class ImbalancedAttribute(DatasetValidationError): label_name = field() attr_name = field() @@ -439,7 +439,7 @@ def __str__(self): return "There is an imbalance in the distribution of attribute" \ f" '{self. attr_name}' for the label '{self.label_name}'." -@define +@define(auto_exc=False) class ImbalancedDistInLabel(DatasetValidationError): label_name = field() prop = field() @@ -448,7 +448,7 @@ def __str__(self): return f"Values of '{self.prop}' are not evenly " \ f"distributed for '{self.label_name}' label." -@define +@define(auto_exc=False) class ImbalancedDistInAttribute(DatasetValidationError): label_name = field() attr_name = field() @@ -460,7 +460,7 @@ def __str__(self): f"distributed for '{self.attr_name}' = '{self.attr_value}' for " \ f"the '{self.label_name}' label." -@define +@define(auto_exc=False) class NegativeLength(DatasetItemValidationError): ann_id = field() prop = field() @@ -471,7 +471,7 @@ def __str__(self): "the item should have a positive value of " \ f"'{self.prop}' but got '{self.val}'." -@define +@define(auto_exc=False) class InvalidValue(DatasetItemValidationError): ann_id = field() prop = field() @@ -481,7 +481,7 @@ def __str__(self): 'the item has an inf or a NaN value of ' \ f"'{self.prop}'." 
-@define +@define(auto_exc=False) class FarFromLabelMean(DatasetItemValidationError): label_name = field() ann_id = field() @@ -495,7 +495,7 @@ def __str__(self): "is too far from the label average. (mean of " \ f"'{self.label_name}' label: {self.mean}, got '{self.val}')." -@define +@define(auto_exc=False) class FarFromAttrMean(DatasetItemValidationError): label_name = field() ann_id = field() From 88ed8497b0bb8df0642474bcb801faca22b4438a Mon Sep 17 00:00:00 2001 From: Maxim Zhiltsov Date: Fri, 4 Feb 2022 15:34:35 +0300 Subject: [PATCH 11/11] Remove extra casting --- datumaro/components/operations.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datumaro/components/operations.py b/datumaro/components/operations.py index 1fd73e28a7..2a263cdc28 100644 --- a/datumaro/components/operations.py +++ b/datumaro/components/operations.py @@ -128,7 +128,7 @@ def _merge_attrs(a: Dict[str, Any], b: Dict[str, Any], item_id: Tuple[str, str]) -> Dict: merged = {} - for name in set(a) | set(b): + for name in a.keys() | b.keys(): a_val = a.get(name, None) b_val = b.get(name, None)
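
Taken together, the series allows an Image to be built from size information alone, merges partial image info (path, size, pixel data) more carefully during simple merge, and turns conflicting paths, sizes and item attributes into explicit errors. A usage sketch, assuming a Datumaro checkout with this whole series applied (item ids, file names and sizes below are invented):

    from datumaro.components.dataset import Dataset
    from datumaro.components.errors import MismatchingImageInfoError
    from datumaro.components.extractor import DatasetItem
    from datumaro.components.media import Image

    # One source only knows the image path, the other only its size;
    # the merged item ends up carrying both pieces of information.
    a = Dataset.from_iterable([DatasetItem(1, image=Image(path='frame1.png'))])
    b = Dataset.from_iterable([DatasetItem(1, image=Image(size=(480, 640)))])
    merged = Dataset.from_extractors(a, b)
    item = next(iter(merged))
    print(item.image.path, item.image.size)

    # Genuinely conflicting size info is now reported instead of being ignored.
    c = Dataset.from_iterable([
        DatasetItem(1, image=Image(path='frame1.png', size=(640, 480)))])
    d = Dataset.from_iterable([
        DatasetItem(1, image=Image(path='frame1.png', size=(480, 640)))])
    try:
        Dataset.from_extractors(c, d)
    except MismatchingImageInfoError as e:
        print(e)

(The final commit is a small cleanup on top: dict key views already support set union, so a.keys() | b.keys() avoids building two temporary sets.)
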