Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimize validation layout updates #8789

Merged
merged 41 commits into from
Dec 19, 2024
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
Show all changes
41 commits
Select commit Hold shift + click to select a range
a7df39d
Improve validation frame distribution in honeypot tasks
zhiltsov-max Dec 4, 2024
8f83f04
Improve frame distribution in automatic honeypot rerolls
zhiltsov-max Dec 4, 2024
4a9ce0f
Reset only active frames, fix errors
zhiltsov-max Dec 5, 2024
daebdcd
Add tests
zhiltsov-max Dec 5, 2024
1b71f6a
Add changelog
zhiltsov-max Dec 5, 2024
4c07c0c
Fix possible invalid honeypot picks in task creation
zhiltsov-max Dec 5, 2024
006e855
t
zhiltsov-max Dec 5, 2024
306c006
Optimize validation layout updates
zhiltsov-max Dec 6, 2024
f55595d
Fix indentation
zhiltsov-max Dec 6, 2024
d45a33d
Update cvat/apps/engine/cache.py
zhiltsov-max Dec 7, 2024
0f2b2ee
Refactor code: extract common functions, add typing, change chunked_l…
zhiltsov-max Dec 10, 2024
15bf9b7
Remove handling of impossible exceptions
zhiltsov-max Dec 10, 2024
ec0c353
Merge remote-tracking branch 'origin/zm/optimize-validation-layout-up…
zhiltsov-max Dec 10, 2024
4acbeb1
Merge remote-tracking branch 'origin/develop' into zm/optimize-valida…
zhiltsov-max Dec 10, 2024
1dc2b42
Fix request response and behavior in simultaneous deleted_frames and …
zhiltsov-max Dec 10, 2024
61eeb8d
Fix formatting
zhiltsov-max Dec 10, 2024
268b54b
Fix test
zhiltsov-max Dec 10, 2024
bea74b4
Merge branch 'develop' into zm/optimize-validation-layout-updates
zhiltsov-max Dec 12, 2024
e0e978a
Move import
zhiltsov-max Dec 12, 2024
043bf83
Remove extra sorting
zhiltsov-max Dec 12, 2024
e202521
Add sorting
zhiltsov-max Dec 12, 2024
5e3a797
Fix merge
zhiltsov-max Dec 12, 2024
db59fb2
Improve error message
zhiltsov-max Dec 12, 2024
e2b2807
Fix imports
zhiltsov-max Dec 12, 2024
88bd0ce
Refactor some code, fix frame counts use in random reroll
zhiltsov-max Dec 12, 2024
199ef37
Improve tests, fix random reroll in task
zhiltsov-max Dec 13, 2024
21c1866
Update changelog
zhiltsov-max Dec 13, 2024
57afe1e
Fix newline
zhiltsov-max Dec 17, 2024
3326cad
Apply suggestions from code review
zhiltsov-max Dec 18, 2024
82c4ab2
Rename variable
zhiltsov-max Dec 18, 2024
0d1555f
Add named arg in function call
zhiltsov-max Dec 18, 2024
7779b4e
Add notes on remove_segment_chunks api
zhiltsov-max Dec 18, 2024
151df1b
Fix cache removal log messages
zhiltsov-max Dec 18, 2024
86645ff
Add a model property for active validation frames
zhiltsov-max Dec 18, 2024
b67ba9a
Remove accumulating media cache
zhiltsov-max Dec 18, 2024
33b229e
Remove extra variables
zhiltsov-max Dec 18, 2024
78dbd35
Fix and refactor bulk rf m2m updates
zhiltsov-max Dec 19, 2024
e61f1ac
Fix related file field name
zhiltsov-max Dec 19, 2024
a268cd8
Fix cache keys for context image chunks
zhiltsov-max Dec 19, 2024
5c3522f
Fix honeypot skipping for unchanged honeypots
zhiltsov-max Dec 19, 2024
68d1771
Fix context image chunks removal for updated honeypot frames
zhiltsov-max Dec 19, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
### Changed

- Improved the uniformity of validation frame distribution in honeypot tasks and
random honeypot rerolls
(<https://github.com/cvat-ai/cvat/pull/8776>)
2 changes: 1 addition & 1 deletion cvat/apps/dataset_manager/bindings.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ class Attribute(NamedTuple):
value: Any

@classmethod
def add_prefetch_info(cls, queryset: QuerySet):
def add_prefetch_info(cls, queryset: QuerySet[Label]) -> QuerySet[Label]:
assert issubclass(queryset.model, Label)

return add_prefetch_fields(queryset, [
Expand Down
2 changes: 1 addition & 1 deletion cvat/apps/dataset_manager/task.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ def merge_table_rows(rows, keys_for_merge, field_id):

class JobAnnotation:
@classmethod
def add_prefetch_info(cls, queryset: QuerySet, prefetch_images: bool = True):
def add_prefetch_info(cls, queryset: QuerySet[models.Job], prefetch_images: bool = True) -> QuerySet[models.Job]:
assert issubclass(queryset.model, models.Job)

label_qs = add_prefetch_fields(models.Label.objects.all(), [
Expand Down
19 changes: 19 additions & 0 deletions cvat/apps/engine/cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@
from cvat.apps.engine.rq_job_handler import RQJobMetaField
from cvat.apps.engine.utils import (
CvatChunkTimestampMismatchError,
format_list,
get_rq_lock_for_job,
load_image,
md5_hash,
Expand Down Expand Up @@ -275,6 +276,15 @@ def _delete_cache_item(self, key: str):
except pickle.UnpicklingError:
slogger.glob.error(f"Failed to remove item from the cache: key {key}", exc_info=True)

def _bulk_delete_cache_items(self, keys: Sequence[str]):
    """
    Remove several items from the cache with a single backend call.

    Args:
        keys: cache keys of the items to remove.

    The outcome is logged: an info record on success, or an error record
    (with the traceback) if the cache backend raises pickle.UnpicklingError.
    That exception is not propagated to the caller.
    """
    try:
        self._cache().delete_many(keys)
    except pickle.UnpicklingError:
        slogger.glob.error(
            f"Failed to remove items from the cache: keys {format_list(keys)}", exc_info=True
        )
    else:
        # Logging is kept out of the try body so that only the cache call is guarded
        slogger.glob.info(f"Removed chunks from the cache: keys {format_list(keys)}")

zhiltsov-max marked this conversation as resolved.
Show resolved Hide resolved
def _get_cache_item(self, key: str) -> Optional[_CacheItem]:
try:
item = self._cache().get(key)
Expand Down Expand Up @@ -468,6 +478,15 @@ def remove_segment_chunk(
self._make_chunk_key(db_segment, chunk_number=chunk_number, quality=quality)
)

def remove_segment_chunks(self, params: Sequence[dict[str, Any]]) -> None:
    """
    Remove cached chunks of several segments in one bulk cache operation.

    Args:
        params: one dict per chunk to remove. Each dict must contain
            a "db_segment" entry; all the remaining entries are forwarded
            to _make_chunk_key() as keyword arguments.
            The passed dicts are left unmodified.

    Raises:
        KeyError: if an item has no "db_segment" entry.
    """
    # TODO: maybe add a more generic version
    keys_to_remove = []
    for item_params in params:
        # Work on a shallow copy: popping from the caller's dict would
        # silently strip "db_segment" from the input
        key_kwargs = dict(item_params)
        db_obj = key_kwargs.pop("db_segment")
        keys_to_remove.append(self._make_chunk_key(db_obj, **key_kwargs))

    self._bulk_delete_cache_items(keys_to_remove)

def get_cloud_preview(self, db_storage: models.CloudStorage) -> Optional[DataWithMime]:
    """
    Look up the cached preview of a cloud storage.

    The cache item stored under the storage's preview key is fetched and
    passed through _to_data_with_mime(); that result is returned as is.
    """
    preview_key = self._make_preview_key(db_storage)
    cached_item = self._get_cache_item(preview_key)
    return self._to_data_with_mime(cached_item)

Expand Down
47 changes: 47 additions & 0 deletions cvat/apps/engine/quality_control.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# Copyright (C) 2024 CVAT.ai Corporation
#
# SPDX-License-Identifier: MIT

from typing import Generic, Mapping, Sequence, TypeVar

import numpy as np

_T = TypeVar("_T")


class HoneypotFrameSelector(Generic[_T]):
def __init__(
self, validation_frame_counts: Mapping[_T, int], *, rng: np.random.Generator | None = None
):
self.validation_frame_counts = validation_frame_counts

if not rng:
rng = np.random.default_rng()
Fixed Show fixed Hide fixed

self.rng = rng

def select_next_frames(self, count: int) -> Sequence[_T]:
# This approach guarantees that:
# - every GT frame is used
# - GT frames are used uniformly (at most min count + 1)
# - GT frames are not repeated in jobs
# - honeypot sets are different in jobs
# - honeypot sets are random
# if possible (if the job and GT counts allow this).
pick = []

for random_number in self.rng.random(count):
least_count = min(c for f, c in self.validation_frame_counts.items() if f not in pick)
least_used_frames = tuple(
f
for f, c in self.validation_frame_counts.items()
if f not in pick
if c == least_count
)

selected_item = int(random_number * len(least_used_frames))
selected_frame = least_used_frames[selected_item]
pick.append(selected_frame)
self.validation_frame_counts[selected_frame] += 1

return pick
Loading
Loading