From 8f56cca92ecc0d23d0d80d859669591273c6f331 Mon Sep 17 00:00:00 2001 From: Stepan Anokhin Date: Fri, 20 Nov 2020 20:44:13 +0700 Subject: [PATCH 1/5] Update db schema --- db/schema.py | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/db/schema.py b/db/schema.py index 0c2b542b..bd62f498 100644 --- a/db/schema.py +++ b/db/schema.py @@ -38,28 +38,15 @@ class Signature(Base): signature = Column(LargeBinary) -# TODO:Revaluate which columns are actually essential -# TODO: Add sha signature - class VideoMetadata(Base): __tablename__ = 'videometadata' id = Column(Integer, primary_key=True) file_id = Column(Integer, ForeignKey('files.id'), unique=True, nullable=False) file = relationship("Files", back_populates="meta") - video_length = Column(Float) - avg_act = Column(Float) - video_avg_std = Column(Float) - video_max_dif = Column(Float) - gray_avg = Column(Float) - gray_std = Column(Float) gray_max = Column(Float) - video_duration_flag = Column(Boolean) video_dark_flag = Column(Boolean) flagged = Column(Boolean) - video_duration_seconds = Column(Float) - avg_scene_duration_seconds = Column(Float) - total_video_duration_timestamp = Column(String) class Scene(Base): From 9c1f4b510a9e77b0ba1117b54248d23a132ad0e2 Mon Sep 17 00:00:00 2001 From: Stepan Anokhin Date: Fri, 20 Nov 2020 20:46:16 +0700 Subject: [PATCH 2/5] Update db access logic --- db/access/files.py | 12 +++---- winnow/storage/db_result_storage.py | 49 ++++------------------------- 2 files changed, 12 insertions(+), 49 deletions(-) diff --git a/db/access/files.py b/db/access/files.py index a3edf048..d0aff20c 100644 --- a/db/access/files.py +++ b/db/access/files.py @@ -5,7 +5,7 @@ from sqlalchemy import or_, func, literal_column from sqlalchemy.orm import aliased -from db.schema import Files, Matches, VideoMetadata, Exif +from db.schema import Files, Matches, Exif class FileMatchFilter: @@ -145,8 +145,8 @@ def _sort_items(req: ListFilesRequest, query): (match.match_video_file_id == Files.id)) & (match.distance < threshold)) return query.group_by(Files.id).order_by(literal_column(FilesDAO._LABEL_COUNT).desc(), Files.id.asc()) elif req.sort == FileSort.LENGTH: - meta = aliased(VideoMetadata) - return query.outerjoin(meta).order_by(meta.video_length.desc(), Files.id.asc()) + exif = aliased(Exif) + return query.outerjoin(exif).order_by(exif.General_Duration.desc(), Files.id.asc()) elif req.sort == FileSort.DATE: exif = aliased(Exif) return query.outerjoin(exif).order_by(exif.General_Encoded_Date.desc(), Files.id.asc()) @@ -206,13 +206,13 @@ def _filter_date(req: ListFilesRequest, query): def _filter_length(req: ListFilesRequest, query): """Filter by length.""" if req.min_length is not None or req.max_length is not None: - query = query.join(Files.meta) + query = query.join(Files.exif) if req.min_length is not None: - query = query.filter(VideoMetadata.video_length >= req.min_length) + query = query.filter(Exif.General_Duration >= req.min_length) if req.max_length is not None: - query = query.filter(VideoMetadata.video_length <= req.max_length) + query = query.filter(Exif.General_Duration <= req.max_length) return query diff --git a/winnow/storage/db_result_storage.py b/winnow/storage/db_result_storage.py index 7dcd7767..5d81db2e 100644 --- a/winnow/storage/db_result_storage.py +++ b/winnow/storage/db_result_storage.py @@ -473,54 +473,17 @@ def _create_scenes(file, durations): @staticmethod def _update_metadata(metadata_entity, metadata): """Update metadata attributes""" - metadata_entity.video_length = metadata.get('video_length', - metadata_entity - .video_length - ) - metadata_entity.avg_act = metadata.get( - 'avg_act', - metadata_entity.avg_act - ) - metadata_entity.video_avg_std = metadata.get( - 'video_avg_std', - metadata_entity - .video_avg_std - ) - metadata_entity.video_max_dif = metadata.get( - 'video_max_dif', - metadata_entity - .video_max_dif - ) - - metadata_entity.gray_avg = metadata.get( - 'gray_avg', - metadata_entity - .gray_avg) - - metadata_entity.gray_std = metadata.get( - 'gray_std', - metadata_entity - .gray_std) - metadata_entity.gray_max = metadata.get( - 'gray_max', - metadata_entity - .gray_max) + 'gray_max', + metadata_entity.gray_max) metadata_entity.video_dark_flag = metadata.get( - 'video_dark_flag', - metadata_entity - .video_dark_flag) - - metadata_entity.video_duration_flag = metadata.get( - 'video_duration_flag', - metadata_entity - .video_duration_flag) + 'video_dark_flag', + metadata_entity.video_dark_flag) metadata_entity.flagged = metadata.get( - 'flagged', - metadata_entity - .flagged) + 'flagged', + metadata_entity.flagged) @staticmethod def _index_matches(chunk): From 2e81c06e2f5a9ec7668158a02a6fe638bd660605 Mon Sep 17 00:00:00 2001 From: Stepan Anokhin Date: Fri, 20 Nov 2020 20:46:51 +0700 Subject: [PATCH 3/5] Update frontend --- web/src/server-api/Server/Transform.js | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/web/src/server-api/Server/Transform.js b/web/src/server-api/Server/Transform.js index 65e43d53..a9f7cd9e 100644 --- a/web/src/server-api/Server/Transform.js +++ b/web/src/server-api/Server/Transform.js @@ -57,13 +57,9 @@ export default class Transform { }; } return { - grayAverage: data.meta.gray_avg, grayMax: data.meta.gray_max, - grayStd: data.meta.gray_std, - stdAverage: data.meta.video_avg_std, - maxDiff: data.meta.video_max_dif, flagged: data.meta.flagged, - length: data.meta.video_length * 1000 || data.exif?.General_Duration || 0, + length: data.exif?.General_Duration || 0, }; } From a05a3771b9dc3ebb7c3d2548806db2bdf8c0e0b8 Mon Sep 17 00:00:00 2001 From: Stepan Anokhin Date: Fri, 20 Nov 2020 20:47:04 +0700 Subject: [PATCH 4/5] Update server tests --- server/tests/db/access/test_matches_dao.py | 4 +-- server/tests/server/test_api.py | 36 ++++++++++++---------- 2 files changed, 22 insertions(+), 18 deletions(-) diff --git a/server/tests/db/access/test_matches_dao.py b/server/tests/db/access/test_matches_dao.py index c8f8dc23..fa4cf017 100644 --- a/server/tests/db/access/test_matches_dao.py +++ b/server/tests/db/access/test_matches_dao.py @@ -13,8 +13,8 @@ def make_file(prefix="", length=42, ext="flv", scenes=((0, 1), (1, 2))): path = f"{prefix}some/path/{uuid()}.{ext}" sha256 = f"hash-of-{path}" return Files(file_path=path, sha256=sha256, - exif=Exif(General_FileExtension=ext, ), - meta=VideoMetadata(video_length=length), + exif=Exif(General_FileExtension=ext, General_Duration=length * 1000), + meta=VideoMetadata(), scenes=[Scene(start_time=start, duration=duration) for start, duration in scenes]) diff --git a/server/tests/server/test_api.py b/server/tests/server/test_api.py index ba24356f..0355916b 100644 --- a/server/tests/server/test_api.py +++ b/server/tests/server/test_api.py @@ -119,8 +119,8 @@ def make_file(prefix="", length=42, ext="flv", audio=True, date=datetime.date(20 sha256 = f"hash-of-{path}" return Files(file_path=path, sha256=sha256, exif=Exif(General_FileExtension=ext, Audio_Duration=float(audio), - General_Encoded_Date=date), - meta=VideoMetadata(video_length=length), + General_Encoded_Date=date, General_Duration=length), + meta=VideoMetadata(), scenes=[Scene(start_time=start, duration=duration) for start, duration in scenes]) @@ -226,15 +226,15 @@ def test_get_file(client, app): assert {"scenes", "meta", "exif"}.isdisjoint(json_payload(resp).keys()) # Include some fields - resp = client.get(f"/api/v1/files/{file.id}?include=meta,scenes") + resp = client.get(f"/api/v1/files/{file.id}?include=exif,scenes") assert_json_response(resp, { "id": file.id, "file_path": file.file_path, "sha256": file.sha256, - "meta": {"video_length": file.meta.video_length}, + "exif": {"General_Duration": file.exif.General_Duration}, "scenes": [{"duration": scene.duration, "start_time": scene.start_time} for scene in file.scenes] }) - assert "exif" not in json_payload(resp) + assert "meta" not in json_payload(resp) def test_list_files_basic(client, app): @@ -305,15 +305,15 @@ def test_list_files_include(client, app): ) # With scenes and meta included - resp = client.get(f"/api/v1/files/?limit={len(files)}&include=scenes,meta") + resp = client.get(f"/api/v1/files/?limit={len(files)}&include=scenes,exif") assert len(items(resp)) == len(files) assert all( - "exif" not in file for file in items(resp) + "meta" not in file for file in items(resp) ) assert all( has_shape(file, { "scenes": [expected_scene], - "meta": {"video_length": expected_length} + "exif": {"General_Duration": expected_length} }) for file in items(resp) ) @@ -596,10 +596,10 @@ def test_list_files_mixed_example(client, app, config): ]) # Long videos - b.meta.video_length = length_large # duplicates: a - c.meta.video_length = length_large # duplicates: a, related: d - e.meta.video_length = length_large # related: d - f.meta.video_length = length_large # no matches + b.exif.General_Duration = length_large # duplicates: a + c.exif.General_Duration = length_large # duplicates: a, related: d + e.exif.General_Duration = length_large # related: d + f.exif.General_Duration = length_large # no matches # Get long videos with related matches sorted by amount of duplicates resp = client.get( @@ -698,8 +698,10 @@ def test_list_file_matches_include(client, app): "items": [ { "file": { - "meta": {"video_length": match.match_video_file.meta.video_length}, - "exif": {"General_FileExtension": match.match_video_file.exif.General_FileExtension} + "exif": { + "General_FileExtension": match.match_video_file.exif.General_FileExtension, + "General_Duration": match.match_video_file.exif.General_Duration + } } } for match in matches ] @@ -784,8 +786,10 @@ def test_fetch_file_cluster_include(client, app): "total": len(matches), "files": [ { - "meta": {"video_length": file.meta.video_length}, - "exif": {"General_FileExtension": file.exif.General_FileExtension} + "exif": { + "General_FileExtension": file.exif.General_FileExtension, + "General_Duration": file.exif.General_Duration + } } for file in files ] From 359b1cc16998fd15938af85180e4de097d96d920 Mon Sep 17 00:00:00 2001 From: Stepan Anokhin Date: Fri, 20 Nov 2020 20:56:58 +0700 Subject: [PATCH 5/5] Update storage tests --- tests/winnow/storage/test_db_result_storage.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tests/winnow/storage/test_db_result_storage.py b/tests/winnow/storage/test_db_result_storage.py index 46a1f6ae..698207fa 100644 --- a/tests/winnow/storage/test_db_result_storage.py +++ b/tests/winnow/storage/test_db_result_storage.py @@ -95,14 +95,14 @@ def test_add_signatures_update(store): def test_add_file_metadata(store): # Check metadata write - orig = File("some/path", "some-hash", {"gray_avg": 42.5}) + orig = File("some/path", "some-hash", {"gray_max": 42.5}) store.add_file_metadata(orig.path, orig.sha256, orig.value) - check_files(store, [orig], lambda file: {"gray_avg": file.meta.gray_avg}) + check_files(store, [orig], lambda file: {"gray_max": file.meta.gray_max}) # Check metadata updated - updated = File(orig.path, orig.sha256, {"gray_avg": orig.value["gray_avg"] + 1}) + updated = File(orig.path, orig.sha256, {"gray_max": orig.value["gray_max"] + 1}) store.add_file_metadata(orig.path, orig.sha256, updated.value) - check_files(store, [updated], lambda file: {"gray_avg": file.meta.gray_avg}) + check_files(store, [updated], lambda file: {"gray_max": file.meta.gray_max}) # Check no entity duplication assert count(store, VideoMetadata) == 1 @@ -110,14 +110,14 @@ def test_add_file_metadata(store): def test_add_metadata(store): # Check bulk write - saved = [File(f"some/path{i}", f"some-hash{i}", {"gray_avg": float(i)}) for i in range(100)] + saved = [File(f"some/path{i}", f"some-hash{i}", {"gray_max": float(i)}) for i in range(100)] store.add_metadata(saved) - check_files(store, saved, lambda file: {"gray_avg": file.meta.gray_avg}) + check_files(store, saved, lambda file: {"gray_max": file.meta.gray_max}) # Check bulk update - updated = [File(orig.path, orig.sha256, {"gray_avg": orig.value["gray_avg"] + 1.0}) for orig in saved] + updated = [File(orig.path, orig.sha256, {"gray_max": orig.value["gray_max"] + 1.0}) for orig in saved] store.add_metadata(updated) - check_files(store, updated, lambda file: {"gray_avg": file.meta.gray_avg}) + check_files(store, updated, lambda file: {"gray_max": file.meta.gray_max}) # Check no entity duplication assert count(store, VideoMetadata) == len(updated)