Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update video metadata model (#139) #196

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions db/access/files.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from sqlalchemy import or_, func, literal_column
from sqlalchemy.orm import aliased

from db.schema import Files, Matches, VideoMetadata, Exif
from db.schema import Files, Matches, Exif


class FileMatchFilter:
Expand Down Expand Up @@ -145,8 +145,8 @@ def _sort_items(req: ListFilesRequest, query):
(match.match_video_file_id == Files.id)) & (match.distance < threshold))
return query.group_by(Files.id).order_by(literal_column(FilesDAO._LABEL_COUNT).desc(), Files.id.asc())
elif req.sort == FileSort.LENGTH:
meta = aliased(VideoMetadata)
return query.outerjoin(meta).order_by(meta.video_length.desc(), Files.id.asc())
exif = aliased(Exif)
return query.outerjoin(exif).order_by(exif.General_Duration.desc(), Files.id.asc())
elif req.sort == FileSort.DATE:
exif = aliased(Exif)
return query.outerjoin(exif).order_by(exif.General_Encoded_Date.desc(), Files.id.asc())
Expand Down Expand Up @@ -206,13 +206,13 @@ def _filter_date(req: ListFilesRequest, query):
def _filter_length(req: ListFilesRequest, query):
"""Filter by length."""
if req.min_length is not None or req.max_length is not None:
query = query.join(Files.meta)
query = query.join(Files.exif)

if req.min_length is not None:
query = query.filter(VideoMetadata.video_length >= req.min_length)
query = query.filter(Exif.General_Duration >= req.min_length)

if req.max_length is not None:
query = query.filter(VideoMetadata.video_length <= req.max_length)
query = query.filter(Exif.General_Duration <= req.max_length)

return query

Expand Down
13 changes: 0 additions & 13 deletions db/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,28 +38,15 @@ class Signature(Base):
signature = Column(LargeBinary)


# TODO: Re-evaluate which columns are actually essential
# TODO: Add sha signature

class VideoMetadata(Base):
__tablename__ = 'videometadata'

id = Column(Integer, primary_key=True)
file_id = Column(Integer, ForeignKey('files.id'), unique=True, nullable=False)
file = relationship("Files", back_populates="meta")
video_length = Column(Float)
avg_act = Column(Float)
video_avg_std = Column(Float)
video_max_dif = Column(Float)
gray_avg = Column(Float)
gray_std = Column(Float)
gray_max = Column(Float)
video_duration_flag = Column(Boolean)
video_dark_flag = Column(Boolean)
flagged = Column(Boolean)
video_duration_seconds = Column(Float)
avg_scene_duration_seconds = Column(Float)
total_video_duration_timestamp = Column(String)


class Scene(Base):
Expand Down
4 changes: 2 additions & 2 deletions server/tests/db/access/test_matches_dao.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@ def make_file(prefix="", length=42, ext="flv", scenes=((0, 1), (1, 2))):
path = f"{prefix}some/path/{uuid()}.{ext}"
sha256 = f"hash-of-{path}"
return Files(file_path=path, sha256=sha256,
exif=Exif(General_FileExtension=ext, ),
meta=VideoMetadata(video_length=length),
exif=Exif(General_FileExtension=ext, General_Duration=length * 1000),
meta=VideoMetadata(),
scenes=[Scene(start_time=start, duration=duration) for start, duration in scenes])


Expand Down
36 changes: 20 additions & 16 deletions server/tests/server/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,8 +119,8 @@ def make_file(prefix="", length=42, ext="flv", audio=True, date=datetime.date(20
sha256 = f"hash-of-{path}"
return Files(file_path=path, sha256=sha256,
exif=Exif(General_FileExtension=ext, Audio_Duration=float(audio),
General_Encoded_Date=date),
meta=VideoMetadata(video_length=length),
General_Encoded_Date=date, General_Duration=length),
meta=VideoMetadata(),
scenes=[Scene(start_time=start, duration=duration) for start, duration in scenes])


Expand Down Expand Up @@ -226,15 +226,15 @@ def test_get_file(client, app):
assert {"scenes", "meta", "exif"}.isdisjoint(json_payload(resp).keys())

# Include some fields
resp = client.get(f"/api/v1/files/{file.id}?include=meta,scenes")
resp = client.get(f"/api/v1/files/{file.id}?include=exif,scenes")
assert_json_response(resp, {
"id": file.id,
"file_path": file.file_path,
"sha256": file.sha256,
"meta": {"video_length": file.meta.video_length},
"exif": {"General_Duration": file.exif.General_Duration},
"scenes": [{"duration": scene.duration, "start_time": scene.start_time} for scene in file.scenes]
})
assert "exif" not in json_payload(resp)
assert "meta" not in json_payload(resp)


def test_list_files_basic(client, app):
Expand Down Expand Up @@ -305,15 +305,15 @@ def test_list_files_include(client, app):
)

# With scenes and meta included
resp = client.get(f"/api/v1/files/?limit={len(files)}&include=scenes,meta")
resp = client.get(f"/api/v1/files/?limit={len(files)}&include=scenes,exif")
assert len(items(resp)) == len(files)
assert all(
"exif" not in file for file in items(resp)
"meta" not in file for file in items(resp)
)
assert all(
has_shape(file, {
"scenes": [expected_scene],
"meta": {"video_length": expected_length}
"exif": {"General_Duration": expected_length}
}) for file in items(resp)
)

Expand Down Expand Up @@ -596,10 +596,10 @@ def test_list_files_mixed_example(client, app, config):
])

# Long videos
b.meta.video_length = length_large # duplicates: a
c.meta.video_length = length_large # duplicates: a, related: d
e.meta.video_length = length_large # related: d
f.meta.video_length = length_large # no matches
b.exif.General_Duration = length_large # duplicates: a
c.exif.General_Duration = length_large # duplicates: a, related: d
e.exif.General_Duration = length_large # related: d
f.exif.General_Duration = length_large # no matches

# Get long videos with related matches sorted by amount of duplicates
resp = client.get(
Expand Down Expand Up @@ -698,8 +698,10 @@ def test_list_file_matches_include(client, app):
"items": [
{
"file": {
"meta": {"video_length": match.match_video_file.meta.video_length},
"exif": {"General_FileExtension": match.match_video_file.exif.General_FileExtension}
"exif": {
"General_FileExtension": match.match_video_file.exif.General_FileExtension,
"General_Duration": match.match_video_file.exif.General_Duration
}
}
} for match in matches
]
Expand Down Expand Up @@ -784,8 +786,10 @@ def test_fetch_file_cluster_include(client, app):
"total": len(matches),
"files": [
{
"meta": {"video_length": file.meta.video_length},
"exif": {"General_FileExtension": file.exif.General_FileExtension}
"exif": {
"General_FileExtension": file.exif.General_FileExtension,
"General_Duration": file.exif.General_Duration
}

} for file in files
]
Expand Down
16 changes: 8 additions & 8 deletions tests/winnow/storage/test_db_result_storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,29 +95,29 @@ def test_add_signatures_update(store):

def test_add_file_metadata(store):
# Check metadata write
orig = File("some/path", "some-hash", {"gray_avg": 42.5})
orig = File("some/path", "some-hash", {"gray_max": 42.5})
store.add_file_metadata(orig.path, orig.sha256, orig.value)
check_files(store, [orig], lambda file: {"gray_avg": file.meta.gray_avg})
check_files(store, [orig], lambda file: {"gray_max": file.meta.gray_max})

# Check metadata updated
updated = File(orig.path, orig.sha256, {"gray_avg": orig.value["gray_avg"] + 1})
updated = File(orig.path, orig.sha256, {"gray_max": orig.value["gray_max"] + 1})
store.add_file_metadata(orig.path, orig.sha256, updated.value)
check_files(store, [updated], lambda file: {"gray_avg": file.meta.gray_avg})
check_files(store, [updated], lambda file: {"gray_max": file.meta.gray_max})

# Check no entity duplication
assert count(store, VideoMetadata) == 1


def test_add_metadata(store):
# Check bulk write
saved = [File(f"some/path{i}", f"some-hash{i}", {"gray_avg": float(i)}) for i in range(100)]
saved = [File(f"some/path{i}", f"some-hash{i}", {"gray_max": float(i)}) for i in range(100)]
store.add_metadata(saved)
check_files(store, saved, lambda file: {"gray_avg": file.meta.gray_avg})
check_files(store, saved, lambda file: {"gray_max": file.meta.gray_max})

# Check bulk update
updated = [File(orig.path, orig.sha256, {"gray_avg": orig.value["gray_avg"] + 1.0}) for orig in saved]
updated = [File(orig.path, orig.sha256, {"gray_max": orig.value["gray_max"] + 1.0}) for orig in saved]
store.add_metadata(updated)
check_files(store, updated, lambda file: {"gray_avg": file.meta.gray_avg})
check_files(store, updated, lambda file: {"gray_max": file.meta.gray_max})

# Check no entity duplication
assert count(store, VideoMetadata) == len(updated)
Expand Down
6 changes: 1 addition & 5 deletions web/src/server-api/Server/Transform.js
Original file line number Diff line number Diff line change
Expand Up @@ -57,13 +57,9 @@ export default class Transform {
};
}
return {
grayAverage: data.meta.gray_avg,
grayMax: data.meta.gray_max,
grayStd: data.meta.gray_std,
stdAverage: data.meta.video_avg_std,
maxDiff: data.meta.video_max_dif,
flagged: data.meta.flagged,
length: data.meta.video_length * 1000 || data.exif?.General_Duration || 0,
length: data.exif?.General_Duration || 0,
};
}

Expand Down
49 changes: 6 additions & 43 deletions winnow/storage/db_result_storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -473,54 +473,17 @@ def _create_scenes(file, durations):
@staticmethod
def _update_metadata(metadata_entity, metadata):
"""Update metadata attributes"""
metadata_entity.video_length = metadata.get('video_length',
metadata_entity
.video_length
)
metadata_entity.avg_act = metadata.get(
'avg_act',
metadata_entity.avg_act
)
metadata_entity.video_avg_std = metadata.get(
'video_avg_std',
metadata_entity
.video_avg_std
)
metadata_entity.video_max_dif = metadata.get(
'video_max_dif',
metadata_entity
.video_max_dif
)

metadata_entity.gray_avg = metadata.get(
'gray_avg',
metadata_entity
.gray_avg)

metadata_entity.gray_std = metadata.get(
'gray_std',
metadata_entity
.gray_std)

metadata_entity.gray_max = metadata.get(
'gray_max',
metadata_entity
.gray_max)
'gray_max',
metadata_entity.gray_max)

metadata_entity.video_dark_flag = metadata.get(
'video_dark_flag',
metadata_entity
.video_dark_flag)

metadata_entity.video_duration_flag = metadata.get(
'video_duration_flag',
metadata_entity
.video_duration_flag)
'video_dark_flag',
metadata_entity.video_dark_flag)

metadata_entity.flagged = metadata.get(
'flagged',
metadata_entity
.flagged)
'flagged',
metadata_entity.flagged)

@staticmethod
def _index_matches(chunk):
Expand Down