Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: sequence processing order for v0.8 #554

Merged
merged 4 commits into from
Oct 4, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/python-package.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,11 @@ name: Python package

on:
push:
branches: [ main ]
branches: [ 'main', '0.8' ]
tags:
- 'v*.*.*'
pull_request:
branches: [ main ]
branches: [ 'main', '0.8' ]

jobs:
build:
Expand Down
48 changes: 36 additions & 12 deletions mapillary_tools/process_sequence_properties.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import datetime
import uuid
import itertools
from pathlib import Path

from . import types, constants
from .geo import compute_bearing, gps_distance, diff_bearing, pairwise
Expand Down Expand Up @@ -61,6 +62,7 @@ def cut_sequences(
sequences.append([cur])
continue
time_diff = (cur.time - prev.time).total_seconds()
assert 0 <= time_diff, "sequence must be sorted by capture times"
if cutoff_time <= time_diff:
sequences.append([cur])
continue
Expand Down Expand Up @@ -103,17 +105,6 @@ def find_duplicates(
return duplicates


def group_descs_by_folder(
    descs: T.List[types.ImageDescriptionFile],
) -> T.List[T.List[types.ImageDescriptionFile]]:
    """Group image descriptions by the directory part of their ``filename``.

    The directory is taken verbatim with ``os.path.dirname`` (no path
    normalization), so ``"hello/a.jpg"`` and ``"./hello/a.jpg"`` end up in
    different groups.

    Args:
        descs: Image descriptions; each must have a ``"filename"`` key.
            The list is NOT modified.

    Returns:
        One list per distinct directory, ordered by directory name. Within
        a group the descriptions keep their relative order from ``descs``.
    """

    def _folder(desc):
        return os.path.dirname(desc["filename"])

    # groupby() only merges adjacent items, so the input has to be ordered by
    # the grouping key first. sorted() (stable) is used instead of
    # descs.sort() so the caller's list is not reordered as a side effect.
    ordered = sorted(descs, key=_folder)
    return [list(members) for _, members in itertools.groupby(ordered, key=_folder)]


def duplication_check(
sequence: GPXSequence,
duplicate_distance: float,
Expand Down Expand Up @@ -201,6 +192,29 @@ def cap_sequence(sequence: GPXSequence) -> T.List[GPXSequence]:
return sequences


def group_and_sort_descs_by_folder(
    descs: T.List[types.ImageDescriptionFile],
) -> T.List[T.List[types.ImageDescriptionFile]]:
    """Bucket image descriptions by parent directory and order each bucket.

    The parent directory is taken from the resolved form of ``filename``
    (``Path.resolve()``), so differently spelled relative paths that point
    into the same directory share a bucket.

    Args:
        descs: Image descriptions; each must provide ``"filename"`` and
            ``"MAPCaptureTime"``.

    Returns:
        One list per parent directory, in the order the directories were
        first encountered. Each list is sorted by capture time, with the
        file's base name as the tie-breaker.
    """

    def _capture_order(desc):
        # Capture time first; base name decides between equal capture times.
        return (
            types.map_capture_time_to_datetime(desc["MAPCaptureTime"]),
            os.path.basename(desc["filename"]),
        )

    # Bucket by the resolved parent directory.
    by_parent: T.Dict[str, T.List[types.ImageDescriptionFile]] = {}
    for desc in descs:
        parent_dir = str(Path(desc["filename"]).resolve().parent)
        if parent_dir not in by_parent:
            by_parent[parent_dir] = []
        by_parent[parent_dir].append(desc)

    grouped = []
    for members in by_parent.values():
        members.sort(key=_capture_order)
        grouped.append(members)
    return grouped


def process_sequence_properties(
descs: T.List[types.ImageDescriptionFileOrError],
cutoff_distance=constants.CUTOFF_DISTANCE,
Expand All @@ -209,7 +223,16 @@ def process_sequence_properties(
duplicate_distance=constants.DUPLICATE_DISTANCE,
duplicate_angle=constants.DUPLICATE_ANGLE,
) -> T.List[types.ImageDescriptionFileOrError]:
groups = group_descs_by_folder(types.filter_out_errors(descs))
groups = group_and_sort_descs_by_folder(types.filter_out_errors(descs))

# make sure they are sorted
for group in groups:
for cur, nxt in pairwise(group):
assert types.map_capture_time_to_datetime(
cur["MAPCaptureTime"]
) <= types.map_capture_time_to_datetime(
nxt["MAPCaptureTime"]
), "sequence must be sorted"

sequences = []
for group in groups:
Expand All @@ -222,6 +245,7 @@ def process_sequence_properties(
processed = [desc for desc in descs if types.is_error(desc)]

for sequence in sequences:

# duplication check
passed, failed = duplication_check(
sequence,
Expand Down
14 changes: 14 additions & 0 deletions tests/cli/process_sequence_properties.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
import json
import sys

from mapillary_tools import process_sequence_properties


def main():
    """Run sequence processing as a stdin-to-stdout JSON filter.

    Reads a JSON document from standard input, passes it to
    ``process_sequence_properties.process_sequence_properties``, and prints
    the JSON-encoded result to standard output.
    """
    raw_input = sys.stdin.read()
    descs = json.loads(raw_input)
    processed_descs = process_sequence_properties.process_sequence_properties(descs)
    serialized = json.dumps(processed_descs)
    print(serialized)


# Only run when executed as a script, not when imported.
if __name__ == "__main__":
    main()
3 changes: 3 additions & 0 deletions tests/integration/test_process.py
Original file line number Diff line number Diff line change
Expand Up @@ -555,8 +555,11 @@ def test_geotagging_from_gpx_use_gpx_start_time_with_offset(setup_data: py.path.

def ffmpeg_installed():
    """Return True when both the ffmpeg and ffprobe executables can be launched.

    The executable locations are read from the ``MAPILLARY_FFMPEG_PATH`` and
    ``MAPILLARY_FFPROBE_PATH`` environment variables, falling back to
    ``ffmpeg`` and ``ffprobe`` respectively.

    Returns:
        False if launching either executable fails with an ``OSError``
        (missing file, not executable, path is a directory, ...); True
        otherwise. The exit status of the probes is not inspected.
    """
    ffmpeg_path = os.getenv("MAPILLARY_FFMPEG_PATH", "ffmpeg")
    ffprobe_path = os.getenv("MAPILLARY_FFPROBE_PATH", "ffprobe")
    # In Windows, ffmpeg is installed but ffprobe is not? Probe both.
    for executable in (ffmpeg_path, ffprobe_path):
        try:
            # Discard the version banner so it does not pollute test output.
            subprocess.run(
                [executable, "-version"],
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
            )
        except OSError:
            # FileNotFoundError, PermissionError, etc.: the tool is unusable.
            return False
    return True
Expand Down
70 changes: 42 additions & 28 deletions tests/unit/test_sequence_processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import itertools
import typing as T

from mapillary_tools import process_sequence_properties as psp, types
from mapillary_tools import geo, process_sequence_properties as psp, types


def make_image_desc(
Expand Down Expand Up @@ -33,13 +33,15 @@ def test_find_sequences_by_folder():
{"error": "hello"},
# s1
make_image_desc(1.00001, 1.00001, 2, filename="hello/foo.jpg"),
make_image_desc(1.00002, 1.00002, 2, filename="hello/bar.jpg"),
make_image_desc(1.00002, 1.00002, 2, filename="hello/"),
make_image_desc(1.00002, 1.00002, 8, filename="./hello/bar.jpg"),
make_image_desc(1.00002, 1.00002, 9, filename="hello/a.jpg"),
# s2
make_image_desc(1.00001, 1.00001, 2, filename="foo.jpg"),
make_image_desc(1.00002, 1.00002, 2, filename="hello/"),
make_image_desc(1.00001, 1.00001, 3, filename="./foo.jpg"),
make_image_desc(1.00001, 1.00001, 1, filename="a.jpg"),
# s3
make_image_desc(1.00001, 1.00001, 19, filename="/foo.jpg"),
make_image_desc(1.00002, 1.00002, 28, filename="/bar.jpg"),
make_image_desc(1.00001, 1.00001, 19, filename="./../foo.jpg"),
make_image_desc(1.00002, 1.00002, 28, filename="../bar.jpg"),
]
descs = psp.process_sequence_properties(
sequence,
Expand All @@ -52,30 +54,42 @@ def test_find_sequences_by_folder():
assert len(descs) == len(sequence)
descs = [d for d in descs if "error" not in d]

descs.sort(key=lambda d: d["MAPSequenceUUID"])
actual_seqs = []
for key, seq in itertools.groupby(descs, key=lambda d: d["MAPSequenceUUID"]):
actual_seqs.append(list(seq))
actual_seqs.sort(key=lambda s: s[0]["filename"])
assert {"/foo.jpg", "/bar.jpg"} == set(d["filename"] for d in actual_seqs[0])
assert {"foo.jpg"} == set(d["filename"] for d in actual_seqs[1])
assert {"hello/foo.jpg", "hello/bar.jpg", "hello/"} == set(
d["filename"] for d in actual_seqs[2]
actual_descs = {}
for d in descs:
actual_descs.setdefault(d["MAPSequenceUUID"], []).append(d)

for s in actual_descs.values():
for c, n in geo.pairwise(s):
assert c["MAPCaptureTime"] <= n["MAPCaptureTime"]

actual_sequences = sorted(
list(actual_descs.values()), key=lambda s: s[0]["filename"]
)
assert 3 == len(actual_sequences)

assert ["./../foo.jpg", "../bar.jpg"] == [
d["filename"] for d in actual_sequences[0]
]
assert ["a.jpg", "hello/", "./foo.jpg"] == [
d["filename"] for d in actual_sequences[1]
]
assert ["hello/foo.jpg", "./hello/bar.jpg", "hello/a.jpg"] == [
d["filename"] for d in actual_sequences[2]
]


def test_cut_sequences():
sequence = [
# s1
make_image_desc(1, 1, 1),
make_image_desc(1.00001, 1.00001, 2),
make_image_desc(1.00002, 1.00002, 2),
make_image_desc(1.00001, 1.00001, 2, filename="a.jpg"),
make_image_desc(1.00002, 1.00002, 2, filename="b.jpg"),
# s2
make_image_desc(1.00090, 1.00090, 2),
make_image_desc(1.00091, 1.00091, 3),
make_image_desc(1.00090, 1.00090, 2, filename="foo/b.jpg"),
make_image_desc(1.00091, 1.00091, 3, filename="foo/a.jpg"),
# s3
make_image_desc(1.00092, 1.00092, 19),
make_image_desc(1.00093, 1.00093, 28),
make_image_desc(1.00092, 1.00092, 19, filename="../a.jpg"),
make_image_desc(1.00093, 1.00093, 28, filename="../b.jpg"),
]
descs = psp.process_sequence_properties(
sequence,
Expand Down Expand Up @@ -106,11 +120,11 @@ def test_duplication():
# s1
make_image_desc(1, 1, 1, angle=0),
make_image_desc(1.00001, 1.00001, 2, angle=1),
make_image_desc(1.00002, 1.00002, 2, angle=-1),
make_image_desc(1.00003, 1.00003, 2, angle=-2),
make_image_desc(1.00009, 1.00009, 2, angle=5),
make_image_desc(1.00090, 1.00090, 2, angle=5),
make_image_desc(1.00091, 1.00091, 2, angle=-1),
make_image_desc(1.00002, 1.00002, 3, angle=-1),
make_image_desc(1.00003, 1.00003, 4, angle=-2),
make_image_desc(1.00009, 1.00009, 5, angle=5),
make_image_desc(1.00090, 1.00090, 6, angle=5),
make_image_desc(1.00091, 1.00091, 7, angle=-1),
]
descs = psp.process_sequence_properties(
sequence,
Expand All @@ -130,11 +144,11 @@ def test_duplication():
def test_interpolation():
sequence = [
# s1
make_image_desc(0, 0, 1, angle=2),
make_image_desc(1, 0, 2, angle=123),
make_image_desc(1, 1, 3, angle=344),
make_image_desc(0, 1, 4, angle=22),
make_image_desc(0, 0, 5, angle=-123),
make_image_desc(0, 0, 1, angle=2),
make_image_desc(1, 0, 2, angle=123),
]
descs = psp.process_sequence_properties(
sequence,
Expand Down