Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix sequence processing order #553

Merged
merged 4 commits into from
Oct 2, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 30 additions & 9 deletions mapillary_tools/process_sequence_properties.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
import dataclasses
import itertools
import os
import typing as T
import uuid
from pathlib import Path

from . import constants, geo, types
from .exceptions import MapillaryDuplicationError
Expand Down Expand Up @@ -61,6 +61,7 @@ def cut_sequences(
sequences.append([cur])
continue
time_diff = cur.time - prev.time
assert 0 <= time_diff, "sequence must be sorted by capture times"
if cutoff_time <= time_diff:
sequences.append([cur])
continue
Expand Down Expand Up @@ -138,15 +139,26 @@ def cap_sequence(sequence: GPXSequence) -> T.List[GPXSequence]:
return sequences


def group_descs_by_folder(
def group_and_sort_descs_by_folder(
descs: T.List[types.ImageDescriptionFile],
) -> T.List[T.List[types.ImageDescriptionFile]]:
# TODO: use absolute path?
descs.sort(key=lambda desc: os.path.dirname(desc["filename"]))
group = itertools.groupby(descs, key=lambda desc: os.path.dirname(desc["filename"]))
sequences = []
for _, sequence in group:
sequences.append(list(sequence))
# group descs by parent directory
sequences_by_parent: T.Dict[str, T.List[types.ImageDescriptionFile]] = {}
for desc in descs:
filename = Path(desc["filename"]).resolve()
sequences_by_parent.setdefault(str(filename.parent), []).append(desc)

sequences = list(sequences_by_parent.values())
for sequence in sequences:
# Sort images in a sequence by capture time
# and then filename (in case capture times are the same)
sequence.sort(
key=lambda desc: (
types.map_capture_time_to_datetime(desc["MAPCaptureTime"]),
os.path.basename(desc["filename"]),
)
)

return sequences


Expand All @@ -168,7 +180,15 @@ def process_sequence_properties(
else:
good_descs.append(T.cast(types.ImageDescriptionFile, desc))

groups = group_descs_by_folder(good_descs)
groups = group_and_sort_descs_by_folder(good_descs)
# make sure they are sorted
for group in groups:
for cur, nxt in geo.pairwise(group):
assert types.map_capture_time_to_datetime(
cur["MAPCaptureTime"]
) <= types.map_capture_time_to_datetime(
nxt["MAPCaptureTime"]
), "sequence must be sorted"

# cut sequences
sequences = []
Expand All @@ -178,6 +198,7 @@ def process_sequence_properties(
assert len(good_descs) == sum(len(s) for s in sequences)

for sequence in sequences:

# duplication check
sequence, dups = duplication_check(
sequence,
Expand Down
14 changes: 14 additions & 0 deletions tests/cli/process_sequence_properties.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
import json
import sys

from mapillary_tools import process_sequence_properties


def main():
    """Run sequence processing as a stdin-to-stdout JSON filter.

    Parses one JSON document from standard input, hands the parsed value to
    ``process_sequence_properties.process_sequence_properties`` and prints
    whatever that call returns, serialized as JSON, to standard output.
    """
    raw_descs = json.load(sys.stdin)
    result = process_sequence_properties.process_sequence_properties(raw_descs)
    serialized = json.dumps(result)
    print(serialized)


# Only run the filter when executed as a script, not when imported.
if __name__ == "__main__":
    main()
70 changes: 42 additions & 28 deletions tests/unit/test_sequence_processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import typing as T
import uuid

from mapillary_tools import process_sequence_properties as psp, types
from mapillary_tools import geo, process_sequence_properties as psp, types


def make_image_desc(
Expand Down Expand Up @@ -33,13 +33,15 @@ def test_find_sequences_by_folder():
{"error": "hello"},
# s1
make_image_desc(1.00001, 1.00001, 2, filename="hello/foo.jpg"),
make_image_desc(1.00002, 1.00002, 2, filename="hello/bar.jpg"),
make_image_desc(1.00002, 1.00002, 2, filename="hello/"),
make_image_desc(1.00002, 1.00002, 8, filename="./hello/bar.jpg"),
make_image_desc(1.00002, 1.00002, 9, filename="hello/a.jpg"),
# s2
make_image_desc(1.00001, 1.00001, 2, filename="foo.jpg"),
make_image_desc(1.00002, 1.00002, 2, filename="hello/"),
make_image_desc(1.00001, 1.00001, 3, filename="./foo.jpg"),
make_image_desc(1.00001, 1.00001, 1, filename="a.jpg"),
# s3
make_image_desc(1.00001, 1.00001, 19, filename="/foo.jpg"),
make_image_desc(1.00002, 1.00002, 28, filename="/bar.jpg"),
make_image_desc(1.00001, 1.00001, 19, filename="./../foo.jpg"),
make_image_desc(1.00002, 1.00002, 28, filename="../bar.jpg"),
]
descs = psp.process_sequence_properties(
sequence,
Expand All @@ -52,30 +54,42 @@ def test_find_sequences_by_folder():
assert len(descs) == len(sequence)
descs = [d for d in descs if "error" not in d]

descs.sort(key=lambda d: d["MAPSequenceUUID"])
actual_seqs = []
for key, seq in itertools.groupby(descs, key=lambda d: d["MAPSequenceUUID"]):
actual_seqs.append(list(seq))
actual_seqs.sort(key=lambda s: s[0]["filename"])
assert {"/foo.jpg", "/bar.jpg"} == set(d["filename"] for d in actual_seqs[0])
assert {"foo.jpg"} == set(d["filename"] for d in actual_seqs[1])
assert {"hello/foo.jpg", "hello/bar.jpg", "hello/"} == set(
d["filename"] for d in actual_seqs[2]
actual_descs = {}
for d in descs:
actual_descs.setdefault(d["MAPSequenceUUID"], []).append(d)

for s in actual_descs.values():
for c, n in geo.pairwise(s):
assert c["MAPCaptureTime"] <= n["MAPCaptureTime"]

actual_sequences = sorted(
list(actual_descs.values()), key=lambda s: s[0]["filename"]
)
assert 3 == len(actual_sequences)

assert ["./../foo.jpg", "../bar.jpg"] == [
d["filename"] for d in actual_sequences[0]
]
assert ["a.jpg", "hello/", "./foo.jpg"] == [
d["filename"] for d in actual_sequences[1]
]
assert ["hello/foo.jpg", "./hello/bar.jpg", "hello/a.jpg"] == [
d["filename"] for d in actual_sequences[2]
]


def test_cut_sequences():
sequence = [
# s1
make_image_desc(1, 1, 1),
make_image_desc(1.00001, 1.00001, 2),
make_image_desc(1.00002, 1.00002, 2),
make_image_desc(1.00001, 1.00001, 2, filename="a.jpg"),
make_image_desc(1.00002, 1.00002, 2, filename="b.jpg"),
# s2
make_image_desc(1.00090, 1.00090, 2),
make_image_desc(1.00091, 1.00091, 3),
make_image_desc(1.00090, 1.00090, 2, filename="foo/b.jpg"),
make_image_desc(1.00091, 1.00091, 3, filename="foo/a.jpg"),
# s3
make_image_desc(1.00092, 1.00092, 19),
make_image_desc(1.00093, 1.00093, 28),
make_image_desc(1.00092, 1.00092, 19, filename="../a.jpg"),
make_image_desc(1.00093, 1.00093, 28, filename="../b.jpg"),
]
descs = psp.process_sequence_properties(
sequence,
Expand Down Expand Up @@ -106,11 +120,11 @@ def test_duplication():
# s1
make_image_desc(1, 1, 1, angle=0),
make_image_desc(1.00001, 1.00001, 2, angle=1),
make_image_desc(1.00002, 1.00002, 2, angle=-1),
make_image_desc(1.00003, 1.00003, 2, angle=-2),
make_image_desc(1.00009, 1.00009, 2, angle=5),
make_image_desc(1.00090, 1.00090, 2, angle=5),
make_image_desc(1.00091, 1.00091, 2, angle=-1),
make_image_desc(1.00002, 1.00002, 3, angle=-1),
make_image_desc(1.00003, 1.00003, 4, angle=-2),
make_image_desc(1.00009, 1.00009, 5, angle=5),
make_image_desc(1.00090, 1.00090, 6, angle=5),
make_image_desc(1.00091, 1.00091, 7, angle=-1),
]
descs = psp.process_sequence_properties(
sequence,
Expand All @@ -130,11 +144,11 @@ def test_duplication():
def test_interpolation():
sequence = [
# s1
make_image_desc(0, 0, 1, angle=2),
make_image_desc(1, 0, 2, angle=123),
make_image_desc(1, 1, 3, angle=344),
make_image_desc(0, 1, 4, angle=22),
make_image_desc(0, 0, 5, angle=-123),
make_image_desc(0, 0, 1, angle=2),
make_image_desc(1, 0, 2, angle=123),
]
descs = psp.process_sequence_properties(
sequence,
Expand Down