Skip to content

Commit

Permalink
Merge pull request #68 from internetarchive/issue-49
Browse files Browse the repository at this point in the history
Adding VTT support
  • Loading branch information
digitaldogsbody authored May 23, 2024
2 parents e3ef404 + 65c97f9 commit 01848a3
Show file tree
Hide file tree
Showing 5 changed files with 116 additions and 19 deletions.
5 changes: 5 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,11 @@ Unit tests are in the `tests` folder and can be run with:
python -m unittest discover -s tests
```

Run a single test:
```
python -m unittest tests.test_video.TestVideo.test_vtt_autogenerated
```

Retrieve large.jpg as 800px wide JPEG
* http://127.0.0.1:8080/iiif/large.jpg/full/800,/0/default.jpg

Expand Down
35 changes: 35 additions & 0 deletions iiify/resolver.py
Original file line number Diff line number Diff line change
Expand Up @@ -553,6 +553,7 @@ def create_manifest3(identifier, domain=None, page=None):
# sort the files into originals and derivatives, splitting the derivatives into buckets based on the original
originals = []
derivatives = {}
vttfiles = {}
for f in metadata['files']:
if f['source'] == 'derivative':
if f['original'] in derivatives:
Expand All @@ -561,6 +562,14 @@ def create_manifest3(identifier, domain=None, page=None):
derivatives[f['original']] = {f['format']: f}
elif f['source'] == 'original':
originals.append(f)

if f['format'] == 'Web Video Text Tracks':
    # Group each caption track under the video it belongs to. The key is the
    # track's file name with its trailing ".<tag>.vtt" removed, where <tag> is
    # a language tag or the literal "autogenerated".
    # Example: cruz-test.en.vtt and 34C3_-_International_Image_Interoperability_Framework_IIIF_Kulturinstitutionen_schaffen_interop-SvH4fbjOT0A.autogenerated.vtt
    #
    # The tag class covers the full A-Z range plus digits so that tags such as
    # "zh-TW" or "es-419" are stripped as well, and the pattern is anchored to
    # the end of the name so only the final ".<tag>.vtt" suffix is removed.
    sourceFilename = re.sub(r'\.[a-zA-Z0-9-]*\.vtt$', '', f['name'])
    vttfiles.setdefault(sourceFilename, []).append(f)

# create the canvases for each original
for file in [f for f in originals if f['format'] in ['MPEG4', 'h.264 MPEG4', '512Kb MPEG4', 'HiRes MPEG4', 'MPEG2', 'h.264', 'Matroska', 'Ogg Video', 'Ogg Theora', 'WebM', 'Windows Media', 'Cinepack']]:
Expand All @@ -569,6 +578,32 @@ def create_manifest3(identifier, domain=None, page=None):
c_id = f"{URI_PRIFIX}/{identifier}/{slugged_id}/canvas"
c = Canvas(id=c_id, label=normalised_id, duration=float(file['length']), height=int(file['height']), width=int(file['width']))

# Add vtt if present: every VTT file grouped under this video's normalised_id
# is attached to the canvas as a "supplementing" annotation.
if vttfiles and normalised_id in vttfiles:
# All VTT annotations of this canvas are given the same annotation page id.
vttAPId = f"{URI_PRIFIX}/{identifier}/{slugged_id}/vtt"

# 1-based counter used to give each VTT annotation a distinct id.
vttNo = 1
for vttFile in vttfiles[normalised_id]:
# The body id is built by appending "resource/<identifier>/<file name>"
# directly to domain.
# NOTE(review): this assumes domain ends with "/" and that the file name
# needs no URL escaping - confirm against callers.
vtAnno = c.make_annotation(id=f"{URI_PRIFIX}/{identifier}/{slugged_id}/annotation/vtt/{vttNo}",
motivation="supplementing",
target=c.id,
anno_page_id=vttAPId,
body={"id": f"{domain}resource/{identifier}/{vttFile['name']}",
"type": "Text",
"format": "text/vtt",
})
# add label and language
if vttFile['name'].endswith("autogenerated.vtt"):
# Autogenerated tracks only get an English "autogenerated" label;
# no language is set on the body in this branch.
vtAnno.body.label = { 'en': ['autogenerated']}
else:
# Assume language: the second-to-last dot-separated part of the file
# name (e.g. "en" in cruz-test.en.vtt) is used both as the label text
# and as the body's language.
splitName = vttFile['name'].split(".")
lang = splitName[-2]
vtAnno.body.add_label(lang, language="none")
vtAnno.body.language = lang

vttNo += 1

# create intermediary objects
ap = AnnotationPage(id=f"{URI_PRIFIX}/{identifier}/{slugged_id}/page")
anno = Annotation(id=f"{URI_PRIFIX}/{identifier}/{slugged_id}/annotation", motivation="painting", target=c.id)
Expand Down
7 changes: 7 additions & 0 deletions nginx-vhost.conf
Original file line number Diff line number Diff line change
Expand Up @@ -37,4 +37,11 @@ server {
# Reverse proxy with the variables captured above
proxy_pass https://cantaloupe.prod.archive.org/iiif/$1/$2;
}

# Proxy item files (for example the VTT caption tracks referenced from the
# manifests) from archive.org and add CORS headers so that pages on other
# origins can fetch them.
location /iiif/resource/ {
    # 'always' adds the header regardless of the response code.
    add_header 'Access-Control-Allow-Origin' '*' always;
    # This location only proxies downloads, so advertise read-only methods
    # instead of POST/PUT/PATCH/DELETE.
    add_header 'Access-Control-Allow-Methods' 'GET, HEAD, OPTIONS' always;
    # The matched /iiif/resource/ prefix is replaced by /download/, e.g.
    # https://archive.org/download/cruz-test/cruz-test.af.vtt
    proxy_pass https://archive.org/download/;
}
}
20 changes: 1 addition & 19 deletions tests/test_manifests.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,21 +42,13 @@ def test_v3_single_text_manifest(self):
self.assertEqual(manifest['type'], "Manifest", f"Unexpected type. Expected Manifest go {manifest['type']}")
self.assertEqual(len(manifest['items']),1,f"Expected 1 canvas but got: {len(manifest['items'])}")


def test_v3_vermont_Life_Magazine(self):
    """The v3 manifest for rbmsbk_ap2-v4_2001_V55N4 loads and has 116 canvases."""
    response = self.test_app.get("/iiif/3/rbmsbk_ap2-v4_2001_V55N4/manifest.json")
    self.assertEqual(response.status_code, 200)

    canvas_count = len(response.json['items'])
    self.assertEqual(canvas_count, 116, f"Expected 116 canvas but got: {canvas_count}")

def test_v3_single_video_manifest(self):
    """The v3 manifest for youtube-7w8F2Xi3vFw loads and has exactly one canvas."""
    response = self.test_app.get("/iiif/3/youtube-7w8F2Xi3vFw/manifest.json")
    self.assertEqual(response.status_code, 200)

    canvas_count = len(response.json['items'])
    self.assertEqual(canvas_count, 1, f"Expected 1 canvas but got: {canvas_count}")


#logic to cover etree mediatype github issue #123
def test_v3_etree_mediatype(self):
resp = self.test_app.get("/iiif/3/gd72-04-14.aud.vernon.23662.sbeok.shnf/manifest.json")
Expand All @@ -66,30 +58,20 @@ def test_v3_etree_mediatype(self):
self.assertEqual(len(manifest['items']),36,f"Expected 36 canvases but got: {len(manifest['items'])}")
self.assertEqual(manifest['items'][0]['items'][0]['items'][0]['body']['items'][0]['type'],"Sound",f"Expected 'Sound' but got: {manifest['items'][0]['items'][0]['items'][0]['body']['items'][0]['type']}")


def test_v3_64Kbps_MP3(self):
    """The TvQuran.com__Alafasi manifest has 114 canvases and mentions '64Kbps MP3'."""
    response = self.test_app.get("/iiif/3/TvQuran.com__Alafasi/manifest.json")
    self.assertEqual(response.status_code, 200)

    canvas_count = len(response.json['items'])
    self.assertEqual(canvas_count, 114, f"Expected 114 canvases but got: {canvas_count}")

    # Case-insensitive search of the raw response text for the format name.
    mentions_format = "64Kbps MP3".lower() in response.text.lower()
    self.assertEqual(mentions_format, True, "Expected the string '64Kbps MP3'")


def test_v3_128Kbps_MP3(self):
    """The alice_in_wonderland_librivox manifest has 12 canvases and mentions '128kbps mp3'."""
    response = self.test_app.get("/iiif/3/alice_in_wonderland_librivox/manifest.json")
    self.assertEqual(response.status_code, 200)

    canvas_count = len(response.json['items'])
    self.assertEqual(canvas_count, 12, f"Expected 12 canvases but got: {canvas_count}")

    # Case-insensitive search of the raw response text for the format name.
    mentions_format = "128kbps mp3".lower() in response.text.lower()
    self.assertEqual(mentions_format, True, "Expected the string '128kbps mp3'")

def test_v3_h264_MPEG4_OGG_Theora(self):
    """The taboca_201002_03 manifest has 251 canvases and mentions both video formats."""
    response = self.test_app.get("/iiif/3/taboca_201002_03/manifest.json")
    self.assertEqual(response.status_code, 200)

    canvas_count = len(response.json['items'])
    self.assertEqual(canvas_count, 251, f"Expected 251 canvases but got: {canvas_count}")

    # Both format names are searched for case-insensitively in the raw text.
    lowered_text = response.text.lower()
    has_mpeg4 = "h.264 MPEG4".lower() in lowered_text
    self.assertEqual(has_mpeg4, True, "Expected the string 'h.264 MPEG4'")
    has_theora = "OGG Theora".lower() in lowered_text
    self.assertEqual(has_theora, True, "Expected the string 'OGG Theora'")

def test_v3_aiff(self):
resp = self.test_app.get("/iiif/3/PDextend_AIFF/manifest.json")
self.assertEqual(resp.status_code, 200)
Expand Down
68 changes: 68 additions & 0 deletions tests/test_video.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
import unittest
from flask.testing import FlaskClient
from iiify.app import app

class TestVideo(unittest.TestCase):
    """Manifest tests for video items, including their WebVTT caption annotations."""

    def setUp(self) -> None:
        """Create a fresh Flask test client for every test."""
        self.test_app = FlaskClient(app)

    def test_v3_single_video_manifest(self):
        """A single-video item produces a manifest with exactly one canvas."""
        resp = self.test_app.get("/iiif/3/youtube-7w8F2Xi3vFw/manifest.json")
        self.assertEqual(resp.status_code, 200)
        manifest = resp.json

        self.assertEqual(len(manifest['items']), 1, f"Expected 1 canvas but got: {len(manifest['items'])}")

    def test_v3_h264_MPEG4_OGG_Theora(self):
        """A multi-video item lists all 251 canvases and mentions both video formats."""
        resp = self.test_app.get("/iiif/3/taboca_201002_03/manifest.json")
        self.assertEqual(resp.status_code, 200)
        manifest = resp.json
        self.assertEqual(len(manifest['items']), 251, f"Expected 251 canvases but got: {len(manifest['items'])}")

        # assertTrue rather than assertIn: on failure assertIn would dump the
        # whole manifest text into the test output.
        manifest_text = resp.text.lower()
        self.assertTrue("h.264 MPEG4".lower() in manifest_text, "Expected the string 'h.264 MPEG4'")
        self.assertTrue("OGG Theora".lower() in manifest_text, "Expected the string 'OGG Theora'")

    def test_vtt_autogenerated(self):
        """An autogenerated VTT track becomes one labelled, language-less supplementing annotation."""
        resp = self.test_app.get("/iiif/3/youtube-SvH4fbjOT0A/manifest.json?recache=true")
        self.assertEqual(resp.status_code, 200)
        manifest = resp.json

        self.assertEqual(len(manifest['items']), 1, f"Expected 1 canvas but got: {len(manifest['items'])}")
        canvas = manifest['items'][0]
        self.assertIn('annotations', canvas, "Expected annotations in manifest")
        self.assertIsInstance(canvas['annotations'], list, "Expected annotations to be a list")
        self.assertEqual(len(canvas['annotations']), 1, "Expected 1 item in annotations")
        annotationPage = canvas['annotations'][0]
        self.assertEqual(annotationPage['type'], 'AnnotationPage', "Expected annotations to contain annotation page")

        # Use .get() so a missing 'items' key is reported as an assertion
        # failure instead of a KeyError raised while building the message.
        items = annotationPage.get('items')
        self.assertIsInstance(items, list, f"Expected annotation page to contain a list of items. Found {items}")
        self.assertEqual(len(items), 1, f"Expected annotation page to contain 1 item. Found {items}")
        annotation = items[0]
        self.assertEqual(annotation['type'], 'Annotation', "Expected annotationPage to contain annotations")
        self.assertEqual(annotation['motivation'], 'supplementing', "Expected annotation to have the supplementing motivation")
        self.assertIn('body', annotation, "Expected annotation to have a body")
        body = annotation['body']
        self.assertEqual(body['type'], 'Text', "Expected body to have a type text")
        self.assertEqual(body['format'], 'text/vtt', "Expected body to have the format text/vtt")
        self.assertEqual(body['label']['en'][0], "autogenerated", "Expected VTT file to have the label autogenerated")
        self.assertNotIn("language", body, "We don't know the language for this item so there shouldn't be a language specified")
        self.assertEqual(body['id'], "https://localhost/iiif/resource/youtube-SvH4fbjOT0A/34C3_-_International_Image_Interoperability_Framework_IIIF_Kulturinstitutionen_schaffen_interop-SvH4fbjOT0A.autogenerated.vtt", "Unexpected URL for the VTT file")

    def test_vtt_multilingual(self):
        """Every language-specific VTT track gets an annotation that carries its language."""
        resp = self.test_app.get("/iiif/3/cruz-test/manifest.json?recache=true")
        self.assertEqual(resp.status_code, 200)
        manifest = resp.json

        canvas = manifest['items'][0]
        self.assertIn('annotations', canvas, 'Expected annotations in Canvas')
        self.assertEqual(len(canvas['annotations']), 1, 'Expected one AnnotationPage')
        annotations = canvas['annotations'][0]['items']
        self.assertEqual(len(annotations), 104, 'Expected all 104 languages')

        # Check welsh: collect the ids of all Welsh tracks so the test fails
        # when the track is missing rather than passing without checking it.
        welsh_ids = []
        for item in annotations:
            self.assertIn('language', item['body'], f"All vtt files should have a language: {item}")
            if item['body']['language'] == 'cy':
                welsh_ids.append(item['body']['id'])
        self.assertEqual(welsh_ids, ['https://localhost/iiif/resource/cruz-test/cruz-test.cy.vtt'], 'Unexpected link for the Welsh vtt file')


if __name__ == '__main__':
    unittest.main()

0 comments on commit 01848a3

Please sign in to comment.