From 25eb95d349c97fd032ee51cb064803f1df36dbc3 Mon Sep 17 00:00:00 2001 From: James S Perrin Date: Wed, 24 Oct 2018 15:10:39 +0100 Subject: [PATCH] Add #18, script to simulate ingestion of multiple types of mediapackages --- simulate-ingests/README.md | 12 + simulate-ingests/__init__.py | 1 + .../mediapackages/templates/episode.xml | 9 + .../mediapackages/templates/manifest.xml | 48 ++++ ...g.opencastproject.capture.agent.properties | 7 + .../mediapackages/templates/series.xml | 7 + .../mediapackages/tracks/README.md | 46 +++ simulate-ingests/simulateIngests.py | 261 ++++++++++++++++++ .../simulation-example.properties | 124 +++++++++ 9 files changed, 515 insertions(+) create mode 100644 simulate-ingests/README.md create mode 100644 simulate-ingests/__init__.py create mode 100644 simulate-ingests/mediapackages/templates/episode.xml create mode 100644 simulate-ingests/mediapackages/templates/manifest.xml create mode 100644 simulate-ingests/mediapackages/templates/org.opencastproject.capture.agent.properties create mode 100644 simulate-ingests/mediapackages/templates/series.xml create mode 100644 simulate-ingests/mediapackages/tracks/README.md create mode 100644 simulate-ingests/simulateIngests.py create mode 100644 simulate-ingests/simulation-example.properties diff --git a/simulate-ingests/README.md b/simulate-ingests/README.md new file mode 100644 index 0000000..2a09dfd --- /dev/null +++ b/simulate-ingests/README.md @@ -0,0 +1,12 @@ +# Opencast Simulate Ingests +Simulate the ingestion of recordings using multiple mediapackage profiles. + +## Requirements + +* depends on python-jinja2, python-requests +* appropriate presentation and presenter tracks that match the mediapackage profiles, +see mediapackages/tracks/README.md for suggestions on how to locate suitable data. + +## Configuration + +Copy simulation-example.properties to simulation.properties and edit according to the comments. 
\ No newline at end of file diff --git a/simulate-ingests/__init__.py b/simulate-ingests/__init__.py new file mode 100644 index 0000000..36becd3 --- /dev/null +++ b/simulate-ingests/__init__.py @@ -0,0 +1 @@ +__author__ = 'andrew wilson, james perrin' diff --git a/simulate-ingests/mediapackages/templates/episode.xml b/simulate-ingests/mediapackages/templates/episode.xml new file mode 100644 index 0000000..1797829 --- /dev/null +++ b/simulate-ingests/mediapackages/templates/episode.xml @@ -0,0 +1,9 @@ + + + {{ created }} + {{ identifier }} + {{ title }} + {{ is_part_of }} + {{ source }} + {{ location }} + \ No newline at end of file diff --git a/simulate-ingests/mediapackages/templates/manifest.xml b/simulate-ingests/mediapackages/templates/manifest.xml new file mode 100644 index 0000000..2482b00 --- /dev/null +++ b/simulate-ingests/mediapackages/templates/manifest.xml @@ -0,0 +1,48 @@ + + + + {%- if has_presentation_video %} + + video/{{ video_mime }} + presentation.{{ video_ext }} + {{ duration }} + + {% endif -%} + {%- if has_presenter_video %} + + video/{{ video_mime }} + presenter.{{ video_ext }} + {{ duration }} + + {% endif -%} + {%- if has_presentation_audio %} + + audio/{{ audio_mime }} + presentation.{{ audio_ext }} + {{ duration }} + + {% endif -%} + {%- if has_presenter_audio %} + + audio/{{ audio_mime }} + presenter.{{ audio_ext }} + {{ duration }} + + {% endif %} + + + + text/xml + episode.xml + + + text/xml + series.xml + + + + + org.opencastproject.capture.agent.properties + + + \ No newline at end of file diff --git a/simulate-ingests/mediapackages/templates/org.opencastproject.capture.agent.properties b/simulate-ingests/mediapackages/templates/org.opencastproject.capture.agent.properties new file mode 100644 index 0000000..563b146 --- /dev/null +++ b/simulate-ingests/mediapackages/templates/org.opencastproject.capture.agent.properties @@ -0,0 +1,7 @@ +event.location={{ location }} +event.series={{ series_id }} +event.title={{ title }} 
+org.opencastproject.workflow.definition={{ workflow }} +org.opencastproject.workflow.config.emailAddresses={{ email }} +org.opencastproject.workflow.config.publishToOaiPmh={{ publish }} +org.opencastproject.workflow.config.editRecording={{ edit }} diff --git a/simulate-ingests/mediapackages/templates/series.xml b/simulate-ingests/mediapackages/templates/series.xml new file mode 100644 index 0000000..bbac6b5 --- /dev/null +++ b/simulate-ingests/mediapackages/templates/series.xml @@ -0,0 +1,7 @@ + +{{ title }} +{{ created }} +{{ source }} +false +{{ identifier }} + \ No newline at end of file diff --git a/simulate-ingests/mediapackages/tracks/README.md b/simulate-ingests/mediapackages/tracks/README.md new file mode 100644 index 0000000..9a66f49 --- /dev/null +++ b/simulate-ingests/mediapackages/tracks/README.md @@ -0,0 +1,46 @@ +# Mediapackage Tracks Directory +Video and audio tracks should be present here that match the tracks listed in the mediapackage +profiles. The filenames must have the form: + + [presentation|presenter]-<type>-<duration>.[avi|mp3] + +e.g. + + presenter-single-1.avi + +## Filtering Sample Mediapackage from Database + +Analysing the mediapackage xml in the archive is expensive, therefore create a temporary table (#tablename) for +selecting prospective tracks: + + /* create temp table */ + with xmlnamespaces (default 'http://mediapackage.opencastproject.org') + select + id, + mp.value('(/mediapackage/media/track/@type)[1]', 'varchar(256)') as track_1, + left(mp.value('(/mediapackage/media/track/mimetype)[1]', 'varchar(256)'), 5) as mime_1, + mp.value('(/mediapackage/media/track/@type)[2]', 'varchar(256)') as track_2, + left(mp.value('(/mediapackage/media/track/mimetype)[2]', 'varchar(256)'), 5) as mime_2, + mp.value('(/mediapackage/media/track/@type)[3]', 'varchar(256)') as track_3, + left(mp.value('(/mediapackage/media/track/mimetype)[3]', 'varchar(256)'), 5) as mime_3, + mp.value('(/mediapackage/@duration)[1]', 'int')/60000 as duration + into #mp_data + from 
+ /* Cast the xml strings to UTF-16 encoded xml fields */ + (select id, cast(replace(cast(mediapackage_xml as nvarchar(max)), 'UTF-8', 'UTF-16') as xml) as mp + from dbo.mh_archive_episode + /* Pre filter the data */ + where modification_date > '2017-01-10 00:00:00' + and version = 0 and deleted = 0) as mpxml; + +Once the temporary table #mp_data has been created, queries can be run against it to get the ids of prospective example +mediapackages: + + select id + from #mp_data + where + duration=55 + and track_1='presenter/source' and mime_1='video' + and track_2='presenter/source' and mime_2='audio' + and track_3 is null and mime_3 is null; + diff --git a/simulate-ingests/simulateIngests.py b/simulate-ingests/simulateIngests.py new file mode 100644 index 0000000..fb8cfec --- /dev/null +++ b/simulate-ingests/simulateIngests.py @@ -0,0 +1,261 @@ +# DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE +# Version 2, December 2004 +# +# Copyright (C) 2004 Sam Hocevar +# +# Everyone is permitted to copy and distribute verbatim or modified +# copies of this license document, and changing it is allowed as long +# as the name is changed. +# +# DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE +# TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION +# +# 0. You just DO WHAT THE FUCK YOU WANT TO. 
# requires: python-requests, python-jinja2

try:
    import ConfigParser  # Python 2 module name
except ImportError:
    import configparser as ConfigParser  # the module was renamed in Python 3
import datetime
from jinja2 import Environment, FileSystemLoader
import logging
import os
import random
import requests
from requests.auth import HTTPDigestAuth
import shutil
import uuid

LOG_LEVEL = logging.INFO
EVENT_SOURCE = 'simulated:opencast-simulate-ingests:'
INGEST_ENDPOINT = '/ingest/addZippedMediaPackage/'
MEDIAPACKAGE_DIR = 'mediapackages/'
TEMPLATE_DIR = MEDIAPACKAGE_DIR + 'templates/'
TRACK_DIR = MEDIAPACKAGE_DIR + 'tracks/'
TMP_DIR = MEDIAPACKAGE_DIR + 'tmp/'
ZIP_DIR = MEDIAPACKAGE_DIR + 'zips/'


def get_config_section(config, section):
    """Return one section of the parsed configuration as a plain dict.

    Every value is converted to an int when possible, otherwise to a float,
    otherwise it is kept as the raw string (so 'True'/'False' stay strings).
    An option that cannot be read is reported on stdout and stored as None.

    :param config: a ConfigParser instance that has already read the file
    :param section: name of the section to extract
    :return: dict mapping option name to its converted value
    """
    values = {}
    for option in config.options(section):
        try:
            raw = config.get(section, option)
            try:
                values[option] = int(raw)
            except (TypeError, ValueError):
                try:
                    values[option] = float(raw)
                except (TypeError, ValueError):
                    values[option] = raw
            if values[option] == -1:
                print('skip: %s' % option)
        except Exception:
            # Best effort: a single unreadable option must not abort the whole section.
            print('exception on %s!' % option)
            values[option] = None
    return values


def get_profile(index):
    """Collect the settings of mediapackage profile number `index`.

    Reads the 'profile.<index>.*' keys from the module-level Mediapackages
    dict. The two file extensions are optional and default to 'avi' (video)
    and 'mp3' (audio); every other key is required and raises KeyError when
    it is missing.

    :param index: profile number as used in the configuration keys
    :return: dict with the keys type, duration, freq, video.presentation,
             video.presenter, audio.presentation, audio.presenter, video.ext
             and audio.ext
    """
    prefix = 'profile.{0}.'.format(index)
    profile = {}
    for key in ('type', 'duration', 'freq', 'video.presentation', 'video.presenter',
                'audio.presentation', 'audio.presenter'):
        profile[key] = Mediapackages[prefix + key]
    profile['video.ext'] = Mediapackages.get(prefix + 'video.ext', 'avi')
    profile['audio.ext'] = Mediapackages.get(prefix + 'audio.ext', 'mp3')
    return profile


def create_mediapackage(profile):
    """Build a zipped mediapackage for `profile` in ZIP_DIR.

    Renders the series, episode, manifest and capture agent templates into a
    temporary directory below TMP_DIR, hard-links the matching sample tracks
    from TRACK_DIR next to them and zips the result. The temporary directory
    is always removed again.

    :param profile: profile dict as returned by get_profile()
    :return: file name of the zip (relative to ZIP_DIR), or None when the
             mediapackage could not be created (the error is logged)
    """
    mime = dict(avi='msvideo', mp4='mp4', mov='quicktime', mp3='mp3')

    # create tmp workspace
    mp_id = str(uuid.uuid4())
    mp_dir = TMP_DIR + mp_id + '/'
    os.makedirs(mp_dir)

    try:
        duration = profile['duration']
        if duration >= 1:
            # whole hours are recorded five minutes short: 1 -> 55 min, 2 -> 115 min, ...
            duration_ms = int((55 + 60*(duration-1))*60000)
        else:
            # anything shorter is taken as a plain fraction of an hour
            duration_ms = int(duration*3600000)

        # Edit the episode.xml and manifest.xml datetime params;
        # the recording is dated so that it ends now
        now = datetime.datetime.utcnow()
        today = now.strftime('%Y-%m-%d')
        delta = datetime.timedelta(milliseconds=duration_ms)
        iso_datetime = (now-delta).strftime('%Y-%m-%dT%H:%M:%SZ')

        # Create series xml unique per day (name-based UUID of admin url + date)
        series_title = 'Simulated Ingests ' + today
        series_id = uuid.uuid3(uuid.NAMESPACE_URL, "{0}{1}".format(Opencast['admin'], today))
        series_vars = dict(identifier=series_id, created=today + 'T00:00:00Z', title=series_title, source=EVENT_SOURCE)
        series_template = Templates.get_template('series.xml')
        series_template.stream(series_vars).dump(mp_dir + 'series.xml')
        log.debug('series id {0} title {1}'.format(series_id, series_title))

        # Create episode xml
        location = os.uname()[1]  # nodename
        title = 'Test Recording from {0} - {1}'.format(location, iso_datetime)
        # The episode template reads {{ location }}; 'spatial' is still passed
        # in case a customised template uses that name.
        ep_vars = dict(identifier=mp_id, created=iso_datetime, title=title, source=EVENT_SOURCE, spatial=location,
                       location=location, is_part_of=series_id)
        ep_template = Templates.get_template('episode.xml')
        ep_template.stream(ep_vars).dump(mp_dir + 'episode.xml')
        log.debug('mediapackage id {0} title {1}'.format(mp_id, title))

        # Create manifest/mediapackage xml
        track_type = profile['type']
        video_ext = profile['video.ext']
        audio_ext = profile['audio.ext']
        # 'indentifer' was a misspelling; the correctly spelled key is added and
        # the old one kept for templates that may reference it.
        # NOTE(review): confirm which name manifest.xml actually uses.
        mp_vars = dict(identifier=mp_id, indentifer=mp_id, created=iso_datetime, duration=duration_ms,
                       title=title, series_title=series_title,
                       has_presentation_video=profile['video.presentation'] == 'True',
                       has_presenter_video=profile['video.presenter'] == 'True',
                       has_presentation_audio=profile['audio.presentation'] == 'True',
                       has_presenter_audio=profile['audio.presenter'] == 'True',
                       video_ext=video_ext, audio_ext=audio_ext, video_mime=mime[video_ext], audio_mime=mime[audio_ext])
        mp_template = Templates.get_template('manifest.xml')
        mp_template.stream(mp_vars).dump(mp_dir + 'manifest.xml')

        # Create capture agent properties; largely redundant, as they are
        # superseded by the POST parameters sent on ingest
        edit = False
        publish = Ingest['publish']

        ca_vars = dict(location=location, title=title, series_id=series_id, email=Ingest['email'],
                       workflow=Ingest['workflow'], edit=edit, publish=publish)
        ca_template = Templates.get_template('org.opencastproject.capture.agent.properties')
        ca_template.stream(ca_vars).dump(mp_dir + 'org.opencastproject.capture.agent.properties')

        # Copy tracks (hard links, so no data is duplicated); the sample file
        # is named <flavour>-<type>-<duration>.<ext>
        tracks = (('presentation', 'has_presentation_video', video_ext),
                  ('presenter', 'has_presenter_video', video_ext),
                  ('presentation', 'has_presentation_audio', audio_ext),
                  ('presenter', 'has_presenter_audio', audio_ext))
        for flavour, flag, ext in tracks:
            if mp_vars[flag]:
                track = '{0}-{1}-{2}.{3}'.format(flavour, track_type, duration, ext)
                os.link(TRACK_DIR + track, mp_dir + flavour + '.' + ext)

        # Make a zip
        mp_archive = 'mediapackage-' + mp_id
        shutil.make_archive(ZIP_DIR + mp_archive, 'zip', mp_dir)
    except Exception:
        log.error('failed to create mediapackage zip:', exc_info=True)
        return None
    finally:
        # Clean up
        shutil.rmtree(mp_dir, True)

    log.info("created mediapackage zip")
    return mp_archive + '.zip'


def submit_mediapackage(filename):
    """Upload a zipped mediapackage to an ingest server and delete the zip.

    The ingest server is picked at random from the comma separated
    Opencast['ingests'] list. A fraction Ingest['edit.freq'] of the ingests is
    flagged for editing; those are never published.

    :param filename: name of the zip file inside ZIP_DIR
    :return: True when the server answered with HTTP 200, False otherwise
             (failures are logged, not raised)
    """
    # choose a random ingest server if more than one
    ingest_server = random.choice(Opencast['ingests'].split(','))
    log.debug('ingest server ' + ingest_server)

    # Create and ingest the mediapackage POST request to opencast ingest endpoint
    edit = Ingest['edit.freq'] > random.random()
    if edit:
        publish = False
    else:
        publish = Ingest['publish'] == 'True'

    url = '{0}{1}{2}'.format(ingest_server, INGEST_ENDPOINT, Ingest['workflow'])
    headers = {'X-Requested-Auth': 'Digest'}
    # NOTE(review): the capture agent properties template calls this option
    # 'publishToOaiPmh' while the POST field is 'publishOaiPmh' - confirm which
    # key the workflow reads.
    fields = {'publishOaiPmh': publish,
              'editRecording': edit,
              'emailAddresses': Ingest['email']}
    log.info('workflow parameters, publish: %(publishOaiPmh)s, edit: %(editRecording)s', fields)

    zip_path = ZIP_DIR + filename
    try:
        log.info('starting ingest of mediapackage')
        # the context manager closes the handle even when the request fails
        with open(zip_path, 'rb') as zip_file:
            response = requests.post(url, headers=headers,
                                     auth=HTTPDigestAuth(Opencast['account'], Opencast['password']),
                                     data=fields, files={'mediapackage': zip_file})

        # Check the response status code is 200 (compare by value, not identity)
        if response.status_code == 200:
            log.info('server successfully ingested mediapackage')
            return True
        else:
            log.error('server failed to ingest mediapackage, returned status code: ' + str(response.status_code))
            return False
    except Exception:
        log.error('failed to upload mediapackage:', exc_info=True)
        return False
    finally:
        # Clean up
        if os.path.exists(zip_path):
            os.remove(zip_path)


# START

# logging
logging.basicConfig(format='%(asctime)-15s %(levelname)s - (%(module)s:%(lineno)d) %(message)s')
log = logging.getLogger(__name__)
log.setLevel(LOG_LEVEL)

# read configuration
config = ConfigParser.ConfigParser()
+config.read('simulation.properties') + +# config dicts +Opencast = {} +Ingest = {} +Mediapackages = {} + +if config.has_section('opencast'): + Opencast = get_config_section(config, 'opencast') +else: + log.error('config missing [opencast] section') + exit(1) + +if config.has_section('ingest'): + Ingest = get_config_section(config, 'ingest') +else: + log.error('config missing [ingest] section') + exit(1) + +if config.has_section('mediapackages'): + Mediapackages = get_config_section(config, 'mediapackages') +else: + log.error('config missing [mediapackages] section') + exit(1) + +# set up template env +Templates = Environment( + loader=FileSystemLoader(TEMPLATE_DIR) +) + +# run the simulation +count = Ingest['count'] +num_profiles = Mediapackages['profiles'] + +for i in range(count): + select_freq = random.random() + freq = 0.0 + select_profile = 0 + while freq < select_freq and select_profile < num_profiles: + select_profile += 1 + freq += Mediapackages['profile.{0}.freq'.format(select_profile)] + + profile = get_profile(select_profile) + + log.info('creating mediapackage {0}/{1} type {2} duration {3}h'.format(i+1, count, profile['type'], + profile['duration'])) + mediapackage = create_mediapackage(profile) + + if mediapackage is not None: + submit_mediapackage(mediapackage) diff --git a/simulate-ingests/simulation-example.properties b/simulate-ingests/simulation-example.properties new file mode 100644 index 0000000..db6aefc --- /dev/null +++ b/simulate-ingests/simulation-example.properties @@ -0,0 +1,124 @@ +[opencast] +# admin server url +admin=https://admin.opencast.org +# ingest servers urls, comma separated +ingests=https://ingest.opencast.org +# REST credentials +account=opencast_system_account +password=CHANGE_ME + +[ingest] +# number of ingests to make +count=1 +workflow=default +publish=True +# fraction that will be set for editing (and not published) +edit.freq=0.04 +email=test@opencast.org + +[mediapackages] +# number of mediapackage profiles 
+profiles=9 + +# Mediapackage profiles +# profile 0 is not read and is just an example +# the type + duration must be unique +# short description +profile.0.type=single +# duration in hours - approx +profile.0.duration=1 +# fraction of total that this profile should be upload +profile.0.freq=0.8 +# which tracks are present the extension parameter is optional defaults shown +profile.0.video.ext=avi +profile.0.video.presentation=True +profile.0.video.presenter=False +profile.0.audio.ext=mp3 +profile.0.audio.presentation=False +profile.0.audio.presenter=True + +# Profiles based on 2017/2018 recordings stats, MO-238 +#screen w/audio 20m 5% uploads +#screen w/audio 55m 1% uploads +#screen + audio 55m 43% standard 1h lectures +#screen + audio 115m 38% standard 2h lectures +#screen + audio 175m 8% standard 3h lectures +#screen + audio 235m 1% standard 4h lectures +#screen + camera + audio 55m 2% 1h lectures with screen and tracking cameras +#screen + camera + audio 115m 1% 2h lectures with screen and tracking cameras +#camera + audio 55m 1% 1h lectures with tracking cameras + + +profile.1.type=upload +profile.1.duration=0.5 +profile.1.freq=0.05 +profile.1.video.ext=mov +profile.1.video.presentation=True +profile.1.video.presenter=False +profile.1.audio.presentation=False +profile.1.audio.presenter=False + +profile.2.type=upload +profile.2.duration=1 +profile.2.freq=0.01 +profile.2.video.ext=mp4 +profile.2.video.presentation=True +profile.2.video.presenter=False +profile.2.audio.presentation=False +profile.2.audio.presenter=False + +profile.3.type=screen +profile.3.duration=1 +profile.3.freq=0.43 +profile.3.video.presentation=True +profile.3.video.presenter=False +profile.3.audio.presentation=False +profile.3.audio.presenter=True + +profile.4.type=screen +profile.4.duration=2 +profile.4.freq=0.38 +profile.4.video.presentation=True +profile.4.video.presenter=False +profile.4.audio.presentation=False +profile.4.audio.presenter=True + +profile.5.type=screen 
+profile.5.duration=3 +profile.5.freq=0.08 +profile.5.video.presentation=True +profile.5.video.presenter=False +profile.5.audio.presentation=False +profile.5.audio.presenter=True + +profile.6.type=screen +profile.6.duration=4 +profile.6.freq=0.01 +profile.6.video.presentation=True +profile.6.video.presenter=False +profile.6.audio.presentation=False +profile.6.audio.presenter=True + +profile.7.type=screen-camera +profile.7.duration=1 +profile.7.freq=0.02 +profile.7.video.presentation=True +profile.7.video.presenter=True +profile.7.audio.presentation=False +profile.7.audio.presenter=True + +profile.8.type=screen-camera +profile.8.duration=2 +profile.8.freq=0.01 +profile.8.video.presentation=True +profile.8.video.presenter=True +profile.8.audio.presentation=False +profile.8.audio.presenter=True + +profile.9.type=camera +profile.9.duration=1 +profile.9.freq=0.01 +profile.9.video.presentation=False +profile.9.video.presenter=True +profile.9.audio.presentation=False +profile.9.audio.presenter=True