Merge pull request #40 from beeldengeluid/37-extract-wav+spectogram-image

37 extract wav+spectogram image
beeldengeluid · Dec 7, 2023 · 1b1394c · 1b1394c
2 parents ba1d1e0 + 18750e6
commit 1b1394c
Show file tree

Hide file tree

Showing 8 changed files with 910 additions and 41 deletions.
diff --git a/config/config.yml b/config/config.yml
@@ -27,6 +27,8 @@ VISXP_PREP:
     SPECTOGRAM_WINDOW_SIZE_MS: 1000
     SPECTOGRAM_SAMPLERATE_HZ:  # this causes x amount of files and will cause a mismatch with the keyframes
         - 24000
+    GENERATE_SPECTOGRAM_IMAGES: true
+    EXTRACT_AUDIO_SAMPLES: true
     TEST_INPUT_FILE: https://openbeelden.nl/files/13/66/1411058.1366653.WEEKNUMMER404-HRE000042FF_924200_1089200.mp4
 INPUT:
     DELETE_ON_COMPLETION: false  # NOTE: set to True in production environment

diff --git a/main_data_processor.py b/main_data_processor.py
@@ -183,10 +183,9 @@ def generate_input_for_feature_extraction(
             )
 
         spectogram_provenance = spectogram.run(
-            input_file_path,
-            keyframe_timestamps,  # TODO check if this matches the actual keyframe timestamps
-            output_dirs[OutputType.SPECTOGRAMS.value],
-            output_dirs[OutputType.TMP.value],
+            input_file_path=input_file_path,
+            keyframe_timestamps=keyframe_timestamps,  # TODO check if this matches the actual keyframe timestamps
+            output_dirs=output_dirs,
         )
 
     return VisXPFeatureExtractionInput(

diff --git a/models.py b/models.py
@@ -10,7 +10,8 @@ class OutputType(Enum):
     METADATA = "metadata"  # produced by hecate.py
     PROVENANCE = "provenance"  # produced by provenance.py
     SPECTOGRAMS = "spectograms"  # produced by spectogram.py
-    TMP = "tmp"  # produced by spectogram.py
+    AUDIO = "audio"  # produced by spectogram.py
+    SPECTOGRAM_IMAGES = "spectogram_images"  # produced by spectogram.py
 
 
 # Hecate outputs these files into OutputType.METADATA

diff --git a/poetry.lock b/poetry.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -16,6 +16,7 @@ numpy = "^1.24.3"
 ffmpeg-python = "^0.2.0"
 validators = "^0.22.0"
 dane = "^0.4.2"
+matplotlib = "^3.8.2"
 
 
 [tool.poetry.group.dev.dependencies]

diff --git a/requirements.txt b/requirements.txt
@@ -6,4 +6,5 @@ opencv-python >= 4.8.0.76
 numpy >= 1.24.3
 ffmpeg-python >= 0.2.0
 python_speech_features @ git+https://github.com/jameslyons/python_speech_features
-validators >= 0.22.0
+validators >= 0.22.0
+matplotlib >= 3.8.2