init

Tony607 · Mar 2, 2018 · 9618c49 · 9618c49 · jeremydub · May 29, 2018
commit 9618c49
Show file tree

Hide file tree

Showing 50 changed files with 2,517 additions and 0 deletions.
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,87 @@
+### https://raw.github.com/github/gitignore/f57304e9762876ae4c9b02867ed0cb887316387e/Python.gitignore
+
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+env/
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*,cover
+.hypothesis/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# pyenv
+.python-version
+
+# celery beat schedule file
+celerybeat-schedule
+
+# SageMath parsed files
+*.sage.py
+
+# dotenv
+.env
+
+# virtualenv
+.venv
+venv/
+ENV/
+
+# Spyder project settings
+.spyderproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+/.idea/
+
+/checkpoints/
+.DS_Store
+
+XY_dev/
+XY_train/
+*.npy
diff --git a/README.md b/README.md
@@ -0,0 +1,39 @@
+# [How to do Real Time Trigger Word Detection with Keras](https://www.dlology.com/blog/how-to-do-real-time-trigger-word-detection-with-keras/).
+
+Trigger word detection, a.k.a. wake word or hot word detection, is what wakes up a device when you say Amazon's "Alexa" or Google Home's "OK, Google".
+Wouldn't it be cool to build one yourself and run it in **real time**?
+
+In this post, I am going to show you exactly how to build a Keras model to do the same thing from scratch. No third party voice API or network connection required to make it functional.
+
+Background information is shown in my blog post.
+
+## How to Run
+Requires [Python 3.5+](https://www.python.org/ftp/python/3.6.4/python-3.6.4.exe) and [Jupyter notebook](https://jupyter.readthedocs.io/en/latest/install.html) to be installed.
+### Clone or download this repo
+```
+git clone https://github.com/Tony607/Keras-Trigger-Word
+```
+### Install required libraries
+`pip3 install -r requirements.txt`
+
+
+### Real-time demo
+
+In the project directory, open a command line and run:
+```
+jupyter notebook
+```
+If you are only interested in playing with the pre-trained trigger word model in the real-time demo,
+choose this notebook in the opened browser window:
+```
+trigger_word_real_time_demo.ipynb
+```
+
+Optionally, if you want to learn about data preparation and model training, continue with my [write-up](https://www.dlology.com/blog/how-to-do-real-time-trigger-word-detection-with-keras/). In the opened browser window, choose this notebook:
+```
+Trigger word detection - v1.ipynb
+```
+Download the train/dev data from the releases if you want to follow along with the notebook: [Data.zip](https://github.com/Tony607/Keras-Trigger-Word/releases/download/V0.1/Data.zip). Extract the
+`XY_dev` and `XY_train` folders to the root of the project directory.
+
+Happy coding! Leave a comment if you have any questions.
diff --git a/Trigger word detection - v1.ipynb b/Trigger word detection - v1.ipynb
diff --git a/audio_examples/chime.wav b/audio_examples/chime.wav
diff --git a/audio_examples/example_train.wav b/audio_examples/example_train.wav
diff --git a/audio_examples/insert_reference.wav b/audio_examples/insert_reference.wav
diff --git a/audio_examples/my_audio.wav b/audio_examples/my_audio.wav
diff --git a/audio_examples/train_reference.wav b/audio_examples/train_reference.wav
diff --git a/chime_output.wav b/chime_output.wav
diff --git a/images/date_attention.png b/images/date_attention.png
diff --git a/images/date_attention2.png b/images/date_attention2.png
diff --git a/images/label_diagram.png b/images/label_diagram.png
diff --git a/images/model_trigger.png b/images/model_trigger.png
diff --git a/images/music_gen.png b/images/music_gen.png
diff --git a/images/ones_reference.png b/images/ones_reference.png
diff --git a/images/poorly_trained_model.png b/images/poorly_trained_model.png
diff --git a/images/sound.png b/images/sound.png
diff --git a/images/spectrogram.png b/images/spectrogram.png
diff --git a/images/train_label.png b/images/train_label.png
diff --git a/images/train_reference.png b/images/train_reference.png
diff --git a/images/woebot.png b/images/woebot.png
diff --git a/insert_test.wav b/insert_test.wav
diff --git a/models/tr_model.h5 b/models/tr_model.h5
diff --git a/raw_data/activates/1.wav b/raw_data/activates/1.wav
diff --git a/raw_data/activates/1_act2.wav b/raw_data/activates/1_act2.wav
diff --git a/raw_data/activates/1_act3.wav b/raw_data/activates/1_act3.wav
diff --git a/raw_data/activates/2.wav b/raw_data/activates/2.wav
diff --git a/raw_data/activates/2_act2.wav b/raw_data/activates/2_act2.wav
diff --git a/raw_data/activates/3.wav b/raw_data/activates/3.wav
diff --git a/raw_data/activates/3_act2.wav b/raw_data/activates/3_act2.wav
diff --git a/raw_data/activates/3_act3.wav b/raw_data/activates/3_act3.wav
diff --git a/raw_data/activates/4_act2.wav b/raw_data/activates/4_act2.wav
diff --git a/raw_data/backgrounds/1.wav b/raw_data/backgrounds/1.wav
diff --git a/raw_data/backgrounds/2.wav b/raw_data/backgrounds/2.wav
diff --git a/raw_data/dev/1.wav b/raw_data/dev/1.wav
diff --git a/raw_data/dev/2.wav b/raw_data/dev/2.wav
diff --git a/raw_data/negatives/1.wav b/raw_data/negatives/1.wav
diff --git a/raw_data/negatives/1_0.wav b/raw_data/negatives/1_0.wav
diff --git a/raw_data/negatives/2.wav b/raw_data/negatives/2.wav
diff --git a/raw_data/negatives/2_1.wav b/raw_data/negatives/2_1.wav
diff --git a/raw_data/negatives/3.wav b/raw_data/negatives/3.wav
diff --git a/raw_data/negatives/3_2.wav b/raw_data/negatives/3_2.wav
diff --git a/raw_data/negatives/4.wav b/raw_data/negatives/4.wav
diff --git a/raw_data/negatives/4_0.wav b/raw_data/negatives/4_0.wav
diff --git a/raw_data/negatives/5.wav b/raw_data/negatives/5.wav
diff --git a/raw_data/negatives/5_1.wav b/raw_data/negatives/5_1.wav
diff --git a/requirements.txt b/requirements.txt
@@ -0,0 +1,6 @@
+numpy
+keras
+h5py
+pydub
+scipy
+matplotlib
diff --git a/td_utils.py b/td_utils.py
@@ -0,0 +1,46 @@
+import matplotlib.pyplot as plt
+from scipy.io import wavfile
+import os
+from pydub import AudioSegment
+
+# Calculate and plot spectrogram for a wav audio file
+def graph_spectrogram(wav_file):
+    """Compute and plot the spectrogram of a wav audio file.
+
+    When the loaded sample array has more than one dimension, only its
+    first column is used. The sampling frequency given to the spectrogram
+    is the fixed value 8000; the rate read from the file is not used.
+
+    Args:
+        wav_file: Passed unchanged to ``get_wav_info``.
+
+    Returns:
+        The first value returned by ``plt.specgram`` (the spectrogram).
+    """
+    _, data = get_wav_info(wav_file)
+    nfft = 200  # Length of each window segment
+    fs = 8000  # Sampling frequency
+    noverlap = 120  # Overlap between windows
+    # A single guard instead of an if/elif on the dimension count, so the
+    # result is always bound before it is returned.
+    if data.ndim > 1:
+        data = data[:, 0]
+    # Keyword arguments keep the call independent of specgram's positional
+    # parameter order.
+    pxx, _, _, _ = plt.specgram(data, NFFT=nfft, Fs=fs, noverlap=noverlap)
+    return pxx
+
+# Load a wav file
+def get_wav_info(wav_file):
+    """Read a wav file and return its ``(rate, data)`` pair.
+
+    Args:
+        wav_file: Passed unchanged to ``scipy.io.wavfile.read``.
+
+    Returns:
+        A tuple ``(rate, data)`` as produced by ``wavfile.read``.
+    """
+    sample_rate, samples = wavfile.read(wav_file)
+    wav_info = (sample_rate, samples)
+    return wav_info
+
+# Used to standardize volume of audio clip
+def match_target_amplitude(sound, target_dBFS):
+    """Apply the gain ``target_dBFS - sound.dBFS`` to ``sound``.
+
+    Args:
+        sound: Object exposing a ``dBFS`` attribute and an ``apply_gain``
+            method (presumably a pydub ``AudioSegment``; confirm at callers).
+        target_dBFS: Desired loudness level in dBFS.
+
+    Returns:
+        Whatever ``sound.apply_gain`` returns for the computed gain.
+    """
+    gain = target_dBFS - sound.dBFS
+    adjusted = sound.apply_gain(gain)
+    return adjusted
+
+# Load raw audio files for speech synthesis
+def _load_wav_dir(directory):
+    """Load every file in ``directory`` whose name ends with "wav"."""
+    # Sorted so the result does not depend on the order in which the
+    # operating system happens to list the directory.
+    return [
+        AudioSegment.from_wav(os.path.join(directory, filename))
+        for filename in sorted(os.listdir(directory))
+        if filename.endswith("wav")
+    ]
+
+
+def load_raw_audio():
+    """Load the raw audio clips used for speech synthesis.
+
+    Reads the wav files under ``./raw_data/activates``,
+    ``./raw_data/backgrounds`` and ``./raw_data/negatives`` (relative to
+    the current working directory).
+
+    Returns:
+        A tuple ``(activates, negatives, backgrounds)`` of lists of
+        ``AudioSegment`` objects. Note that negatives come before
+        backgrounds in the returned tuple.
+
+    Raises:
+        OSError: If one of the directories cannot be listed.
+    """
+    activates = _load_wav_dir("./raw_data/activates")
+    backgrounds = _load_wav_dir("./raw_data/backgrounds")
+    negatives = _load_wav_dir("./raw_data/negatives")
+    return activates, negatives, backgrounds
diff --git a/train.wav b/train.wav