-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathaudio.py
111 lines (87 loc) · 3.03 KB
/
audio.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
"""
FFMPEG AUDIO
============
This is a thin wrapper around the ffmpeg software. It can be used
to read many audio file formats into Python. It was
tested with TTA and WAV formats. It only supports signed 16/32 bit
and 32 bit float audio as of now.
Author: 2018 (c) Robin Scheibler
License: MIT License
"""
import re
import subprocess as sp
import numpy as np
def read(filename, ffmpeg_bin="ffmpeg", debug=False):
    """
    Read an audio file into python using FFMPEG. The syntax
    is similar to `scipy.io.wavfile.read`.

    Note: only signed 16/32 bit integer and 32 bit float audio is supported.

    Parameters
    ----------
    filename: str
        The audio filename
    ffmpeg_bin: str, optional
        The name of the ffmpeg executable
    debug: bool, optional
        Print the lines of the ffmpeg report as they are scanned

    Returns
    -------
    samplerate: int
        The samplerate of the audio signal.
    audio: ndarray
        The audio samples. For multichannel audio it is a 2D array with
        every column corresponding to a channel, for single channel audio
        it is a 1D array.

    Raises
    ------
    ValueError
        If ffmpeg reports no audio stream for the file, if the description
        of the audio stream cannot be parsed, or if the sample format is
        not one of the supported ones.
    """
    # First pass: without an output file ffmpeg only prints the description
    # of the input on stderr, which is parsed below.
    command = [ffmpeg_bin, "-i", filename]
    with sp.Popen(command, stdout=sp.PIPE, stderr=sp.PIPE, bufsize=100000) as pipe:
        _, stderr = pipe.communicate()

    # The report echoes the file's metadata tags, which are not guaranteed to
    # be valid UTF-8; undecodable bytes must not abort the whole read.
    report = stderr.decode("utf-8", errors="replace")

    # NOTE(review): the description used is that of the first audio stream
    # listed in the report; for files holding several audio streams ffmpeg's
    # default stream selection in the second pass may differ -- confirm.
    stream_info = None
    for line in report.split("\n"):
        if debug:
            print(line)

        words = line.split()
        if not words or words[0] != "Stream" or "Audio:" not in line:
            continue

        rate_match = re.search(r", (\d+) Hz,", line)
        layout_match = re.search(
            r", (\d+ channels|mono|stereo), ([a-z]+)([0-9]*)", line
        )
        if rate_match is None or layout_match is None:
            raise ValueError(
                "Could not parse the audio stream description: {}".format(
                    line.strip()
                )
            )

        stream_info = (rate_match.group(1),) + layout_match.group(1, 2, 3)
        break

    if stream_info is None:
        raise ValueError("No audio stream found in {!r}".format(filename))

    samplerate, n_channels, sampleformat, n_bits = stream_info
    samplerate = int(samplerate)

    if n_channels == "mono":
        n_channels = 1
    elif n_channels == "stereo":
        n_channels = 2
    else:
        # the only remaining alternative of the regex is "<N> channels"
        n_channels = int(n_channels.split()[0])

    # The raw output formats requested from ffmpeg are explicitly
    # little-endian, so the numpy dtypes are little-endian too (they are the
    # native int16/int32/float32 on little-endian machines).
    if sampleformat in ("flt", "fltp"):
        dtype = np.dtype("<f4")
        out_format = "f32le"
        n_bits = 32
    elif sampleformat == "s":
        n_bits = int(n_bits) if n_bits else 0
        if n_bits == 16:
            dtype = np.dtype("<i2")
            out_format = "s16le"
        elif n_bits == 32:
            dtype = np.dtype("<i4")
            out_format = "s32le"
        else:
            raise ValueError("For now only signed 16/32 bit audio is supported. Sorry")
    else:
        raise ValueError(
            "For now only signed 16/32 or float 32 bit audio is supported. Sorry"
        )

    n_bytes = n_bits // 8

    # chunks of a second of audio
    n_chunk = samplerate

    # Second pass: decode to raw interleaved samples written on stdout
    command = [ffmpeg_bin, "-i", filename, "-f", out_format, "-"]
    with sp.Popen(
        command, stdout=sp.PIPE, stderr=sp.PIPE, bufsize=n_channels * n_bytes * n_chunk
    ) as pipe:
        raw_audio, _ = pipe.communicate()

    # samples are interleaved: one row per frame, one column per channel
    audio = np.frombuffer(raw_audio, dtype=dtype).reshape((-1, n_channels))

    if audio.shape[1] == 1:
        audio = audio[:, 0]

    return samplerate, audio