Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

zenodo-B #12

Open
wants to merge 3 commits into
base: leaderboard_B
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 0 additions & 5 deletions .gitattributes
Original file line number Diff line number Diff line change
@@ -1,5 +0,0 @@
*.wav filter=lfs diff=lfs merge=lfs -text
*.pth filter=lfs diff=lfs merge=lfs -text
*.onnx filter=lfs diff=lfs merge=lfs -text
model/mixer.ckpt filter=lfs diff=lfs merge=lfs -text
model/mixer_val.ckpt filter=lfs diff=lfs merge=lfs -text
36 changes: 23 additions & 13 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,37 +11,38 @@ The separated stems each have a different <a href="https://ws-choi.github.io/per

## Installation

Setup [git-lfs](https://git-lfs.github.com/) first. You will need it to download the models inside this repository.
You'd also need [conda](https://docs.conda.io/en/latest/miniconda.html).

After all those are installed, clone this branch:
Set up [conda](https://docs.conda.io/en/latest/miniconda.html) first. After it's installed, clone this branch:

```bash
git clone -b leaderboard_B https://github.com/kuielab/mdx-net-submission.git
```

### Linux

In the cloned repository directory, do

```bash
conda env create -f environment.yml -n mdx-submit
conda activate mdx-submit
pip install -r requirements.txt
python download_demucs.py
wget https://zenodo.org/record/5717356/files/onnx_B.zip
unzip onnx_B.zip
```

Every time when you open a new terminal, conda will default to environment `base`.
Just do
### Windows

In the cloned repository directory, using the conda powershell prompt:

```bash
conda env create -f environment.yml -n mdx-submit
conda activate mdx-submit
pip install -r requirements.txt
python download_demucs.py
Invoke-WebRequest -Uri https://zenodo.org/record/5717356/files/onnx_B.zip -OutFile onnx_B.zip
Expand-Archive onnx_B.zip -DestinationPath .
```

to go back into the environment you have installed MDX's dependencies in.

## Custom models

For custom models (such as the [higher quality vocal model trained by UVR team](https://github.com/Anjok07/ultimatevocalremovergui/releases/tag/MDX-Net-B)), please replace the relevant models in `./onnx/`.

## Usage

After successful installation, you can put the songs you wish to separate as `./data/test/SONGNAME/mixture.wav`, and run either `run.sh` or
Expand All @@ -50,4 +51,13 @@ After successful installation, you can put the songs you wish to separate as `./
python predict_blend.py
```

After the separation completes, the results will be saved in `./data/results/baseline/SONGNAME/`.
After the separation completes, the results will be saved in `./data/results/kuielab_mdxnet_B/SONGNAME/`.
Zokhoi marked this conversation as resolved.
Show resolved Hide resolved

Also, every time when you open a new terminal / conda prompt, conda will default to environment `base`.
Just do

```bash
conda activate mdx-submit
```

to go back into the environment you have installed MDX's dependencies in.
2 changes: 1 addition & 1 deletion aicrowd.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,6 @@
"authors": [
"kim_min_seok"
],
"description": "",
"description": "KUIELAB-MDX-Net-B",
"external_dataset_used": true
}
Binary file not shown.
3 changes: 0 additions & 3 deletions data/test/Mu - Too Bright/bass.wav

This file was deleted.

3 changes: 0 additions & 3 deletions data/test/Mu - Too Bright/drums.wav

This file was deleted.

3 changes: 0 additions & 3 deletions data/test/Mu - Too Bright/mixture.wav

This file was deleted.

3 changes: 0 additions & 3 deletions data/test/Mu - Too Bright/other.wav

This file was deleted.

3 changes: 0 additions & 3 deletions data/test/Mu - Too Bright/vocals.wav

This file was deleted.

15 changes: 8 additions & 7 deletions evaluator/music_demixing.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,16 +24,17 @@ def time_limit(seconds):
def signal_handler(signum, frame):
raise TimeoutException("Prediction timed out!")

signal.signal(signal.SIGALRM, signal_handler)
signal.alarm(seconds)
try:
yield
finally:
signal.alarm(0)
# signal.signal(signal.SIGALRM, signal_handler)
# signal.alarm(seconds)
# try:
# yield
# finally:
# signal.alarm(0)
yield


class MusicDemixingPredictor:
def __init__(self, model_name='baseline'):
def __init__(self, model_name='kuielab_mdxnet_B'):
self.test_data_path = os.getenv("TEST_DATASET_PATH", os.getcwd() + "/data/test/")
self.results_data_path = os.getenv("RESULTS_DATASET_PATH", os.getcwd() + "/data/results/" + model_name)
self.inference_setup_timeout = int(os.getenv("INFERENCE_SETUP_TIMEOUT_SECONDS", "900"))
Expand Down
1 change: 0 additions & 1 deletion model/.directory
Original file line number Diff line number Diff line change
@@ -1 +0,0 @@

3 changes: 3 additions & 0 deletions model/mixer.ckpt
Git LFS file not shown
2 changes: 1 addition & 1 deletion models.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

dim_c = 4
k = 3
model_path = 'model'
model_path = 'data/weights'
n_fft_scale = {'bass': 8, 'drums':2, 'other':4, 'vocals':3, '*':2}


Expand Down
38 changes: 19 additions & 19 deletions predict_blend.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,23 +11,23 @@
device = torch.device('cpu')

class Predictor(MusicDemixingPredictor):

def prediction_setup(self):
self.models = get_models(model_name, load=False, device=device)
self.demucs = Demucs(sources=["drums", "bass", "other", "vocals"], channels=48 if '48' in demucs_name else 64)
self.demucs.load_state_dict(torch.load(f'model/{demucs_name}.ckpt'))
self.demucs.eval()

def prediction(self, mixture_file_path, bass_file_path, drums_file_path, other_file_path, vocals_file_path):
file_paths = [bass_file_path, drums_file_path, other_file_path, vocals_file_path]
mix, rate = sf.read(mixture_file_path)
sources = self.demix(mix.T)
file_paths = [bass_file_path, drums_file_path, other_file_path, vocals_file_path]
sources = self.demix(mixture_file_path)
for i in range(len(sources)):
sf.write(file_paths[i], sources[i].T, rate)
sf.write(file_paths[i], sources[i].T, samplerate=44100)

def demix(self, mix):
def demix(self, mix_path):
mix = sf.read(mix_path)[0].T
demucs_out = self.demix_demucs(mix)
base_out = self.demix_base(mix)
base_out = self.demix_base(mix)
sources = base_out * b + demucs_out * (1-b)
return sources

Expand All @@ -36,36 +36,35 @@ def demix_base(self, mix):
sources = []
n_sample = mix.shape[1]
for model in self.models:
trim = model.n_fft//2
gen_size = model.chunk_size-2*trim
pad = gen_size - n_sample%gen_size
mix_p = np.concatenate((np.zeros((2,trim)), mix, np.zeros((2,pad)), np.zeros((2,trim))), 1)
trim = model.n_fft // 2
gen_size = model.chunk_size - 2 * trim
pad = gen_size - n_sample % gen_size
mix_p = np.concatenate((np.zeros((2, trim)), mix, np.zeros((2, pad)), np.zeros((2, trim))), 1)

mix_waves = []
i = 0
while i < n_sample + pad:
waves = np.array(mix_p[:, i:i+model.chunk_size])
waves = np.array(mix_p[:, i:i + model.chunk_size])
mix_waves.append(waves)
i += gen_size
mix_waves = torch.tensor(mix_waves, dtype=torch.float32)
i += gen_size
mix_waves = torch.tensor(mix_waves, dtype=torch.float32)

with torch.no_grad():
_ort = ort.InferenceSession(f'onnx/{model.target_name}.onnx')
_ort = ort.InferenceSession(f'{onnx_name}/{model.target_name}.onnx')
tar_waves = model.istft(torch.tensor(
_ort.run(None, {'input': model.stft(mix_waves).numpy()})[0]
))
tar_signal = tar_waves[:,:,trim:-trim].transpose(0,1).reshape(2, -1).numpy()[:, :-pad]

tar_signal = tar_waves[:, :, trim:-trim].transpose(0, 1).reshape(2, -1).numpy()[:, :-pad]
sources.append(tar_signal)
print(time()-start_time)
return np.array(sources)

def demix_demucs(self, mix):
start_time = time()
mix = torch.tensor(mix, dtype=torch.float32)
ref = mix.mean(0)
ref = mix.mean(0)
mix = (mix - ref.mean()) / ref.std()

with torch.no_grad():
sources = apply_model(self.demucs, mix, split=True, overlap=0.5)

Expand All @@ -77,6 +76,7 @@ def demix_demucs(self, mix):

model_name = 'tdf_extra'
demucs_name = 'demucs_extra'
onnx_name = 'onnx_B'

b = np.array([[[0.5]], [[0.5]], [[0.7]], [[0.9]]])

Expand Down