From 8bae973a1e274df6000d97240f5a7b51493553cc Mon Sep 17 00:00:00 2001 From: haoxiangsnr Date: Wed, 20 Jul 2022 03:26:28 +0000 Subject: [PATCH] chore: initial sphinx docs --- .readthedocs.yml | 16 ++++++ CHANGELOG.md | 3 + CODE_OF_CONDUCT.md | 3 + CONTRIBUTING.md | 36 ++++++++++++ README.md | 17 +++--- docs/Makefile | 19 ++++++ docs/fullsubnet_dns_interspeech.md | 45 --------------- docs/source/_static/css/custom.css | 3 + docs/source/conf.py | 54 ++++++++++++++++++ docs/source/index.rst | 33 +++++++++++ docs/source/reference/changelog.md | 2 + docs/source/reference/conduct.md | 2 + docs/source/reference/contributing.md | 2 + docs/{ => source/usage}/fullsubnet-result.png | Bin docs/{ => source/usage}/getting_started.md | 0 docs/source/usage/perf.md | 3 + docs/{ => source/usage}/prerequisites.md | 0 docs/source/usage/presentation.md | 3 + docs/source/usage/release.md | 31 ++++++++++ docs/{ => source/usage}/workflow.png | Bin pyproject.toml | 42 ++++++++++++++ 21 files changed, 259 insertions(+), 55 deletions(-) create mode 100644 .readthedocs.yml create mode 100644 CHANGELOG.md create mode 100644 CODE_OF_CONDUCT.md create mode 100644 CONTRIBUTING.md create mode 100644 docs/Makefile delete mode 100644 docs/fullsubnet_dns_interspeech.md create mode 100644 docs/source/_static/css/custom.css create mode 100644 docs/source/conf.py create mode 100644 docs/source/index.rst create mode 100644 docs/source/reference/changelog.md create mode 100644 docs/source/reference/conduct.md create mode 100644 docs/source/reference/contributing.md rename docs/{ => source/usage}/fullsubnet-result.png (100%) rename docs/{ => source/usage}/getting_started.md (100%) create mode 100644 docs/source/usage/perf.md rename docs/{ => source/usage}/prerequisites.md (100%) create mode 100644 docs/source/usage/presentation.md create mode 100644 docs/source/usage/release.md rename docs/{ => source/usage}/workflow.png (100%) create mode 100644 pyproject.toml diff --git a/.readthedocs.yml 
b/.readthedocs.yml new file mode 100644 index 0000000..9ed1274 --- /dev/null +++ b/.readthedocs.yml @@ -0,0 +1,16 @@ +# Required +version: 2 + +build: + os: "ubuntu-20.04" + apt_packages: + - libsndfile1 + tools: + python: "3.10" + +python: + install: + - method: pip + path: . + extra_requirements: + - docs diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..d29d5af --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,3 @@ +# Changelog + + diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md new file mode 100644 index 0000000..e2758a2 --- /dev/null +++ b/CODE_OF_CONDUCT.md @@ -0,0 +1,3 @@ +# Code of Conduct + +Everyone interacting in the project's codebases and documentation is expected to follow the [PyPA Code of Conduct](https://www.pypa.io/en/latest/code-of-conduct/). This includes, but is not limited to, issue trackers, chat rooms, mailing lists, and other virtual or real-life communication. \ No newline at end of file diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..f0c58df --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,36 @@ +# Contributing + +## Development workflow + +Hi there! This repository follows the [GitHub flow](https://docs.github.com/en/get-started/quickstart/github-flow). The GitHub flow consists of the main branch and many feature branches. Generally speaking, the main branch never receives direct commits; changes are integrated into it only via rebase and merge. Feature branches (for new features, bug fixes, refactoring, experiments, etc.) are used for development. The GitHub flow keeps the main branch working, together with its documentation and tests. 
+ +## Commit + +This repository uses the [Angular commit style](https://github.com/angular/angular.js/blob/master/DEVELOPERS.md#commit-message-format), which looks like this: + +```shell +<type>(optional scope): short summary in present tense + +(optional body: explains motivation for the change) + +(optional footer: note BREAKING CHANGES here, and issues to be closed) +``` + +Generally speaking, you need to at least specify a type and a short summary for each commit. `<type>` refers to the kind of change made and is usually one of: + +- `feat`: A new feature. +- `fix`: A bug fix. +- `docs`: Documentation changes. +- `style`: Changes that do not affect the meaning of the code (white space, formatting, missing semi-colons, etc.). +- `refactor`: A code change that neither fixes a bug nor adds a feature. +- `perf`: A code change that improves performance. +- `test`: Changes to the test framework. +- `build`: Changes to the build process or tools. + +By using the standardized commit message in this Angular commit style, the continuous integration configuration will automatically bump version numbers based on keywords it finds in commit messages. 
+ +## References + +- [Git for Professionals Tutorial - Tools & Concepts for Mastering Version Control with Git](https://www.youtube.com/watch?v=Uszj_k0DGsg) +- [GitHub flow](https://docs.github.com/en/get-started/quickstart/github-flow) +- [How to Write a Git Commit Message](https://cbea.ms/git-commit/) \ No newline at end of file diff --git a/README.md b/README.md index cb69464..4b323c9 100644 --- a/README.md +++ b/README.md @@ -4,6 +4,8 @@ ![Python version](https://img.shields.io/badge/Python-%3E%3D3.8.0-orange) ![Pytorch Version](https://img.shields.io/badge/PyTorch-%3E%3D1.10-brightgreen) ![GitHub repo size](https://img.shields.io/github/repo-size/haoxiangsnr/FullSubNet) +[![Documentation Status](https://readthedocs.org/projects/fullsubnet/badge/?version=latest)](https://fullsubnet.readthedocs.io/en/latest/?badge=latest) + This Git repository for the official PyTorch implementation of ["FullSubNet: A Full-Band and Sub-Band Fusion Model for Real-Time Single-Channel Speech Enhancement"](https://arxiv.org/abs/2010.15508), accepted @@ -11,9 +13,10 @@ to ICASSP 2021. 
:bulb:[[Demo\]](https://www.haoxiangsnr.com/demo/fullsubnet/) | :page_facing_up:[[PDF\]](https://arxiv.org/abs/2010.15508) | :floppy_disk:[[Model Checkpoint\]](https://github.com/haoxiangsnr/FullSubNet/releases) -## Introduction +## Documentation -[![Click it to show a video](https://i.imgur.com/s3mq7NNl.png)](https://youtu.be/XJeE-MWDlk0 "FullSubNet: A Full-Band and Sub-Band Fusion Model for Real-Time Single-Channel Speech Enhancement") +- [Prerequisites](docs/source/usage/prerequisites.md) +- [Getting Started](docs/source/usage/getting_started.md) ## Key Features @@ -29,11 +32,6 @@ You can use all of these things: - [x] Deep Noise Suppression Challenge - INTERSPEECH 2020 - [ ] Demand + CSTR VCTK Corpus -## Documentation - -- [Prerequisites](docs/prerequisites.md) -- [Getting Started](docs/getting_started.md) - ## Citation If you use this code for your research, please consider citing: @@ -41,8 +39,8 @@ If you use this code for your research, please consider citing: ```text @INPROCEEDINGS{hao2020fullsubnet, author={Hao, Xiang and Su, Xiangdong and Horaud, Radu and Li, Xiaofei}, - booktitle={ICASSP 2021 - 2021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)}, - title={Fullsubnet: A Full-Band and Sub-Band Fusion Model for Real-Time Single-Channel Speech Enhancement}, + booktitle={ICASSP 2021 - 2021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)}, + title={Fullsubnet: A Full-Band and Sub-Band Fusion Model for Real-Time Single-Channel Speech Enhancement}, year={2021}, pages={6633-6637}, doi={10.1109/ICASSP39728.2021.9414177} @@ -52,4 +50,3 @@ If you use this code for your research, please consider citing: ## License [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://github.com/haoxiangsnr/FullSubNet/blob/main/LICENSE) - diff --git a/docs/Makefile b/docs/Makefile new file mode 100644 index 0000000..9edfe2d --- /dev/null +++ b/docs/Makefile @@ -0,0 +1,19 @@ +# Minimal makefile for Sphinx 
documentation + +# You can set these variables from the command line. +SPHINXOPTS = +SPHINXBUILD = python -m sphinx +SPHINXPROJ = FullSubNet +SOURCEDIR = source +BUILDDIR = _build + +# Put it first so that "make" without argument is like "make help". +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). +%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) \ No newline at end of file diff --git a/docs/fullsubnet_dns_interspeech.md b/docs/fullsubnet_dns_interspeech.md deleted file mode 100644 index f53da1d..0000000 --- a/docs/fullsubnet_dns_interspeech.md +++ /dev/null @@ -1,45 +0,0 @@ -# Performance - -## DNS INTERSPEECH 2020: - -enhanced dir: -/mnt/inspurfs/home/haoxiang/Enhancement/fullsubnet_dns_interspeech_no_reverb/enhanced_0170 -clean dir: -/mnt/inspurfs/home/haoxiang/Datasets/DNS-Challenge-INTERSPEECH/datasets/test_set/synthetic/no_reverb/clean -metrics: - -```text -150it [00:01, 105.41it/s] -SI_SDR: 17.81835764249166 -150it [00:00, 290.71it/s] -STOI: 0.9645223653574398 -150it [00:00, 242.65it/s] -WB_PESQ: 2.9503208065032958 -150it [00:00, 190.35it/s] -NB_PESQ: 3.419909480412801 -``` - -enhanced dir: -/mnt/inspurfs/home/haoxiang/Enhancement/fullsubnet_dns_interspeech_with_reverb/enhanced_0170 -clean dir: -/mnt/inspurfs/home/haoxiang/Datasets/DNS-Challenge-INTERSPEECH/datasets/test_set/synthetic/with_reverb/clean -metrics: - -```text -150it [00:02, 69.17it/s] -SI_SDR: 15.858081384499867 -150it [00:00, 193.11it/s] -STOI: 0.9269622049639774 -150it [00:00, 175.21it/s] -WB_PESQ: 3.046394157409668 -150it [00:00, 152.80it/s] -NB_PESQ: 3.5234271907806396 -``` - -## DNS ICASSP 2021: - -enhanced dir: -/mnt/inspurfs/home/haoxiang/Enhancement/fullsubnet_dns_icassp_dev_dataset - -enhanced dir: 
-/mnt/inspurfs/home/haoxiang/Enhancement/fullsubnet_dns_icassp_blind_testset \ No newline at end of file diff --git a/docs/source/_static/css/custom.css b/docs/source/_static/css/custom.css new file mode 100644 index 0000000..6813142 --- /dev/null +++ b/docs/source/_static/css/custom.css @@ -0,0 +1,3 @@ +div.wy-nav-content { + max-width: 800px; +} \ No newline at end of file diff --git a/docs/source/conf.py b/docs/source/conf.py new file mode 100644 index 0000000..3db3328 --- /dev/null +++ b/docs/source/conf.py @@ -0,0 +1,54 @@ +import importlib_metadata + +# -- Project information ----------------------------------------------------- +project = "FullSubNet" +author = "HAO Xiang " +project_copyright = "2022, HAO Xiang" +release = importlib_metadata.version(project) +version = ".".join(release.split(".")[:2]) # e.g., "0.3" stand for the major is "0" and the minor is "3" + +# -- MetaConfig configuration --------------------------------------------------- +exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] +extensions = [ + "myst_parser", # markdown file parser. + "sphinx.ext.todo", # enable the todo. + "sphinx.ext.autodoc", # provide automatic documentation for module (*.py), class, function, and typehints. + "sphinx.ext.autosummary", # auto-generate the summary (include links) of the modules. + "sphinx.ext.intersphinx", # enable cross-referencing between Sphinx projects. + "sphinx.ext.viewcode", # add a helpful link to the source code of each object in the API reference sheet. + "sphinx.ext.mathjax", # enable math support in the documentation. + "sphinx.ext.napoleon", # [ordered] parse our docstrings and generate Google-style docstrings. + "sphinxcontrib.autodoc_pydantic", # generate the suitable docstrings to pydantic models. 
+] + +# -- Extension configuration ------------------------------------------------- +napoleon_numpy_docstring = False +napoleon_attr_annotations = True +intersphinx_mapping = { + "python": ("https://docs.python.org/3", None), + "numpy": ("https://numpy.org/doc/stable/", None), +} +autosummary_generate = True +autodoc_mock_imports = ["soundfile", "gpuRIR"] +autodoc_pydantic_model_signature_prefix = "Config" +autodoc_pydantic_member_order = "bysource" +autodoc_pydantic_model_show_field_summary = False +autodoc_pydantic_model_show_json = False +autodoc_pydantic_model_show_validator_members = False +autodoc_pydantic_model_show_validator_summary = False +autodoc_pydantic_model_summary_list_order = "bysource" +autodoc_pydantic_model_list_validators = False +autodoc_pydantic_field_signature_prefix = "option" + +# -- Options for HTML output ------------------------------------------------- +html_theme = "sphinx_rtd_theme" +html_context = { + "display_github": True, # edit on Github, see https://github.com/readthedocs/sphinx_rtd_theme/issues/529 + "github_user": "haoxiangsnr", + "github_repo": "FullSubNet", + "github_version": "main", +} +html_static_path = ["_static"] +html_css_files = [ + "css/custom.css", +] diff --git a/docs/source/index.rst b/docs/source/index.rst new file mode 100644 index 0000000..5710e0c --- /dev/null +++ b/docs/source/index.rst @@ -0,0 +1,33 @@ +============================================ +Welcome to FullSubNet's documentation! +============================================ + +The FullSubNet a full-band and sub-band fusion model for single-channel real-time speech enhancement. The full-band and sub-band refer to the models that input full-band and sub-band noisy spectral feature, output full-band and sub-band speech target, respectively. The sub-band model processes each frequency independently. Its input consists of one frequency and several context frequencies. 
The output is the prediction of the clean speech target for the corresponding frequency. These two types of models have distinct characteristics. The full-band model can capture the global spectral context and the long-distance cross-band dependencies. However, it lacks the ability to model signal stationarity and attend to the local spectral pattern. The sub-band model is just the opposite. In our proposed FullSubNet, we connect a pure full-band model and a pure sub-band model sequentially and use practical joint training to integrate these two types of models' advantages. We conducted experiments on the DNS challenge (INTERSPEECH 2020) dataset to evaluate the proposed method. Experimental results show that full-band and sub-band information are complementary, and the FullSubNet can effectively integrate them. Besides, the performance of the FullSubNet also exceeds that of the top-ranked methods in the DNS Challenge (INTERSPEECH 2020). + +.. toctree:: + :caption: Getting started + :maxdepth: 1 + :titlesonly: + + usage/prerequisites.md + usage/getting_started.md + usage/release.md + usage/perf.md + usage/presentation.md + +.. 
toctree:: + :caption: Reference + :maxdepth: 1 + :titlesonly: + + reference/contributing.md + reference/conduct.md + reference/changelog.md + + +Indices and tables +------------------ + +* :ref:`genindex` +* :ref:`modindex` +* :ref:`search` \ No newline at end of file diff --git a/docs/source/reference/changelog.md b/docs/source/reference/changelog.md new file mode 100644 index 0000000..14e66a7 --- /dev/null +++ b/docs/source/reference/changelog.md @@ -0,0 +1,2 @@ +```{include} ../../../CHANGELOG.md +``` diff --git a/docs/source/reference/conduct.md b/docs/source/reference/conduct.md new file mode 100644 index 0000000..03ce547 --- /dev/null +++ b/docs/source/reference/conduct.md @@ -0,0 +1,2 @@ +```{include} ../../../CODE_OF_CONDUCT.md +``` diff --git a/docs/source/reference/contributing.md b/docs/source/reference/contributing.md new file mode 100644 index 0000000..9eb1fae --- /dev/null +++ b/docs/source/reference/contributing.md @@ -0,0 +1,2 @@ +```{include} ../../../CONTRIBUTING.md +``` \ No newline at end of file diff --git a/docs/fullsubnet-result.png b/docs/source/usage/fullsubnet-result.png similarity index 100% rename from docs/fullsubnet-result.png rename to docs/source/usage/fullsubnet-result.png diff --git a/docs/getting_started.md b/docs/source/usage/getting_started.md similarity index 100% rename from docs/getting_started.md rename to docs/source/usage/getting_started.md diff --git a/docs/source/usage/perf.md b/docs/source/usage/perf.md new file mode 100644 index 0000000..de296ee --- /dev/null +++ b/docs/source/usage/perf.md @@ -0,0 +1,3 @@ +# Performance + +![perf](fullsubnet-result.png) \ No newline at end of file diff --git a/docs/prerequisites.md b/docs/source/usage/prerequisites.md similarity index 100% rename from docs/prerequisites.md rename to docs/source/usage/prerequisites.md diff --git a/docs/source/usage/presentation.md b/docs/source/usage/presentation.md new file mode 100644 index 0000000..9a818c3 --- /dev/null +++ 
b/docs/source/usage/presentation.md @@ -0,0 +1,3 @@ +# FullSubNet presentation + +[![Click it to show a video](https://i.imgur.com/s3mq7NNl.png)](https://youtu.be/XJeE-MWDlk0 "FullSubNet: A Full-Band and Sub-Band Fusion Model for Real-Time Single-Channel Speech Enhancement") \ No newline at end of file diff --git a/docs/source/usage/release.md b/docs/source/usage/release.md new file mode 100644 index 0000000..a7a787c --- /dev/null +++ b/docs/source/usage/release.md @@ -0,0 +1,31 @@ +# FullSubNet Checkpoints and RIRs + +## Checkpoints + +This [release](https://github.com/haoxiangsnr/FullSubNet/releases) has two model checkpoints. All checkpoints include "model_state_dict", "optimizer_state_dict", and some other meta information. + +The first model checkpoint is the original model checkpoint at the 58th epoch. The performance is shown in this table: + +| | With Reverb | | | | No Reverb | | | | +|:----------:|:-----------:|:-------:|:------:|:-----:|:---------:|:-------:|:------:|:-----:| +| Method | WB-PESQ | NB-PESQ | SI-SDR | STOI | WB-PESQ | NB-PESQ | SI-SDR | STOI | +| FullSubNet | 2.987 | 3.496 | 15.756 | 0.926 | 2.889 | 3.385 | 17.635 | 0.964 | + +In addition, some people are interested in the performance when using cumulative normalization. The below one is a pre-trained FullSubNet using cumulative normalization: + +| | With Reverb | | | | No Reverb | | | | +|:----------:|:-----------:|:-------:|:------:|:-----:|:---------:|:-------:|:------:|:-----:| +| Method | WB-PESQ | NB-PESQ | SI-SDR | STOI | WB-PESQ | NB-PESQ | SI-SDR | STOI | +|FullSubNet (Cumulative Norm)| 2.978| 3.503 | 15.820 | 0.928 | 2.863| 3.376 | 17.913 | 0.964 | + +If you want to inference or fine-tune based on these checkpoints, please check the usage in the documents. + +## Room Impulse Responses + +As mentioned in the paper, the room impulse responses (RIRs) come from the Multichannel Impulse Response Database and the Reverb Challenge dataset. 
Please download the zip package "RIR (Multichannel Impulse Response Database + The REVERB challenge).zip" if you would like to retrain the FullSubNet. + +Note that the zip package includes a folder "rir" and a file "rir.txt." The folder "rir" contains all separated single-channel RIRs extracted from the above two datasets. The suffix (e.g., "m_") of the filename is the index of a microphone. The file "rir.txt" is just a path list of all RIRs. Please modify it to fit your case before you use it. + +For some cases, if you would like to extract channel by yourself, you can download these RIRs from pages: +1. Multichannel Impulse Response Database: https://www.eng.biu.ac.il/~gannot/RIR_DATABASE/ +2. The REVERB challenge data: https://reverb2014.dereverberation.com/tools/reverb_tools_for_Generate_mcTrainData.tgz and https://reverb2014.dereverberation.com/tools/reverb_tools_for_Generate_SimData.tgz \ No newline at end of file diff --git a/docs/workflow.png b/docs/source/usage/workflow.png similarity index 100% rename from docs/workflow.png rename to docs/source/usage/workflow.png diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..1f5b28c --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,42 @@ +# ----------------- Build System ----------------- +[build-system] +requires = ["flit_core >=3.2,<4"] +build-backend = "flit_core.buildapi" + +# ----------------- Metadata ----------------- +[project] +name = "FullSubNet" +description = "FullSubNet is a full-band and sub-band fusion model for single-channel real-time speech enhancement." 
+authors = [{ name = "HAO Xiang", email = "haoxiangsnr@gmail.com" }] +readme = "README.md" +requires-python = ">=3.10" +version = "0.0.1" +classifiers = [ + "Programming Language :: Python :: 3.10", + "License :: OSI Approved :: MIT License", + "Environment :: GPU :: NVIDIA CUDA", + "Operating System :: OS Independent", +] +keywords = ["speech enhancement", "single-channel"] +dependencies = [ + "geomdl", + "joblib", + "librosa", + "matplotlib", + "numpy", + "pydantic", + "pyroomacoustics", + "scipy", + "soundfile", + "toml", + "tqdm", + "typing_inspect", + "webrtcvad", +] +[project.optional-dependencies] +test = ["pytest", "pytest-cov"] +docs = ["sphinx-rtd-theme", "myst-nb", "autodoc_pydantic"] # TODO add python-semantic-release? +build = ["flit"] +[project.urls] +Documentation = "https://FullSubNet.readthedocs.io/en/latest/" +Source = "https://github.com/haoxiangsnr/FullSubNet"