Skip to content

Commit

Permalink
Feat/diarization v1 (#206)
Browse files Browse the repository at this point in the history
* feat: initial support for diarization

* fix build errors

* bundle models

* more checks

* translations

* translations

* fix build

* fix windows build

* fix pre build with static crt

* misc

* fix pre build

* v2.4.0-beta.0

* fix pre build

* fix pre build

* misc

* fix pyannote-rs

* feat: download models for diarization instead of embedding them

* fix speaker search
  • Loading branch information
thewh1teagle authored Aug 8, 2024
1 parent 3fd723d commit d7172ae
Show file tree
Hide file tree
Showing 38 changed files with 615 additions and 389 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -24,3 +24,4 @@ node_modules/
.DS_Store
venv/
*.bin
*.onnx
2 changes: 1 addition & 1 deletion .vscode/settings.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
},
"files.eol": "\n",
// Rust analyzer
"rust-analyzer.checkOnSave": false,
"rust-analyzer.checkOnSave": true,
// "rust-analyzer.check.command": "clippy",
"[rust]": {
"editor.defaultFormatter": "rust-lang.rust-analyzer"
Expand Down
124 changes: 122 additions & 2 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 4 additions & 3 deletions core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ tokio = { version = "1.35.1", features = [
] }
serde_json = { workspace = true }
futures-util = "0.3.30"
# sherpa-rs = { version = "0.1.4-beta.2", path = "../../sherpa-rs" }
pyannote-rs = "0.2.2"

[dev-dependencies]

Expand All @@ -47,13 +47,14 @@ glob = "0.3.1"

[features]
default = []
diarize = ["sherpa-rs"]
coreml = ["whisper-rs/coreml"]
coreml = ["whisper-rs/coreml", "pyannote-rs/coreml"]
directml = ["pyannote-rs/directml"]
cuda = ["whisper-rs/cuda"]
opencl = ["whisper-rs/opencl"]
openblas = ["whisper-rs/openblas"]
metal = ["whisper-rs/metal"]
rocm = ["whisper-rs/hipblas"]


[profile.test]
inherits = "release"
2 changes: 1 addition & 1 deletion core/build.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use std::env;
use std::path::{Path, PathBuf};
use std::process::Command;
use std::{env, fs};

fn copy_file(src: &Path, dst: &Path) {
if dst.exists() {
Expand Down
4 changes: 4 additions & 0 deletions core/src/audio.rs
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,10 @@ pub fn normalize(input: PathBuf, output: PathBuf) -> Result<()> {
if !pid.wait()?.success() {
bail!("unable to convert file")
}

if !output.exists() {
bail!("seems like ffmpeg failed for some reason. output not exists")
}
Ok(())
}

Expand Down
7 changes: 0 additions & 7 deletions core/src/config.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
use core::fmt;
use serde::{Deserialize, Serialize};
use std::path::PathBuf;
use utoipa::ToSchema;

#[derive(Deserialize, Serialize, ToSchema)]
Expand All @@ -18,12 +17,6 @@ pub struct TranscribeOptions {
pub max_sentence_len: Option<i32>,
}

#[derive(Deserialize, Serialize, ToSchema)]
pub struct DiarizeOptions {
pub vad_model_path: PathBuf,
pub speaker_id_model_path: PathBuf,
}

impl fmt::Debug for TranscribeOptions {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let json_string = serde_json::to_string_pretty(self).map_err(|_| fmt::Error)?;
Expand Down
Loading

0 comments on commit d7172ae

Please sign in to comment.