From 8c4f576f1bbf9ef16b1bc34e6ab0afe79fad254e Mon Sep 17 00:00:00 2001 From: Fangjun Kuang Date: Fri, 28 Jun 2024 11:27:19 +0800 Subject: [PATCH] Support .Net framework 2.0 (#1062) --- .../offline-decode-files/Program.cs | 45 +++++++++---------- dotnet-examples/offline-tts/Program.cs | 32 ++++++------- .../online-decode-files/Program.cs | 43 +++++++++--------- scripts/dotnet/OfflineRecognizer.cs | 10 ++++- scripts/dotnet/OnlineRecognizer.cs | 11 ++++- scripts/dotnet/OnlineRecognizerResult.cs | 2 +- scripts/dotnet/run.sh | 10 ++--- scripts/dotnet/sherpa-onnx.csproj.in | 2 +- scripts/dotnet/sherpa-onnx.csproj.runtime.in | 2 +- sherpa-onnx/kotlin-api/OfflineRecognizer.kt | 12 ++--- 10 files changed, 90 insertions(+), 79 deletions(-) diff --git a/dotnet-examples/offline-decode-files/Program.cs b/dotnet-examples/offline-decode-files/Program.cs index 301774f8e..9e0dd792c 100644 --- a/dotnet-examples/offline-decode-files/Program.cs +++ b/dotnet-examples/offline-decode-files/Program.cs @@ -17,75 +17,74 @@ class Options { [Option("sample-rate", Required = false, Default = 16000, HelpText = "Sample rate of the data used to train the model")] - public int SampleRate { get; set; } + public int SampleRate { get; set; } = 16000; [Option("feat-dim", Required = false, Default = 80, HelpText = "Dimension of the features used to train the model")] - public int FeatureDim { get; set; } + public int FeatureDim { get; set; } = 80; [Option(Required = false, HelpText = "Path to tokens.txt")] - public string Tokens { get; set; } + public string Tokens { get; set; } = ""; [Option(Required = false, Default = "", HelpText = "Path to transducer encoder.onnx. Used only for transducer models")] - public string Encoder { get; set; } + public string Encoder { get; set; } = ""; [Option(Required = false, Default = "", HelpText = "Path to transducer decoder.onnx. Used only for transducer models")] - public string Decoder { get; set; } + public string Decoder { get; set; } = ""; [Option(Required = false, Default = "",HelpText = "Path to transducer joiner.onnx. Used only for transducer models")] - public string Joiner { get; set; } + public string Joiner { get; set; } = ""; [Option("model-type", Required = false, Default = "", HelpText = "model type")] - public string ModelType { get; set; } + public string ModelType { get; set; } = ""; [Option("whisper-encoder", Required = false, Default = "", HelpText = "Path to whisper encoder.onnx. Used only for whisper models")] - public string WhisperEncoder { get; set; } + public string WhisperEncoder { get; set; } = ""; [Option("whisper-decoder", Required = false, Default = "", HelpText = "Path to whisper decoder.onnx. Used only for whisper models")] - public string WhisperDecoder { get; set; } + public string WhisperDecoder { get; set; } = ""; [Option("whisper-language", Required = false, Default = "", HelpText = "Language of the input file. Can be empty")] - public string WhisperLanguage{ get; set; } + public string WhisperLanguage{ get; set; } = ""; [Option("whisper-task", Required = false, Default = "transcribe", HelpText = "transcribe or translate")] - public string WhisperTask{ get; set; } + public string WhisperTask{ get; set; } = "transcribe"; [Option("tdnn-model", Required = false, Default = "", HelpText = "Path to tdnn yesno model")] - public string TdnnModel { get; set; } - + public string TdnnModel { get; set; } = ""; [Option(Required = false, HelpText = "Path to model.onnx. Used only for paraformer models")] - public string Paraformer { get; set; } + public string Paraformer { get; set; } = ""; [Option("nemo-ctc", Required = false, HelpText = "Path to model.onnx. Used only for NeMo CTC models")] - public string NeMoCtc { get; set; } + public string NeMoCtc { get; set; } = ""; [Option("telespeech-ctc", Required = false, HelpText = "Path to model.onnx. Used only for TeleSpeech CTC models")] - public string TeleSpeechCtc { get; set; } + public string TeleSpeechCtc { get; set; } = ""; [Option("num-threads", Required = false, Default = 1, HelpText = "Number of threads for computation")] - public int NumThreads { get; set; } + public int NumThreads { get; set; } = 1; [Option("decoding-method", Required = false, Default = "greedy_search", HelpText = "Valid decoding methods are: greedy_search, modified_beam_search")] - public string DecodingMethod { get; set; } + public string DecodingMethod { get; set; } = "greedy_search"; [Option("rule-fsts", Required = false, Default = "", HelpText = "If not empty, path to rule fst for inverse text normalization")] - public string RuleFsts { get; set; } + public string RuleFsts { get; set; } = ""; [Option("max-active-paths", Required = false, Default = 4, HelpText = @"Used only when --decoding--method is modified_beam_search. It specifies number of active paths to keep during the search")] - public int MaxActivePaths { get; set; } + public int MaxActivePaths { get; set; } = 4; [Option("hotwords-file", Required = false, Default = "", HelpText = "Path to hotwords.txt")] - public string HotwordsFile { get; set; } + public string HotwordsFile { get; set; } = ""; [Option("hotwords-score", Required = false, Default = 1.5F, HelpText = "hotwords score")] - public float HotwordsScore { get; set; } + public float HotwordsScore { get; set; } = 1.5F; [Option("files", Required = true, HelpText = "Audio files for decoding")] - public IEnumerable Files { get; set; } + public IEnumerable Files { get; set; } = new string[] {}; } static void Main(string[] args) diff --git a/dotnet-examples/offline-tts/Program.cs b/dotnet-examples/offline-tts/Program.cs index a3fbe2afd..6216095f4 100644 --- a/dotnet-examples/offline-tts/Program.cs +++ b/dotnet-examples/offline-tts/Program.cs @@ -18,49 +18,49 @@ class Options { [Option("tts-rule-fsts", Required = false, Default = "", HelpText = "path to rule.fst")] - public string RuleFsts { get; set; } + public string RuleFsts { get; set; } = ""; [Option("tts-rule-fars", Required = false, Default = "", HelpText = "path to rule.far")] - public string RuleFars { get; set; } + public string RuleFars { get; set; } = ""; [Option("vits-dict-dir", Required = false, Default = "", HelpText = "Path to the directory containing dict for jieba.")] - public string DictDir { get; set; } + public string DictDir { get; set; } = ""; [Option("vits-data-dir", Required = false, Default = "", HelpText = "Path to the directory containing dict for espeak-ng.")] - public string DataDir { get; set; } + public string DataDir { get; set; } = ""; [Option("vits-length-scale", Required = false, Default = 1, HelpText = "speech speed. Larger->Slower; Smaller->faster")] - public float LengthScale { get; set; } + public float LengthScale { get; set; } = 1; [Option("vits-noise-scale", Required = false, Default = 0.667f, HelpText = "noise_scale for VITS models")] - public float NoiseScale { get; set; } + public float NoiseScale { get; set; } = 0.667F; - [Option("vits-noise-scale-w", Required = false, Default = 0.8f, HelpText = "noise_scale_w for VITS models")] - public float NoiseScaleW { get; set; } + [Option("vits-noise-scale-w", Required = false, Default = 0.8F, HelpText = "noise_scale_w for VITS models")] + public float NoiseScaleW { get; set; } = 0.8F; [Option("vits-lexicon", Required = false, Default = "", HelpText = "Path to lexicon.txt")] - public string Lexicon { get; set; } + public string Lexicon { get; set; } = ""; [Option("vits-tokens", Required = false, Default = "", HelpText = "Path to tokens.txt")] - public string Tokens { get; set; } + public string Tokens { get; set; } = ""; [Option("tts-max-num-sentences", Required = false, Default = 1, HelpText = "Maximum number of sentences that we process at a time.")] - public int MaxNumSentences { get; set; } + public int MaxNumSentences { get; set; } = 1; [Option(Required = false, Default = 0, HelpText = "1 to show debug messages.")] - public int Debug { get; set; } + public int Debug { get; set; } = 0; [Option("vits-model", Required = true, HelpText = "Path to VITS model")] - public string Model { get; set; } + public string Model { get; set; } = ""; [Option("sid", Required = false, Default = 0, HelpText = "Speaker ID")] - public int SpeakerId { get; set; } + public int SpeakerId { get; set; } = 0; [Option("text", Required = true, HelpText = "Text to synthesize")] - public string Text { get; set; } + public string Text { get; set; } = ""; [Option("output-filename", Required = true, Default = "./generated.wav", HelpText = "Path to save the generated audio")] - public string OutputFilename { get; set; } + public string OutputFilename { get; set; } = "./generated.wav"; } static void Main(string[] args) diff --git a/dotnet-examples/online-decode-files/Program.cs b/dotnet-examples/online-decode-files/Program.cs index 3e8ee93e8..ad53624de 100644 --- a/dotnet-examples/online-decode-files/Program.cs +++ b/dotnet-examples/online-decode-files/Program.cs @@ -18,81 +18,80 @@ class OnlineDecodeFiles class Options { [Option(Required = true, HelpText = "Path to tokens.txt")] - public string Tokens { get; set; } + public string Tokens { get; set; } = ""; [Option(Required = false, Default = "cpu", HelpText = "Provider, e.g., cpu, coreml")] - public string Provider { get; set; } + public string Provider { get; set; } = ""; [Option(Required = false, HelpText = "Path to transducer encoder.onnx")] - public string Encoder { get; set; } + public string Encoder { get; set; } = ""; [Option(Required = false, HelpText = "Path to transducer decoder.onnx")] - public string Decoder { get; set; } + public string Decoder { get; set; } = ""; [Option(Required = false, HelpText = "Path to transducer joiner.onnx")] - public string Joiner { get; set; } + public string Joiner { get; set; } = ""; [Option("paraformer-encoder", Required = false, HelpText = "Path to paraformer encoder.onnx")] - public string ParaformerEncoder { get; set; } + public string ParaformerEncoder { get; set; } = ""; [Option("paraformer-decoder", Required = false, HelpText = "Path to paraformer decoder.onnx")] - public string ParaformerDecoder { get; set; } + public string ParaformerDecoder { get; set; } = ""; [Option("zipformer2-ctc", Required = false, HelpText = "Path to zipformer2 CTC onnx model")] - public string Zipformer2Ctc { get; set; } + public string Zipformer2Ctc { get; set; } = ""; [Option("num-threads", Required = false, Default = 1, HelpText = "Number of threads for computation")] - public int NumThreads { get; set; } + public int NumThreads { get; set; } = 1; [Option("decoding-method", Required = false, Default = "greedy_search", HelpText = "Valid decoding methods are: greedy_search, modified_beam_search")] - public string DecodingMethod { get; set; } + public string DecodingMethod { get; set; } = "greedy_search"; [Option(Required = false, Default = false, HelpText = "True to show model info during loading")] - public bool Debug { get; set; } + public bool Debug { get; set; } = false; [Option("sample-rate", Required = false, Default = 16000, HelpText = "Sample rate of the data used to train the model")] - public int SampleRate { get; set; } + public int SampleRate { get; set; } = 16000; [Option("max-active-paths", Required = false, Default = 4, HelpText = @"Used only when --decoding--method is modified_beam_search. It specifies number of active paths to keep during the search")] - public int MaxActivePaths { get; set; } + public int MaxActivePaths { get; set; } = 4; [Option("enable-endpoint", Required = false, Default = false, HelpText = "True to enable endpoint detection.")] - public bool EnableEndpoint { get; set; } + public bool EnableEndpoint { get; set; } = false; [Option("rule1-min-trailing-silence", Required = false, Default = 2.4F, HelpText = @"An endpoint is detected if trailing silence in seconds is larger than this value even if nothing has been decoded. Used only when --enable-endpoint is true.")] - public float Rule1MinTrailingSilence { get; set; } + public float Rule1MinTrailingSilence { get; set; } = 2.4F; [Option("rule2-min-trailing-silence", Required = false, Default = 1.2F, HelpText = @"An endpoint is detected if trailing silence in seconds is larger than this value after something that is not blank has been decoded. Used only when --enable-endpoint is true.")] - public float Rule2MinTrailingSilence { get; set; } + public float Rule2MinTrailingSilence { get; set; } = 1.2F; [Option("rule3-min-utterance-length", Required = false, Default = 20.0F, HelpText = @"An endpoint is detected if the utterance in seconds is larger than this value. Used only when --enable-endpoint is true.")] - public float Rule3MinUtteranceLength { get; set; } + public float Rule3MinUtteranceLength { get; set; } = 20.0F; [Option("hotwords-file", Required = false, Default = "", HelpText = "Path to hotwords.txt")] - public string HotwordsFile { get; set; } + public string HotwordsFile { get; set; } = ""; [Option("hotwords-score", Required = false, Default = 1.5F, HelpText = "hotwords score")] - public float HotwordsScore { get; set; } + public float HotwordsScore { get; set; } = 1.5F; [Option("rule-fsts", Required = false, Default = "", HelpText = "If not empty, path to rule fst for inverse text normalization")] - public string RuleFsts { get; set; } + public string RuleFsts { get; set; } = ""; [Option("files", Required = true, HelpText = "Audio files for decoding")] - public IEnumerable Files { get; set; } - + public IEnumerable Files { get; set; } = new string[] {}; } static void Main(string[] args) diff --git a/scripts/dotnet/OfflineRecognizer.cs b/scripts/dotnet/OfflineRecognizer.cs index 1acda5dfd..761b5dbfb 100644 --- a/scripts/dotnet/OfflineRecognizer.cs +++ b/scripts/dotnet/OfflineRecognizer.cs @@ -2,7 +2,6 @@ using System; using System.Collections.Generic; -using System.Linq; using System.Runtime.InteropServices; namespace SherpaOnnx @@ -29,7 +28,14 @@ public void Decode(OfflineStream stream) // The caller should ensure all passed streams are ready for decoding. public void Decode(IEnumerable streams) { - IntPtr[] ptrs = streams.Select(s => s.Handle).ToArray(); + // TargetFramework=net20 does not support System.Linq + // IntPtr[] ptrs = streams.Select(s => s.Handle).ToArray(); + List list = new List(); + foreach (OfflineStream s in streams) + { + list.Add(s.Handle); + } + IntPtr[] ptrs = list.ToArray(); Decode(_handle.Handle, ptrs, ptrs.Length); } diff --git a/scripts/dotnet/OnlineRecognizer.cs b/scripts/dotnet/OnlineRecognizer.cs index f2b7cdea8..48a2c92b9 100644 --- a/scripts/dotnet/OnlineRecognizer.cs +++ b/scripts/dotnet/OnlineRecognizer.cs @@ -3,7 +3,6 @@ /// Copyright (c) 2024.5 by 东风破 using System; using System.Collections.Generic; -using System.Linq; using System.Runtime.InteropServices; namespace SherpaOnnx @@ -49,7 +48,15 @@ public void Decode(OnlineStream stream) // The caller should ensure all passed streams are ready for decoding. public void Decode(IEnumerable streams) { - IntPtr[] ptrs = streams.Select(s => s.Handle).ToArray(); + // TargetFramework=net20 does not support System.Linq + // IntPtr[] ptrs = streams.Select(s => s.Handle).ToArray(); + List list = new List(); + foreach (OnlineStream s in streams) + { + list.Add(s.Handle); + } + + IntPtr[] ptrs = list.ToArray(); Decode(_handle.Handle, ptrs, ptrs.Length); } diff --git a/scripts/dotnet/OnlineRecognizerResult.cs b/scripts/dotnet/OnlineRecognizerResult.cs index d13fb0cc0..328653ee5 100644 --- a/scripts/dotnet/OnlineRecognizerResult.cs +++ b/scripts/dotnet/OnlineRecognizerResult.cs @@ -77,7 +77,7 @@ public OnlineRecognizerResult(IntPtr handle) } else { - _timestamps = Array.Empty(); + _timestamps = new float[] {}; } } } diff --git a/scripts/dotnet/run.sh b/scripts/dotnet/run.sh index 4b6d2bf00..8beb8f34d 100755 --- a/scripts/dotnet/run.sh +++ b/scripts/dotnet/run.sh @@ -38,7 +38,7 @@ macos_arm64_wheel=$src_dir/$macos_arm64_wheel_filename windows_x64_wheel_filename=sherpa_onnx-${SHERPA_ONNX_VERSION}-cp38-cp38-win_amd64.whl windows_x64_wheel=$src_dir/$windows_x64_wheel_filename -windows_x86_wheel_filename=sherpa_onnx-${SHERPA_ONNX_VERSION}-cp38-cp38-win32.whl +windows_x86_wheel_filename=sherpa-onnx-${SHERPA_ONNX_VERSION}-win-x86.tar.bz2 windows_x86_wheel=$src_dir/$windows_x86_wheel_filename windows_arm64_wheel_filename=sherpa-onnx-${SHERPA_ONNX_VERSION}-win-arm64.tar.bz2 @@ -138,10 +138,10 @@ if [ ! -f $src_dir/windows-x86/sherpa-onnx-core.dll ]; then if [ -f $windows_x86_wheel ]; then cp -v $windows_x86_wheel . else - curl -OL https://$HF_MIRROR/csukuangfj/sherpa-onnx-wheels/resolve/main/$windows_x86_wheel_filename + curl -OL https://$HF_MIRROR/csukuangfj/sherpa-onnx-libs/resolve/main/windows-for-dotnet/$windows_x86_wheel_filename fi - unzip $windows_x86_wheel_filename - cp -v sherpa_onnx-${SHERPA_ONNX_VERSION}.data/data/bin/*.dll ../ + tar xvf $windows_x86_wheel_filename + cp -v sherpa-onnx-${SHERPA_ONNX_VERSION}-win-x86/*dll ../ cd .. rm -rf wheel @@ -159,7 +159,7 @@ if [ ! -f $src_dir/windows-arm64/sherpa-onnx-core.dll ]; then else curl -OL https://$HF_MIRROR/csukuangfj/sherpa-onnx-libs/resolve/main/windows-for-dotnet/$windows_arm64_wheel_filename fi - unzip $windows_arm64_wheel_filename + tar xvf $windows_arm64_wheel_filename cp -v sherpa-onnx-${SHERPA_ONNX_VERSION}-win-arm64/*dll ../ cd .. diff --git a/scripts/dotnet/sherpa-onnx.csproj.in b/scripts/dotnet/sherpa-onnx.csproj.in index 25a2b5c1f..b155c60fe 100644 --- a/scripts/dotnet/sherpa-onnx.csproj.in +++ b/scripts/dotnet/sherpa-onnx.csproj.in @@ -4,7 +4,7 @@ README.md Library 10.0 - netstandard2.0 + net6.0;net45;net40;net35;net20;netstandard2.0 linux-x64;osx-x64;osx-arm64;win-x64;win-x86;win-arm64 true sherpa-onnx diff --git a/scripts/dotnet/sherpa-onnx.csproj.runtime.in b/scripts/dotnet/sherpa-onnx.csproj.runtime.in index f90ae2c3c..2a387ccea 100644 --- a/scripts/dotnet/sherpa-onnx.csproj.runtime.in +++ b/scripts/dotnet/sherpa-onnx.csproj.runtime.in @@ -3,7 +3,7 @@ Apache-2.0 README.md Library - netstandard2.0 + net6.0;net45;net40;net35;net20;netstandard2.0 {{ dotnet_rid }} sherpa-onnx {{ version }} diff --git a/sherpa-onnx/kotlin-api/OfflineRecognizer.kt b/sherpa-onnx/kotlin-api/OfflineRecognizer.kt index fdf91a6d6..e5f1547d3 100644 --- a/sherpa-onnx/kotlin-api/OfflineRecognizer.kt +++ b/sherpa-onnx/kotlin-api/OfflineRecognizer.kt @@ -171,7 +171,7 @@ fun getOfflineModelConfig(type: Int): OfflineModelConfig? { joiner = "$modelDir/joiner-epoch-30-avg-4.onnx", ), tokens = "$modelDir/tokens.txt", - modelType = "zipformer", + modelType = "transducer", ) } @@ -209,7 +209,7 @@ fun getOfflineModelConfig(type: Int): OfflineModelConfig? { joiner = "$modelDir/joiner-epoch-12-avg-4.int8.onnx", ), tokens = "$modelDir/tokens.txt", - modelType = "zipformer", + modelType = "transducer", ) } @@ -222,7 +222,7 @@ fun getOfflineModelConfig(type: Int): OfflineModelConfig? { joiner = "$modelDir/joiner-epoch-20-avg-1.int8.onnx", ), tokens = "$modelDir/tokens.txt", - modelType = "zipformer2", + modelType = "transducer", ) } @@ -281,7 +281,7 @@ fun getOfflineModelConfig(type: Int): OfflineModelConfig? { return OfflineModelConfig( teleSpeech = "$modelDir/model.int8.onnx", tokens = "$modelDir/tokens.txt", - modelType = "tele_speech", + modelType = "telespeech_ctc", ) } @@ -294,7 +294,7 @@ fun getOfflineModelConfig(type: Int): OfflineModelConfig? { joiner = "$modelDir/joiner-epoch-12-avg-5.int8.onnx", ), tokens = "$modelDir/tokens.txt", - modelType = "zipformer2", + modelType = "transducer", ) } @@ -307,7 +307,7 @@ fun getOfflineModelConfig(type: Int): OfflineModelConfig? { joiner = "$modelDir/joiner-epoch-99-avg-1.int8.onnx", ), tokens = "$modelDir/tokens.txt", - modelType = "zipformer2", + modelType = "transducer", ) } }