From 9926c82fe35f6bd432c534a5c9836824a8d965a0 Mon Sep 17 00:00:00 2001
From: christhetree
Date: Wed, 7 Feb 2024 14:41:42 +0000
Subject: [PATCH] [cm] Removing examples for PR

---
 examples/example_clipper_gen.py | 124 ----------------------------
 examples/music_gen.py           | 140 --------------------------------
 2 files changed, 264 deletions(-)
 delete mode 100644 examples/example_clipper_gen.py
 delete mode 100644 examples/music_gen.py

diff --git a/examples/example_clipper_gen.py b/examples/example_clipper_gen.py
deleted file mode 100644
index 03e534d..0000000
--- a/examples/example_clipper_gen.py
+++ /dev/null
@@ -1,124 +0,0 @@
-import logging
-import os
-import pathlib
-from argparse import ArgumentParser
-from typing import Dict, List, Optional
-
-import torch as tr
-import torch.nn as nn
-from torch import Tensor
-
-from neutone_sdk import NeutoneParameter, TextNeutoneParameter, \
-    ContinuousNeutoneParameter, CategoricalNeutoneParameter
-from neutone_sdk.non_realtime_wrapper import NonRealtimeBase
-
-logging.basicConfig()
-log = logging.getLogger(__name__)
-log.setLevel(level=os.environ.get("LOGLEVEL", "INFO"))
-
-
-class ClipperModel(nn.Module):
-    def forward(
-        self, x: Tensor, min_val: Tensor, max_val: Tensor, gain: Tensor
-    ) -> Tensor:
-        tr.neg(min_val, out=min_val)
-        tr.mul(gain, min_val, out=min_val)
-        tr.mul(gain, max_val, out=max_val)
-        tr.clip(x, min=min_val, max=max_val, out=x)
-        return x
-
-
-class ClipperModelWrapper(NonRealtimeBase):
-    def get_model_name(self) -> str:
-        return "clipper"
-
-    def get_model_authors(self) -> List[str]:
-        return ["Christopher Mitcheltree"]
-
-    def get_model_short_description(self) -> str:
-        return "Audio clipper."
-
-    def get_model_long_description(self) -> str:
-        return "Clips the input audio between -1 and 1."
-
-    def get_technical_description(self) -> str:
-        return "Clips the input audio between -1 and 1."
-
-    def get_technical_links(self) -> Dict[str, str]:
-        return {
-            "Code": "https://github.com/QosmoInc/neutone_sdk/blob/main/examples/example_clipper_gen.py"
-        }
-
-    def get_tags(self) -> List[str]:
-        return ["clipper"]
-
-    def get_model_version(self) -> str:
-        return "1.0.0"
-
-    def is_experimental(self) -> bool:
-        return False
-
-    def get_neutone_parameters(self) -> List[NeutoneParameter]:
-        return [
-            TextNeutoneParameter("text_param", "testing"),
-            ContinuousNeutoneParameter("min", "min clip threshold", default_value=0.15),
-            CategoricalNeutoneParameter("cat", "catty", n_values=3, default_value=2),
-            ContinuousNeutoneParameter("max", "max clip threshold", default_value=0.15),
-            ContinuousNeutoneParameter("gain", "scale clip threshold", default_value=1.0),
-            # ContinuousNeutoneParameter("gain2", "scale clip threshold", default_value=1.0),
-        ]
-
-    @tr.jit.export
-    def get_audio_in_channels(self) -> List[int]:
-        return [2]
-
-    @tr.jit.export
-    def get_audio_out_channels(self) -> List[int]:
-        return [2]
-
-    @tr.jit.export
-    def get_native_sample_rates(self) -> List[int]:
-        return []  # Supports all sample rates
-
-    @tr.jit.export
-    def get_native_buffer_sizes(self) -> List[int]:
-        return []  # Supports all buffer sizes
-
-    @tr.jit.export
-    def is_one_shot_model(self) -> bool:
-        return False
-
-    def aggregate_continuous_params(self, cont_params: Tensor) -> Tensor:
-        return cont_params  # We want sample-level control, so no aggregation
-
-    def do_forward_pass(self,
-                        curr_block_idx: int,
-                        audio_in: List[Tensor],
-                        cont_params: Dict[str, Tensor],
-                        text_params: List[str]) -> List[Tensor]:
-        # print(cont_params)
-        # print(text_params)
-        # exit()
-        min_val, max_val, gain = cont_params["min"], cont_params["max"], cont_params["gain"]
-        audio_out = []
-        for x in audio_in:
-            x = self.model.forward(x, min_val, max_val, gain)
-            audio_out.append(x)
-        return audio_out
-
-
-if __name__ == "__main__":
-    parser = ArgumentParser()
-    parser.add_argument("-o", "--output", default="export_model")
-    args = parser.parse_args()
-    root_dir = pathlib.Path(args.output)
-
-    model = ClipperModel()
-    wrapper = ClipperModelWrapper(model)
-    # wrapper.forward(0, [tr.rand(2, 2048)], knob_params=tr.tensor([[0.5], [0.1], [0.2], [0.3]]))
-    # wrapper.forward(0, [tr.rand(2, 2048)], text_params=["ayy"])
-    wrapper.forward(0, [tr.rand(2, 2048)])
-
-    ts = tr.jit.script(wrapper)
-    ts.forward(0, [tr.rand(2, 2048)])
-    # ts.forward(0, [tr.rand(2, 2048)], text_params=["ayy"])
diff --git a/examples/music_gen.py b/examples/music_gen.py
deleted file mode 100644
index 4842c0c..0000000
--- a/examples/music_gen.py
+++ /dev/null
@@ -1,140 +0,0 @@
-import logging
-import os
-from typing import Dict, List
-
-import torch as tr
-from torch import Tensor
-
-from neutone_sdk import NeutoneParameter, TextNeutoneParameter, \
-    CategoricalNeutoneParameter
-from neutone_sdk.non_realtime_wrapper import NonRealtimeBase
-
-logging.basicConfig()
-log = logging.getLogger(__name__)
-log.setLevel(level=os.environ.get("LOGLEVEL", "INFO"))
-
-
-class MusicGenModelWrapper(NonRealtimeBase):
-    def get_model_name(self) -> str:
-        return "MusicGen.example"
-
-    def get_model_authors(self) -> List[str]:
-        return ["Christopher Mitcheltree"]
-
-    def get_model_short_description(self) -> str:
-        return "MusicGen model."
-
-    def get_model_long_description(self) -> str:
-        return "MusicGen model."
-
-    def get_technical_description(self) -> str:
-        return "MusicGen model."
-
-    def get_technical_links(self) -> Dict[str, str]:
-        return {
-            "Paper": "https://arxiv.org/abs/2306.05284",
-            "Code": "https://github.com/facebookresearch/audiocraft/"
-        }
-
-    def get_tags(self) -> List[str]:
-        return ["musicgen"]
-
-    def get_model_version(self) -> str:
-        return "1.0.0"
-
-    def is_experimental(self) -> bool:
-        return False
-
-    def get_neutone_parameters(self) -> List[NeutoneParameter]:
-        return [
-            TextNeutoneParameter(name="prompt",
-                                 description="text prompt for generation",
-                                 max_n_chars=128,
-                                 default_value="techno kick drum"),
-            CategoricalNeutoneParameter(name="duration",
-                                        description="how many seconds to generate",
-                                        n_values=8,
-                                        default_value=0,
-                                        labels=[str(idx) for idx in range(1, 9)]),
-        ]
-
-    @tr.jit.export
-    def get_audio_in_channels(self) -> List[int]:
-        return []
-
-    @tr.jit.export
-    def get_audio_out_channels(self) -> List[int]:
-        return [1]
-
-    @tr.jit.export
-    def get_native_sample_rates(self) -> List[int]:
-        return [32000]
-
-    @tr.jit.export
-    def get_native_buffer_sizes(self) -> List[int]:
-        return []
-
-    @tr.jit.export
-    def is_one_shot_model(self) -> bool:
-        return True
-
-    def do_forward_pass(self,
-                        curr_block_idx: int,
-                        audio_in: List[Tensor],
-                        knob_params: Dict[str, Tensor],
-                        text_params: List[str]) -> List[Tensor]:
-        # The extra cast to int is needed for TorchScript
-        n_seconds = int(knob_params["duration"].item()) + 1
-        # Convert duration to number of tokens
-        n_tokens = (n_seconds * 50) + 4
-        if self.use_debug_mode:
-            assert len(text_params) == 1
-            # TorchScript does not support logging statements
-            print("Preprocessing...")
-        # Preprocess
-        input_ids, encoder_outputs, delay_pattern_mask, encoder_attention_mask = (
-            self.model.preprocess(text_params, n_tokens)
-        )
-        # Generate
-        for idx in range(n_tokens - 1):
-            if self.should_cancel_forward_pass():
-                return []
-            input_ids = self.model.sample_step(input_ids,
-                                               encoder_outputs,
-                                               delay_pattern_mask,
-                                               encoder_attention_mask)
-            percentage_progress = int((idx + 1) / n_tokens * 100)
-            self.set_progress_percentage(percentage_progress)
-            if self.use_debug_mode:
-                # TorchScript does not support logging statements
-                print(f"Generating token {idx + 1}/{n_tokens}...")
-                print(f"Progress: {self.get_progress_percentage()}%")
-        if self.use_debug_mode:
-            # TorchScript does not support logging statements
-            print("Postprocessing...")
-        # Postprocess
-        audio_out = self.model.postprocess(input_ids, delay_pattern_mask, text_params)
-        # Remove batch dimension
-        audio_out = audio_out.squeeze(0)
-        return [audio_out]
-
-
-if __name__ == "__main__":
-    import torchtext
-    model = tr.jit.load("../out/musicgen.ts")
-    wrapper = MusicGenModelWrapper(model)
-    # audio_out = wrapper.forward(curr_block_idx=0,
-    #                             audio_in=[],
-    #                             knob_params=tr.tensor([0.0]).unsqueeze(1),
-    #                             text_params=["testing"])
-    # log.info(audio_out[0].shape)
-
-    # wrapper.prepare_for_inference()
-    ts = tr.jit.script(wrapper)
-    audio_out = ts.forward(curr_block_idx=0,
-                           audio_in=[],
-                           knob_params=tr.tensor([0.0]).unsqueeze(1),
-                           text_params=["testing"])
-    log.info(audio_out[0].shape)
-
-    log.info("done")