-
Notifications
You must be signed in to change notification settings - Fork 0
/
generate.py
188 lines (184 loc) · 9.58 KB
/
generate.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
import subprocess
def generate_sound(
    prompt: str,
    model_name: str = "audioldm-s-full",
    duration: float = 2.5,
    batch_size: int = 1,
    num_candidates: int = 3,
    guidance_scale: float = 2.5,
    seed: int = 100,
) -> bytes:
    """Run the ``audioldm`` command-line tool in generation mode for one prompt.

    Each argument is converted to a string and forwarded as a CLI flag. The
    command is passed as an argument list (no shell), so the prompt text does
    not need quoting or escaping.

    Args:
        prompt: The text prompt to generate audio for (``--text``).
        model_name: Name of the model checkpoint to use (``--model_name``).
            Default is "audioldm-s-full".
        duration: Length of the clip in seconds (``--duration``).
            Default is 2.5.
        batch_size: Number of clips to generate in parallel (``--batchsize``).
            Default is 1.
        num_candidates: Number of candidates generated per prompt, from which
            the best one is chosen (``--n_candidate_gen_per_text``).
            Default is 3.
        guidance_scale: How strongly the audio should match the prompt;
            higher values trade diversity for quality (``--guidance_scale``).
            Default is 2.5.
        seed: Seed for the generation process (``--seed``). Default is 100.

    Returns:
        The bytes the ``audioldm`` process wrote to standard output.
        NOTE(review): earlier documentation described this as raw audio data;
        whether the CLI actually emits audio on stdout (rather than writing
        files and printing logs) is not visible from this code -- confirm.

    Raises:
        FileNotFoundError: If the ``audioldm`` executable cannot be found.
    """
    # Local import keeps this function self-contained; the module itself only
    # imports ``subprocess``.
    import logging

    command = [
        "audioldm",
        "--mode", "generation",
        "--text", prompt,
        "--duration", str(duration),
        "--batchsize", str(batch_size),
        "--n_candidate_gen_per_text", str(num_candidates),
        "--guidance_scale", str(guidance_scale),
        "--seed", str(seed),
        "--model_name", model_name,
    ]
    result = subprocess.run(command, capture_output=True)

    # stderr is captured, so without this a failed run would be invisible.
    # Warn instead of raising so existing callers keep receiving stdout.
    if result.returncode != 0:
        logging.getLogger(__name__).warning(
            "audioldm exited with status %d for prompt %r: %s",
            result.returncode,
            prompt,
            result.stderr.decode(errors="replace").strip(),
        )

    return result.stdout
# Text prompts for audio generation, kept in their original order.
# Several entries appear more than once; they are preserved as-is.
prompts = [
    "The thumping of bass from a music festival",
    "The sound of a car engine revving up",
    "The whooshing of wind through a car window",
    "The clinking of glasses in a bar",
    "The splashing of water in a swimming pool",
    "The crunching of leaves underfoot on a autumn day",
    "The sound of a bag of chips being opened",
    "The squeaking of a door hinge",
    "The sound of a person snoring",
    "The whistle of a tea kettle boiling",
    "The ringing of a telephone",
    "The pitter-patter of rain on a roof",
    "The sound of a person laughing",
    "The clinking of silverware on a plate",
    "The sound of a person yawning",
    "The sound of a person coughing",
    "The crackling of ice in a glass",
    "The sound of a person sneezing",
    "The bubbling of a hot tub",
    "The sound of a person singing",
    "The screeching of brakes on a car",
    "The sound of a person playing guitar",
    "The sound of a person typing on a keyboard",
    "The thumping of a bass drum",
    "The buzzing of an electric razor",
    "The sound of a person brushing their teeth",
    "The chime of a grandfather clock",
    "The sound of a person whistling",
    "The clanging of a bell in a clock tower",
    "The sound of a person humming",
    "The chattering of teeth in cold weather",
    "The clinking of coins in a piggy bank",
    "The sound of a person blowing their nose",
    "The sound of a person sniffling",
    "The whooshing of a vacuum cleaner",
    "The sound of a person walking in high heels",
    "The sound of a person blowing bubbles in gum",
    "The clinking of ice cubes in a drink",
    "The rustling of clothes as a person moves",
    "The sound of a person drinking from a straw",
    "The sound of a person cracking their knuckles",
    "The sound of a person blowing on hot food",
    "The whooshing of a ceiling fan",
    "The sound of a person gasping",
    "The sound of a person swallowing",
    "The sound of a person hiccoughing",
    "The sound of a person slurping soup",
    "The sound of a person gargling mouthwash",
    "The clicking of a computer mouse",
    "The sound of a person tapping their foot",
    "The sound of a person shaking a can of spray paint",
    "The sound of a person chewing gum",
    "The sound of a person blowing up a balloon",
    "The sound of a person opening a can of soda",
    "The sound of a person cracking open a beer",
    "The sound of a person opening a bottle of wine",
    "The sound of a person sipping coffee",
    "The sound of a person pouring water into a glass",
    "The sound of a person closing a car door",
    "The sound of a person tying their shoes",
    "The sound of a person zipping up a jacket",
    "The sound of a person unzipping a backpack",
    "The sound of a person unlocking a door",
    "The sound of a person clicking a pen",
    "The sound of a person turning a page in a book",
    "The sound of a person setting down a cup on a saucer",
    "The sound of a person setting down a heavy object",
    "The sound of a person blowing up a balloon",
    "The sound of a person tapping their fingernails on a surface",
    "The sound of a person using a stapler",
    "The sound of a person ripping a piece of paper",
    "The sound of a person opening a package",
    "The sound of a person tearing open an envelope",
    "The sound of a person crumpling a piece of paper",
    "The sound of a person tearing a piece of tape",
    "The sound of a person rolling dice on a table",
    "The sound of a person shaking a can of soda",
    "The sound of a person sharpening a pencil",
    "The sound of a person using a paper cutter",
    "The sound of a person punching",
    "The buzz of an electric razor",
    "The sound of a tea kettle whistling",
    "The rustling of clothes as someone walks",
    "The sound of a car engine revving",
    "The clink of ice cubes in a glass",
    "The sound of a person sneezing",
    "The thumping of music from a nearby party",
    "The sound of a camera shutter clicking",
    "The crackling of ice as it freezes",
    "The sound of a snowplow clearing a road",
    "The clicking of a computer mouse",
    "The swoosh of a basketball going through a net",
    "The sound of a foghorn in the distance",
    "The hum of a refrigerator running",
    "The sound of a person yawning",
    "The scraping of a knife against a cutting board",
    "The sound of a person laughing",
    "The buzz of a drone in the sky",
    "The sound of a person whistling",
    "The tapping of fingers on a table",
    "The sound of a person coughing",
    "The clicking of a pen being clicked",
    "The sound of a person sighing",
    "The chime of a doorbell ringing",
    "The sound of a person snoring",
    "The rustling of a bag of chips being opened",
    "The sound of a person humming",
    "The popping of popcorn kernels in a microwave",
    "The sound of a person singing",
    "The clinking of dishes being set on a table",
    "The sound of a person crying",
    "The sizzling of food cooking on a hot pan",
    "The sound of a person talking on the phone",
    "The crunching of leaves underfoot",
    "The sound of a person playing a musical instrument",
    "The splashing of water in a pool",
    "The sound of a person typing on a keyboard",
    "The tapping of a foot on the ground",
    "The sound of a person walking in high heels",
    "The crackling of a bag of popcorn being opened",
    "The sound of a person whispering",
    "The thudding of a heartbeat",
    "The sound of a person brushing their teeth",
    "The swishing of a broom sweeping a floor",
    "The sound of a person exercising",
    "The clanging of weights in a gym",
    "The sound of a person meditating",
    "The gentle ringing of wind chimes",
    "The sound of a person shuffling cards",
    "The squeaking of a door hinge",
    "The sound of a person knitting",
    "The shuffling of papers in a folder",
    "The sound of a person yawning",
    "The whirring of a sewing machine",
    "The sound of a person painting",
    "The tapping of a drumstick on a drum",
    "The sound of a person blowing a whistle",
    "The clinking of a champagne glass being raised for a toast",
    "The sound of a person playing a video game",
    "The roaring of a crowd at a sports game",
    "The sound of a person using a power tool",
    "The sound of a person blowing their nose",
    "The bubbling of a pot on a stove",
    "The sound of a person blowing up a balloon",
    "The swoosh of a broom hitting a basketball",
    "The sound of a person riding a rollercoaster",
    "The sound of a person using a chainsaw",
    "The thud of a bowling ball hitting the pins",
    "The sound of a person playing chess",
    "The ringing of an alarm clock in the morning",
    "The sound of a person playing with a Rubik's Cube",
    "The patter of rain on a roof",
    "The sound of a person playing a pinball machine",
    "The jangling of keys in a pocket",
    "The sound of a person typing on a typewriter",
    "The whistle of a tea kettle boiling",
    "The sound of a person using a hand mixer",
    "The crackling of a fireworks display",
    "The sound of a person playing a harmonica",
    "The popping of a cork from a bottle",
    "The sound of a person playing with a fidget spinner",
    "The sound of a person walking on a gravel path",
    "The bubbling of a pot of boiling water on the stove",
    "The sound of a car engine revving",
    "The beep of a digital alarm clock",
    "The sound of a paper shredder",
    "The whirring of a food processor",
    "The rustling of a plastic bag being opened",
    "The sound of a guitar being strummed",
    "The clinking of glasses at a dinner party",
    "The sound of a baby crying",
    "The rumbling of a motorcycle passing by",
    "The clanking of weights in a gym",
    "The sound of a woodpecker pecking at a tree",
    "The beep of a microwave finishing its cycle",
    "The crackling of a bag of chips being opened",
    "The sound of a person sneezing",
    "The rustling of a curtain in the wind",
    "The sound of a door slamming shut",
    "The buzzing of an electric razor",
    "The whirring of a vacuum cleaner",
    "The clinking of silverware being set on a table",
    "The sound of a person yawning",
    "The splash of water in a swimming pool",
    "The sound of a person whistling a tune",
    "The beep of a car horn being honked",
    "The sound of a person laughing",
    "The ringing of a telephone",
    "The sound of a person sighing",
    "The crackling of a firework exploding",
    "The sound of a person coughing",
    "The rustling of leaves being blown by the wind",
    "The sound of a person blowing out a candle",
    "The swooshing of a basketball going through a hoop",
    "The sound of a person humming a song",
    "The beep of a heart rate monitor in a hospital",
    "The sound of a person snoring",
    "The splashing of water in a fountain",
    "The clinking of a spoon stirring a cup of tea",
    "The sound of a person hiccuping.",
]
# Generate one clip per prompt, in list order. The value returned by
# generate_sound is not used here; the progress line is printed as soon as
# each call returns and does not inspect the outcome.
for prompt in prompts:
    generate_sound(prompt)
    print(f'generated: {prompt}')