Skip to content

Commit

Permalink
Add Audio PAIRS audio scenario (#3149)
Browse files Browse the repository at this point in the history
  • Loading branch information
ImKeTT authored Nov 10, 2024
1 parent b307f23 commit 3d28de9
Show file tree
Hide file tree
Showing 3 changed files with 106 additions and 1 deletion.
20 changes: 20 additions & 0 deletions src/helm/benchmark/run_specs/audio_run_specs.py
Original file line number Diff line number Diff line change
Expand Up @@ -276,3 +276,23 @@ def get_speech_robust_bench_run_spec(subject: str) -> RunSpec:
metric_specs=metric_specs,
groups=["speech_robust_bench"],
)


@run_spec_function("audio_pairs")
def get_audio_pairs_run_spec(subject: str) -> RunSpec:
scenario_spec = ScenarioSpec(
class_name="helm.benchmark.scenarios.audio_language.audio_pairs_scenario.AudioPAIRSScenario",
args={"subject": subject},
)
adapter_spec = _get_generation_adapter_spec(
instructions="Listen to the audio and answer the question with provided options.",
max_tokens=5,
)
metric_specs: List[MetricSpec] = get_exact_match_metric_specs() + get_classification_metric_specs()
return RunSpec(
name="audio_pairs",
scenario_spec=scenario_spec,
adapter_spec=adapter_spec,
metric_specs=metric_specs,
groups=["audio_pairs"],
)
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
from typing import List
import os

from helm.benchmark.scenarios.scenario import (
Scenario,
Instance,
Reference,
TEST_SPLIT,
CORRECT_TAG,
Input,
Output,
)
from tqdm import tqdm
from helm.common.media_object import MediaObject, MultimediaObject
from helm.common.general import ensure_file_downloaded
import json


class AudioPAIRSScenario(Scenario):
"""Audio PAIRS
Audio PAIRS is an audio extension of the PAIRS dataset (Fraser et al, 2024) to examine gender and
racial bias in audio large language models. We convert the questions in the PAIRS dataset to audio
clips using OpenAI's TTS-1-HD API.
This dataset is also modified to add an option to opt-out with "unclear" as a choice.
"""

DOWNLOADING_URL = "https://huggingface.co/datasets/UCSC-VLAA/Audio_PAIRS/resolve/main/audio_pairs_files.zip"
SUJECTS = ["occupation", "status", "potential_crime"]

name = "audio_pairs"
description = "Examining gender and racial bias in AudioLMs using a converted audio from the PAIRS dataset."
tags: List[str] = ["audio", "classification"]

def __init__(self, subject: str) -> None:
super().__init__()

if subject not in AudioPAIRSScenario.SUJECTS:
raise ValueError(f"Invalid subject. Valid subjects are: {AudioPAIRSScenario.SUJECTS}")

self._subject: str = subject

def get_instances(self, output_path: str) -> List[Instance]:
instances: List[Instance] = []
downloading_dir: str = os.path.join(output_path, "download")
ensure_file_downloaded(source_url=AudioPAIRSScenario.DOWNLOADING_URL, target_path=downloading_dir, unpack=True)
data_dir: str = os.path.join(downloading_dir, "audio_pairs_files")
audio_file_folder = os.path.join(data_dir, self._subject)
audio_instruction_path = os.path.join(data_dir, "audio_pairs_instructions.json")
audio_instructions = json.load(open(audio_instruction_path))[self._subject]
for audio_file_name, instruction in tqdm(audio_instructions.items()):
local_audio_file_name = "_".join(audio_file_name.split("_")[:-1]) + ".mp3"
local_audio_path: str = os.path.join(audio_file_folder, local_audio_file_name)
content = [
MediaObject(content_type="audio/mpeg", location=local_audio_path),
MediaObject(content_type="text/plain", text=instruction),
]
input = Input(multimedia_content=MultimediaObject(content))
references = [Reference(Output(text="unclear"), tags=[CORRECT_TAG])]
instances.append(Instance(input=input, references=references, split=TEST_SPLIT))
return instances
25 changes: 24 additions & 1 deletion src/helm/benchmark/static/schema_speech.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -366,4 +366,27 @@ run_groups:
what: audio, transcripts of audio samples in a wide range of perturbations
who: real speakers
when: "2024"
language: English, Spanish
language: English, Spanish

- name: audio_pairs
display_name: Audio PAIRS
description: >
Audio PAIRS is an audio extension of the PAIRS dataset (Fraser et al, 2024) to examine gender and
racial bias in audio large language models. We convert the questions in the PAIRS dataset to audio
clips using OpenAI's TTS-1-HD API. This dataset is also modified to add an option to opt-out with
"unclear" as a choice.
The dataset contains the audio and question for three subsets: occupation, status, and potential_crime.
metric_groups:
- accuracy
- efficiency
- general_information
environment:
main_name: exact_match
main_split: test
taxonomy:
task: audio classification
what: audio and question of audio samples to examine models' gender and racial bias
who: OpenAI's TTS-1-HD
when: "2024"
language: English

0 comments on commit 3d28de9

Please sign in to comment.