Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Split pragmatics into presuppositions and scalar implicatures #2938

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions src/helm/benchmark/presentation/run_entries_bhasa.conf
Original file line number Diff line number Diff line change
Expand Up @@ -56,17 +56,17 @@ entries: [

## D. Linguistic Diagnostics (LINDSEA)

### 1. Syntax: Minimal Pairs
### 1. Syntax: LINDSEA Minimal Pairs
### Use this to run the minimal pairs evaluation as an MCQ task
{description: "lindsea_syntax_minimal_pairs:model=text,method=mcq,language=id", priority: 1},

### Use this instead of the above in order to run the minimal pairs evaluation using logprobs
# {description: "lindsea_syntax_minimal_pairs:model=text,method=probs,language=id", priority: 1},

### 2. Pragmatics: Pragmatic Reasoning (single sentence)
{description: "lindsea_pragmatics_pragmatic_reasoning_single:model=text,language=id", priority: 1},
### 2. Pragmatics: LINDSEA Presuppositions
{description: "lindsea_pragmatics_presuppositions:model=text,subset=all,language=id", priority: 1},

### 3. Pragmatics: Pragmatic Reasoning (sentence pair)
{description: "lindsea_pragmatics_pragmatic_reasoning_pair:model=text,language=id", priority: 1},
### 3. Pragmatics: LINDSEA Scalar Implicatures
{description: "lindsea_pragmatics_scalar_implicatures:model=text,subset=all,language=id", priority: 1},

]
]
12 changes: 6 additions & 6 deletions src/helm/benchmark/presentation/run_entries_bhasa_zero_shot.conf
Original file line number Diff line number Diff line change
Expand Up @@ -56,17 +56,17 @@ entries: [

## D. Linguistic Diagnostics (LINDSEA)

### 1. Syntax: Minimal Pairs
### 1. Syntax: LINDSEA Minimal Pairs
### Use this to run the minimal pairs evaluation as an MCQ task
{description: "lindsea_syntax_minimal_pairs:model=text,max_train_instances=0,method=mcq,language=id", priority: 1},

### Use this instead of the above in order to run the minimal pairs evaluation using logprobs
# {description: "lindsea_syntax_minimal_pairs:model=text,max_train_instances=0,method=probs,language=id", priority: 1},

### 2. Pragmatics: Pragmatic Reasoning (single sentence)
{description: "lindsea_pragmatics_pragmatic_reasoning_single:model=text,max_train_instances=0,language=id", priority: 1},
### 2. Pragmatics: LINDSEA Presuppositions
{description: "lindsea_pragmatics_presuppositions:model=text,max_train_instances=0,subset=all,language=id", priority: 1},

### 3. Pragmatics: Pragmatic Reasoning (sentence pair)
{description: "lindsea_pragmatics_pragmatic_reasoning_pair:model=text,max_train_instances=0,language=id", priority: 1},
### 3. Pragmatics: LINDSEA Scalar Implicatures
{description: "lindsea_pragmatics_scalar_implicatures:model=text,max_train_instances=0,subset=all,language=id", priority: 1},

]
]
40 changes: 27 additions & 13 deletions src/helm/benchmark/run_specs/bhasa_run_specs.py
Original file line number Diff line number Diff line change
Expand Up @@ -578,14 +578,18 @@ def get_lindsea_syntax_minimal_pairs_spec(language: str = "id", method: str = "m
scenario_spec=scenario_spec,
adapter_spec=adapter_spec,
metric_specs=get_exact_match_metric_specs(),
groups=["bhasa_linguistic", f"lindsea_syntax_minimal_pairs_{language}"],
groups=[
"bhasa_linguistic",
f"lindsea_syntax_minimal_pairs_{language}",
f"lindsea_syntax_minimal_pairs_{method}_{language}",
],
)


# 2.1. Pragmatics: LINDSEA Pragmatic Reasoning (single sentence)
@run_spec_function("lindsea_pragmatics_pragmatic_reasoning_single")
def get_lindsea_pragmatics_pragmatic_reasoning_single_spec(language="id") -> RunSpec:
name = f"lindsea_pragmatics_pragmatic_reasoning_single_{language}"
# 2.1. Pragmatics: LINDSEA Presuppositions
@run_spec_function("lindsea_pragmatics_presuppositions")
def get_lindsea_pragmatics_presuppositions_spec(language: str = "id", subset: str = "all") -> RunSpec:
yifanmai marked this conversation as resolved.
Show resolved Hide resolved
name = f"lindsea_pragmatics_presuppositions_{subset}_{language}"

adapter_spec = get_generation_adapter_spec(
output_noun=LINDSEA_OUTPUT_NOUNS[language],
Expand All @@ -595,9 +599,10 @@ def get_lindsea_pragmatics_pragmatic_reasoning_single_spec(language="id") -> Run
)

scenario_spec = ScenarioSpec(
class_name="helm.benchmark.scenarios.bhasa_scenario.LINDSEAPragmaticsPragmaticReasoningSingleScenario",
class_name="helm.benchmark.scenarios.bhasa_scenario.LINDSEAPragmaticsPresuppositionsScenario",
args={
"language": language,
"subset": subset,
},
)

Expand All @@ -606,14 +611,18 @@ def get_lindsea_pragmatics_pragmatic_reasoning_single_spec(language="id") -> Run
scenario_spec=scenario_spec,
adapter_spec=adapter_spec,
metric_specs=get_exact_match_metric_specs(),
groups=["bhasa_linguistic", f"lindsea_pragmatics_pragmatic_reasoning_single_{language}"],
groups=[
"bhasa_linguistic",
f"lindsea_pragmatics_presuppositions_{language}",
f"lindsea_pragmatics_presuppositions_{subset}_{language}",
],
)


# 2.2. Pragmatics: LINDSEA Pragmatic Reasoning (sentence pair)
@run_spec_function("lindsea_pragmatics_pragmatic_reasoning_pair")
def get_lindsea_pragmatics_pragmatic_reasoning_pair_spec(language="id") -> RunSpec:
name = f"lindsea_pragmatics_pragmatic_reasoning_pair_{language}"
# 2.2. Pragmatics: LINDSEA Scalar Implicatures
@run_spec_function("lindsea_pragmatics_scalar_implicatures")
def get_lindsea_pragmatics_scalar_implicatures_spec(language: str = "id", subset: str = "all") -> RunSpec:
name = f"lindsea_pragmatics_scalar_implicatures_{subset}_{language}"

adapter_spec = get_generation_adapter_spec(
output_noun=LINDSEA_OUTPUT_NOUNS[language],
Expand All @@ -623,9 +632,10 @@ def get_lindsea_pragmatics_pragmatic_reasoning_pair_spec(language="id") -> RunSp
)

scenario_spec = ScenarioSpec(
class_name="helm.benchmark.scenarios.bhasa_scenario.LINDSEAPragmaticsPragmaticReasoningPairScenario",
class_name="helm.benchmark.scenarios.bhasa_scenario.LINDSEAPragmaticsScalarImplicaturesScenario",
args={
"language": language,
"subset": subset,
},
)

Expand All @@ -634,5 +644,9 @@ def get_lindsea_pragmatics_pragmatic_reasoning_pair_spec(language="id") -> RunSp
scenario_spec=scenario_spec,
adapter_spec=adapter_spec,
metric_specs=get_exact_match_metric_specs(),
groups=["bhasa_linguistic", f"lindsea_pragmatics_pragmatic_reasoning_pair_{language}"],
groups=[
"bhasa_linguistic",
f"lindsea_pragmatics_scalar_implicatures_{language}",
f"lindsea_pragmatics_scalar_implicatures_{subset}_{language}",
],
)
Loading