Skip to content

Commit

Permalink
Split pragmatics into presuppositions and scalar implicatures (#2938)
Browse files Browse the repository at this point in the history
  • Loading branch information
raileymontalan authored Oct 1, 2024
1 parent 4584803 commit 64f23d3
Show file tree
Hide file tree
Showing 5 changed files with 275 additions and 117 deletions.
12 changes: 6 additions & 6 deletions src/helm/benchmark/presentation/run_entries_bhasa.conf
Original file line number Diff line number Diff line change
Expand Up @@ -56,17 +56,17 @@ entries: [

## D. Linguistic Diagnostics (LINDSEA)

### 1. Syntax: Minimal Pairs
### 1. Syntax: LINDSEA Minimal Pairs
### Use this to run the minimal pairs evaluation as a MCQ task
{description: "lindsea_syntax_minimal_pairs:model=text,method=mcq,language=id", priority: 1},

### Use this instead of the above in order to run the minimal pairs evaluation using logprobs
# {description: "lindsea_syntax_minimal_pairs:model=text,method=probs,language=id", priority: 1},

### 2. Pragmatics: Pragmatic Reasoning (single sentence)
{description: "lindsea_pragmatics_pragmatic_reasoning_single:model=text,language=id", priority: 1},
### 2. Pragmatics: LINDSEA Presuppositions
{description: "lindsea_pragmatics_presuppositions:model=text,subset=all,language=id", priority: 1},

### 3. Pragmatics: Pragmatic Reasoning (sentence pair)
{description: "lindsea_pragmatics_pragmatic_reasoning_pair:model=text,language=id", priority: 1},
### 3. Pragmatics: LINDSEA Scalar Implicatures
{description: "lindsea_pragmatics_scalar_implicatures:model=text,subset=all,language=id", priority: 1},

]
]
12 changes: 6 additions & 6 deletions src/helm/benchmark/presentation/run_entries_bhasa_zero_shot.conf
Original file line number Diff line number Diff line change
Expand Up @@ -56,17 +56,17 @@ entries: [

## D. Linguistic Diagnostics (LINDSEA)

### 1. Syntax: Minimal Pairs
### 1. Syntax: LINDSEA Minimal Pairs
### Use this to run the minimal pairs evaluation as a MCQ task
{description: "lindsea_syntax_minimal_pairs:model=text,max_train_instances=0,method=mcq,language=id", priority: 1},

### Use this instead of the above in order to run the minimal pairs evaluation using logprobs
# {description: "lindsea_syntax_minimal_pairs:model=text,max_train_instances=0,method=probs,language=id", priority: 1},

### 2. Pragmatics: Pragmatic Reasoning (single sentence)
{description: "lindsea_pragmatics_pragmatic_reasoning_single:model=text,max_train_instances=0,language=id", priority: 1},
### 2. Pragmatics: LINDSEA Presuppositions
{description: "lindsea_pragmatics_presuppositions:model=text,max_train_instances=0,subset=all,language=id", priority: 1},

### 3. Pragmatics: Pragmatic Reasoning (sentence pair)
{description: "lindsea_pragmatics_pragmatic_reasoning_pair:model=text,max_train_instances=0,language=id", priority: 1},
### 3. Pragmatics: LINDSEA Scalar Implicatures
{description: "lindsea_pragmatics_scalar_implicatures:model=text,max_train_instances=0,subset=all,language=id", priority: 1},

]
]
40 changes: 27 additions & 13 deletions src/helm/benchmark/run_specs/bhasa_run_specs.py
Original file line number Diff line number Diff line change
Expand Up @@ -578,14 +578,18 @@ def get_lindsea_syntax_minimal_pairs_spec(language: str = "id", method: str = "m
scenario_spec=scenario_spec,
adapter_spec=adapter_spec,
metric_specs=get_exact_match_metric_specs(),
groups=["bhasa_linguistic", f"lindsea_syntax_minimal_pairs_{language}"],
groups=[
"bhasa_linguistic",
f"lindsea_syntax_minimal_pairs_{language}",
f"lindsea_syntax_minimal_pairs_{method}_{language}",
],
)


# 2.1. Pragmatics: LINDSEA Pragmatic Reasoning (single sentence)
@run_spec_function("lindsea_pragmatics_pragmatic_reasoning_single")
def get_lindsea_pragmatics_pragmatic_reasoning_single_spec(language="id") -> RunSpec:
name = f"lindsea_pragmatics_pragmatic_reasoning_single_{language}"
# 2.1. Pragmatics: LINDSEA Presuppositions
@run_spec_function("lindsea_pragmatics_presuppositions")
def get_lindsea_pragmatics_presuppositions_spec(language: str = "id", subset: str = "all") -> RunSpec:
name = f"lindsea_pragmatics_presuppositions_{subset}_{language}"

adapter_spec = get_generation_adapter_spec(
output_noun=LINDSEA_OUTPUT_NOUNS[language],
Expand All @@ -595,9 +599,10 @@ def get_lindsea_pragmatics_pragmatic_reasoning_single_spec(language="id") -> Run
)

scenario_spec = ScenarioSpec(
class_name="helm.benchmark.scenarios.bhasa_scenario.LINDSEAPragmaticsPragmaticReasoningSingleScenario",
class_name="helm.benchmark.scenarios.bhasa_scenario.LINDSEAPragmaticsPresuppositionsScenario",
args={
"language": language,
"subset": subset,
},
)

Expand All @@ -606,14 +611,18 @@ def get_lindsea_pragmatics_pragmatic_reasoning_single_spec(language="id") -> Run
scenario_spec=scenario_spec,
adapter_spec=adapter_spec,
metric_specs=get_exact_match_metric_specs(),
groups=["bhasa_linguistic", f"lindsea_pragmatics_pragmatic_reasoning_single_{language}"],
groups=[
"bhasa_linguistic",
f"lindsea_pragmatics_presuppositions_{language}",
f"lindsea_pragmatics_presuppositions_{subset}_{language}",
],
)


# 2.2. Pragmatics: LINDSEA Pragmatic Reasoning (sentence pair)
@run_spec_function("lindsea_pragmatics_pragmatic_reasoning_pair")
def get_lindsea_pragmatics_pragmatic_reasoning_pair_spec(language="id") -> RunSpec:
name = f"lindsea_pragmatics_pragmatic_reasoning_pair_{language}"
# 2.2. Pragmatics: LINDSEA Scalar Implicatures
@run_spec_function("lindsea_pragmatics_scalar_implicatures")
def get_lindsea_pragmatics_scalar_implicatures_spec(language: str = "id", subset: str = "all") -> RunSpec:
name = f"lindsea_pragmatics_scalar_implicatures_{subset}_{language}"

adapter_spec = get_generation_adapter_spec(
output_noun=LINDSEA_OUTPUT_NOUNS[language],
Expand All @@ -623,9 +632,10 @@ def get_lindsea_pragmatics_pragmatic_reasoning_pair_spec(language="id") -> RunSp
)

scenario_spec = ScenarioSpec(
class_name="helm.benchmark.scenarios.bhasa_scenario.LINDSEAPragmaticsPragmaticReasoningPairScenario",
class_name="helm.benchmark.scenarios.bhasa_scenario.LINDSEAPragmaticsScalarImplicaturesScenario",
args={
"language": language,
"subset": subset,
},
)

Expand All @@ -634,5 +644,9 @@ def get_lindsea_pragmatics_pragmatic_reasoning_pair_spec(language="id") -> RunSp
scenario_spec=scenario_spec,
adapter_spec=adapter_spec,
metric_specs=get_exact_match_metric_specs(),
groups=["bhasa_linguistic", f"lindsea_pragmatics_pragmatic_reasoning_pair_{language}"],
groups=[
"bhasa_linguistic",
f"lindsea_pragmatics_scalar_implicatures_{language}",
f"lindsea_pragmatics_scalar_implicatures_{subset}_{language}",
],
)
Loading

0 comments on commit 64f23d3

Please sign in to comment.