Skip to content

Commit

Permalink
QC Plugin: Remove handling of triple quoted transcripts
Browse files (browse the repository at this point in the history)
  • Loading branch information
raina-rudra committed Jul 2, 2024
1 parent e5b8e34 commit 06a8db8
Show file tree
Hide file tree
Showing 2 changed files with 0 additions and 13 deletions.
3 changes: 0 additions & 3 deletions dialogy/plugins/text/qc_plugin/__init__.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -53,9 +53,6 @@ def identify_conflicting_labels(training_data: pd.DataFrame) -> pd.DataFrame:

logger.debug(f"Finding data points with conflicting labels...")

training_data["alternatives"] = training_data["alternatives"].apply(
lambda x: x.replace("""\"\"""", """\"""") if isinstance(x, str) else x
)
training_data["frozen_set_hash"] = training_data["alternatives"].apply(
lambda x: hashlib.md5(
pickle.dumps(
Expand Down
10 changes: 0 additions & 10 deletions tests/plugin/text/test_qc_plugin/test_qc_plugin.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -40,16 +40,6 @@
True,
2,
),
(
[
'[[{"transcript": "hello"}]]',
'[[{"transcript": "hello"}]]',
"""[[{\""confidence\"": 0.801317, \""transcript"" :\""hello\""}]]""",
],
["x1", "x2", "x3"],
True,
3,
),
],
)
async def test_drop_conflicting_labels(alternatives, tags, drop, discard_size, tmp_path) -> None:
Expand Down

0 comments on commit 06a8db8

Please sign in to comment.