Skip to content

Commit

Permalink
Add PII audio redaction quality (#66)
Browse files Browse the repository at this point in the history
Co-authored-by: Patrick Loeber <98830383+ploeber@users.noreply.github.com>
  • Loading branch information
dweekly and ploeber committed Apr 19, 2024
1 parent 230f004 commit 743b15b
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 2 deletions.
4 changes: 2 additions & 2 deletions assemblyai/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
LemurTaskResponse,
LemurTranscriptSource,
Paragraph,
PIIRedactedAudioQuality,
PIIRedactionPolicy,
PIISubstitutionPolicy,
RawTranscriptionConfig,
Expand Down Expand Up @@ -97,8 +98,6 @@
"LemurQuestionResponse",
"LemurSummaryResponse",
"LemurTaskResponse",
"PIIRedactionPolicy",
"PIISubstitutionPolicy",
"Sentence",
"Sentiment",
"SentimentType",
Expand All @@ -117,6 +116,7 @@
"Utterance",
"UtteranceWord",
"Paragraph",
"PIIRedactedAudioQuality",
"PIISubstitutionPolicy",
"PIIRedactionPolicy",
"RawTranscriptionConfig",
Expand Down
22 changes: 22 additions & 0 deletions assemblyai/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,11 @@ class WordBoost(str, Enum):
high = "high"


class PIIRedactedAudioQuality(str, Enum):
    """
    Output quality options for the audio file produced by PII audio redaction.

    Used by the `redact_pii_audio_quality` setting, which applies when
    `redact_pii_audio` is enabled.
    """

    mp3 = "mp3"
    wav = "wav"


class EntityType(str, Enum):
"""
Used for AssemblyAI's Entity Detection feature.
Expand Down Expand Up @@ -454,6 +459,8 @@ class RawTranscriptionConfig(BaseModel):
"Redact PII from the transcribed text."
redact_pii_audio: Optional[bool]
"Generate a copy of the original media file with spoken PII 'beeped' out."
redact_pii_audio_quality: Optional[PIIRedactedAudioQuality]
"The quality of the redacted audio file in case `redact_pii_audio` is enabled."
redact_pii_policies: Optional[List[PIIRedactionPolicy]]
"The list of PII Redaction policies to enable."
redact_pii_sub: Optional[PIISubstitutionPolicy]
Expand Down Expand Up @@ -543,6 +550,7 @@ def __init__(
filter_profanity: Optional[bool] = None,
redact_pii: Optional[bool] = None,
redact_pii_audio: Optional[bool] = None,
redact_pii_audio_quality: Optional[PIIRedactedAudioQuality] = None,
redact_pii_policies: Optional[List[PIIRedactionPolicy]] = None,
redact_pii_sub: Optional[PIISubstitutionPolicy] = None,
speaker_labels: Optional[bool] = None,
Expand Down Expand Up @@ -580,6 +588,7 @@ def __init__(
filter_profanity: Filter profanity from the transcribed text.
redact_pii: Redact PII from the transcribed text.
redact_pii_audio: Generate a copy of the original media file with spoken PII 'beeped' out (new audio only available for 24 hours).
redact_pii_audio_quality: The quality of the redacted audio file in case `redact_pii_audio` is enabled.
redact_pii_policies: The list of PII Redaction policies to enable.
redact_pii_sub: The replacement logic for detected PII.
speaker_labels: Enable Speaker Diarization.
Expand Down Expand Up @@ -623,6 +632,7 @@ def __init__(
self.set_redact_pii(
redact_pii,
redact_pii_audio,
redact_pii_audio_quality,
redact_pii_policies,
redact_pii_sub,
)
Expand Down Expand Up @@ -773,6 +783,12 @@ def redact_pii_audio(self) -> Optional[bool]:

return self._raw_transcription_config.redact_pii_audio

@property
def redact_pii_audio_quality(self) -> Optional[PIIRedactedAudioQuality]:
    """
    The quality of the redacted audio file in case `redact_pii_audio` is enabled.

    The value is read from the underlying raw transcription config and may
    be `None`.
    """

    return self._raw_transcription_config.redact_pii_audio_quality

@property
def redact_pii_policies(self) -> Optional[List[PIIRedactionPolicy]]:
"Returns the list of defined PII redaction policies."
Expand Down Expand Up @@ -1122,6 +1138,7 @@ def set_redact_pii(
self,
enable: Optional[bool] = True,
redact_audio: Optional[bool] = None,
redact_audio_quality: Optional[PIIRedactedAudioQuality] = None,
policies: Optional[List[PIIRedactionPolicy]] = None,
substitution: Optional[PIISubstitutionPolicy] = None,
) -> Self:
Expand All @@ -1131,13 +1148,15 @@ def set_redact_pii(
Args:
enable: whether to enable or disable the PII Redaction feature.
redact_audio: Generate a copy of the original media file with spoken PII 'beeped' out. NOTE: The copy is available for 24h
redact_audio_quality: The quality of the redacted audio file in case `redact_audio` is enabled.
policies: A list of PII redaction policies to enable.
substitution: The replacement logic for detected PII (`PIISubstitutionPolicy.hash` by default).
"""

if not enable:
self._raw_transcription_config.redact_pii = None
self._raw_transcription_config.redact_pii_audio = None
self._raw_transcription_config.redact_pii_audio_quality = None
self._raw_transcription_config.redact_pii_policies = None
self._raw_transcription_config.redact_pii_sub = None

Expand All @@ -1148,6 +1167,7 @@ def set_redact_pii(

self._raw_transcription_config.redact_pii = True
self._raw_transcription_config.redact_pii_audio = redact_audio
self._raw_transcription_config.redact_pii_audio_quality = redact_audio_quality
self._raw_transcription_config.redact_pii_policies = policies
self._raw_transcription_config.redact_pii_sub = substitution

Expand Down Expand Up @@ -1527,6 +1547,8 @@ class BaseTranscript(BaseModel):
"Redact PII from the transcribed text."
redact_pii_audio: Optional[bool]
"Generate a copy of the original media file with spoken PII 'beeped' out."
redact_pii_audio_quality: Optional[PIIRedactedAudioQuality]
"The quality of the redacted audio file in case `redact_pii_audio` is enabled."
redact_pii_policies: Optional[List[PIIRedactionPolicy]]
"The list of PII Redaction policies to enable."
redact_pii_sub: Optional[PIISubstitutionPolicy]
Expand Down

0 comments on commit 743b15b

Please sign in to comment.