Skip to content
This repository has been archived by the owner on Oct 9, 2023. It is now read-only.

speech recognition auto processor #1075

Merged
merged 7 commits into from
Dec 15, 2021
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 11 additions & 2 deletions flash/audio/speech_recognition/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@
)

if _AUDIO_AVAILABLE:
from transformers import Wav2Vec2Processor
from transformers import AutoProcessor


class SpeechRecognition(Task):
Expand All @@ -64,6 +64,7 @@ class SpeechRecognition(Task):
def __init__(
self,
backbone: str = "facebook/wav2vec2-base-960h",
processor_backbone: str = None,
optimizer: OPTIMIZER_TYPE = "Adam",
lr_scheduler: LR_SCHEDULER_TYPE = None,
learning_rate: float = 1e-5,
Expand All @@ -89,7 +90,15 @@ def __init__(
self.save_hyperparameters()

self.set_state(SpeechRecognitionBackboneState(backbone))
self.set_state(CollateFn(DataCollatorCTCWithPadding(Wav2Vec2Processor.from_pretrained(backbone))))
self.set_state(
CollateFn(
DataCollatorCTCWithPadding(
AutoProcessor.from_pretrained(backbone)
if processor_backbone is None
else AutoProcessor.from_pretrained(processor_backbone)
)
)
)

def forward(self, batch: Dict[str, torch.Tensor]):
return self.model(batch["input_values"])
Expand Down
1 change: 1 addition & 0 deletions flash/image/detection/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,7 @@ def from_voc(
) -> "ObjectDetectionData":
"""Creates a :class:`~flash.image.detection.data.ObjectDetectionData` object from the given data folders
and annotation files in the `PASCAL VOC (Visual Object Classes)

<http://host.robots.ox.ac.uk/pascal/VOC/>`_ XML format.

Args:
Expand Down