diff --git a/Dockerfile b/Dockerfile
index 0e8696d..b8779f0 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -22,6 +22,7 @@ RUN pip install --upgrade pip && \
     make && \
     make install
 
+RUN pip install git+https://github.com/MycroftAI/lingua-franca.git@5bfd75fe5996fd364102a0eec3f714c9ddc9275c
 WORKDIR /app
 COPY ./requirements.txt /app
 
diff --git a/server/server.py b/server/server.py
index 4dddf3c..ecd5667 100644
--- a/server/server.py
+++ b/server/server.py
@@ -7,6 +7,13 @@
 import sys
 import traceback
 import tempfile
+import datetime
+import re
+
+# Lingua franca
+from lingua_franca import load_language
+from lingua_franca.format import nice_time
+from lingua_franca.time import default_timezone
 
 # Libraries for multiprocessing
 import multiprocessing as mp
@@ -59,6 +66,9 @@
 # Initialize sentence segmenter
 segmenter = Segmenter(language="en")
 
+# Load lingua franca language
+load_language('ca-es')
+
 def create_argparser():
     def convert_boolean(x):
         return x.lower() in ["true", "1", "yes"]
@@ -318,6 +328,37 @@ async def details(request: Request):
 
 
 def worker(sentence, speaker_id, model, use_aliases, new_speaker_ids):
+    def substitute_time(sentence):
+        """Replace clock times (H:MM or HH:MM) with their spoken Catalan form.
+
+        Each standalone time is rewritten with the 24-hour wording returned
+        by lingua_franca's nice_time. An article "les" directly in front of
+        a time is dropped together with it (presumably because nice_time
+        supplies its own article -- TODO confirm). Digit pairs that are not
+        valid clock times, e.g. "25:61", are left untouched, since building
+        a datetime from them would raise ValueError.
+        """
+        time_pattern = re.compile(
+            r'(?:\b[Ll]es\s+)?'   # optional article that belongs to the time
+            r'(?<![\w:])'         # not glued to a word, a number or "HH:"
+            r'(\d{1,2}):(\d{2})'
+            r'(?![\w:])'          # punctuation or end of text may follow
+        )
+
+        def spell_out(match):
+            hour, minute = int(match.group(1)), int(match.group(2))
+            if hour > 23 or minute > 59:
+                return match.group(0)
+            # Arbitrary fixed date; nice_time is asked for the time of day only.
+            dt = datetime.datetime(2017, 1, 31, hour, minute, 0,
+                                   tzinfo=default_timezone())
+            return nice_time(dt, lang="ca", use_24hour=True)
+
+        # A callback substitution rewrites exactly the matched spans, so a
+        # short time such as "1:30" can never clobber part of "11:30".
+        return time_pattern.sub(spell_out, sentence)
+
+    sentence = substitute_time(sentence)
     print(" > Model input: {}".format(sentence))
     print(" > Speaker Idx: {}".format(speaker_id))
 