From c51db60cb8287285a2a0141e10c7ab28169e2d97 Mon Sep 17 00:00:00 2001 From: Luis Nassif Date: Mon, 7 Oct 2024 21:27:28 -0300 Subject: [PATCH] '#1539: code formatting --- .../resources/scripts/tasks/WhisperProcess.py | 4 +- .../transcript/AbstractTranscriptTask.java | 16 ++-- .../RemoteTranscriptionService.java | 89 +++++++------------ .../transcript/WhisperTranscriptTask.java | 30 +++---- 4 files changed, 52 insertions(+), 87 deletions(-) diff --git a/iped-app/resources/scripts/tasks/WhisperProcess.py b/iped-app/resources/scripts/tasks/WhisperProcess.py index 6c669920b2..e31c9a1854 100644 --- a/iped-app/resources/scripts/tasks/WhisperProcess.py +++ b/iped-app/resources/scripts/tasks/WhisperProcess.py @@ -74,7 +74,7 @@ def main(): print(ping, file=stdout, flush=True) continue - files=line.split(",") + files = line.split(",") transcription = [] logprobs = [] for file in files: @@ -84,7 +84,7 @@ def main(): if whisperx_found: result = model.transcribe(files, batch_size=batch_size, language=language,wav=True) for segment in result['segments']: - idx=segment["audio"] + idx = segment["audio"] transcription[idx] += segment['text'] if 'avg_logprob' in segment: logprobs[idx].append(segment['avg_logprob']) diff --git a/iped-engine/src/main/java/iped/engine/task/transcript/AbstractTranscriptTask.java b/iped-engine/src/main/java/iped/engine/task/transcript/AbstractTranscriptTask.java index d3573f785d..bbd2637ea7 100644 --- a/iped-engine/src/main/java/iped/engine/task/transcript/AbstractTranscriptTask.java +++ b/iped-engine/src/main/java/iped/engine/task/transcript/AbstractTranscriptTask.java @@ -70,7 +70,7 @@ public abstract class AbstractTranscriptTask extends AbstractTask { private static final int MAX_WAV_SIZE = 16000 * 2 * MAX_WAV_TIME; protected AudioTranscriptConfig transcriptConfig; - + // Variables to store some statistics private static final AtomicLong wavTime = new AtomicLong(); private static final AtomicLong transcriptionTime = new AtomicLong(); @@ -91,8 +91,7 @@ public boolean isEnabled() { protected boolean isToProcess(IItem evidence) { - if (evidence.getLength() == null || evidence.getLength() == 0 || !evidence.isToAddToCase() - || evidence.getMetadata().get(ExtraProperties.TRANSCRIPT_ATTR) != null) { + if (evidence.getLength() == null || evidence.getLength() == 0 || !evidence.isToAddToCase() || evidence.getMetadata().get(ExtraProperties.TRANSCRIPT_ATTR) != null) { return false; } if (transcriptConfig.getSkipKnownFiles() && evidence.getExtraAttribute(HashDBLookupTask.STATUS_ATTRIBUTE) != null) { @@ -192,8 +191,7 @@ public void init(ConfigurationManager configurationManager) throws Exception { } - public static TextAndScore transcribeWavBreaking(File tmpFile, String itemPath, - Function transcribeWavPart) throws Exception { + public static TextAndScore transcribeWavBreaking(File tmpFile, String itemPath, Function transcribeWavPart) throws Exception { if (tmpFile.length() <= MAX_WAV_SIZE) { return transcribeWavPart.apply(tmpFile); } else { @@ -316,7 +314,7 @@ public void finish() throws Exception { conn.close(); conn = null; } - + long totWavConversions = wavSuccess.longValue() + wavFail.longValue(); if (totWavConversions != 0) { LOGGER.info("Total conversions to WAV: " + totWavConversions); @@ -340,8 +338,7 @@ public void finish() throws Exception { } } - protected File getTempFileToTranscript(IItem evidence, TemporaryResources tmp) - throws IOException, InterruptedException { + protected File getTempFileToTranscript(IItem evidence, TemporaryResources tmp) throws IOException, InterruptedException { long t = System.currentTimeMillis(); File tempWav = null; try { @@ -373,8 +370,7 @@ protected void process(IItem evidence) throws Exception { return; } - if (evidence.getMetadata().get(ExtraProperties.TRANSCRIPT_ATTR) != null - && evidence.getMetadata().get(ExtraProperties.CONFIDENCE_ATTR) != null) + if (evidence.getMetadata().get(ExtraProperties.TRANSCRIPT_ATTR) != null && evidence.getMetadata().get(ExtraProperties.CONFIDENCE_ATTR) != null) return; TextAndScore prevResult = getTextFromDb(evidence.getHash()); diff --git a/iped-engine/src/main/java/iped/engine/task/transcript/RemoteTranscriptionService.java b/iped-engine/src/main/java/iped/engine/task/transcript/RemoteTranscriptionService.java index e6d99db766..f67a42fef5 100644 --- a/iped-engine/src/main/java/iped/engine/task/transcript/RemoteTranscriptionService.java +++ b/iped-engine/src/main/java/iped/engine/task/transcript/RemoteTranscriptionService.java @@ -41,38 +41,28 @@ public class RemoteTranscriptionService { // 30 minutos private static final int MAX_WAV_TIME = 30 * 60; private static final int MAX_WAV_SIZE = 16000 * 2 * MAX_WAV_TIME; + static enum MESSAGES { - ACCEPTED, - AUDIO_SIZE, - BUSY, - DISCOVER, - DONE, - ERROR, - REGISTER, - STATS, - WARN, VERSION_1_1, - VERSION_1_2, - VERSION_1_0, - PING + ACCEPTED, AUDIO_SIZE, BUSY, DISCOVER, DONE, ERROR, REGISTER, STATS, WARN, VERSION_1_1, VERSION_1_2, VERSION_1_0, PING } + static class TranscribeRequest { File wavAudio; - TextAndScore result=null; - Exception error=null; - + TextAndScore result = null; + Exception error = null; public TranscribeRequest(File wavAudio) { - this.wavAudio=wavAudio; + this.wavAudio = wavAudio; } } + static class OpenConnectons { Socket conn; BufferedInputStream bis; PrintWriter writer; Thread t; File wavAudio; - TextAndScore result=null; - + TextAndScore result = null; public OpenConnectons(Socket conn, BufferedInputStream bis, PrintWriter writer, Thread t) { this.conn = conn; @@ -108,8 +98,8 @@ public void sendBeacon() { * Control number of simultaneous audio conversions to WAV. */ private static Semaphore wavConvSemaphore; - - private static int BATCH_SIZE=1; + + private static int BATCH_SIZE = 1; private static final AtomicLong audiosTranscripted = new AtomicLong(); private static final AtomicLong audiosDuration = new AtomicLong(); @@ -119,16 +109,11 @@ public void sendBeacon() { private static final AtomicLong requestsAccepted = new AtomicLong(); private static final List beaconQueq = new LinkedList<>(); private static final Deque toTranscribe = new LinkedList<>(); - private static Logger logger; private static void printHelpAndExit() { - System.out.println( - "Params: IP:Port [LocalPort]\n" - + "IP:Port IP and port of the naming node.\n" - + "LocalPort [optional] local port to listen for connections.\n" - + " If not provided, a random port will be used."); + System.out.println("Params: IP:Port [LocalPort]\n" + "IP:Port IP and port of the naming node.\n" + "LocalPort [optional] local port to listen for connections.\n" + " If not provided, a random port will be used."); System.exit(1); } @@ -169,7 +154,7 @@ public static void main(String[] args) throws Exception { AbstractTranscriptTask task = (AbstractTranscriptTask) Class.forName(audioConfig.getClassName()).getDeclaredConstructor().newInstance(); audioConfig.setEnabled(true); task.init(cm); - BATCH_SIZE=audioConfig.getBatchSize(); + BATCH_SIZE = audioConfig.getBatchSize(); int numConcurrentTranscriptions = Wav2Vec2TranscriptTask.getNumConcurrentTranscriptions(); int numLogicalCores = Runtime.getRuntime().availableProcessors(); @@ -195,10 +180,9 @@ public static void main(String[] args) throws Exception { startSendStatsThread(discoveryIp, discoveryPort, localPort, numConcurrentTranscriptions, numLogicalCores); startBeaconThread(); - for(int i=0;i(); - TranscribeRequest last=null; + TranscribeRequest last = null; if (wavFile.length() <= MAX_WAV_SIZE) { TranscribeRequest req = new TranscribeRequest(wavFile); reqs.add(req); } else { - for (File wavPart : AbstractTranscriptTask.getAudioSplits(wavFile, - wavFile.getPath(), MAX_WAV_TIME)) { + for (File wavPart : AbstractTranscriptTask.getAudioSplits(wavFile, wavFile.getPath(), MAX_WAV_TIME)) { TranscribeRequest req = new TranscribeRequest(wavPart); reqs.add(req); } @@ -501,15 +478,15 @@ public void run() { } wavFile = null; - + // dispatch all parts to be executed for (TranscribeRequest req : reqs) { synchronized (toTranscribe) { toTranscribe.add(req); } - last=req; + last = req; } - + // wait until the last wav part is transcribed synchronized (last) { last.wait(); @@ -521,7 +498,7 @@ public void run() { error = false; throw new Exception("Error processing the audio", req.error); } - + if (result.score > 0) result.text += " "; result.text += partResult.text; @@ -530,7 +507,6 @@ public void run() { } result.score /= reqs.size(); - } catch (ProcessCrashedException e) { // retry audio @@ -542,12 +518,12 @@ public void run() { executor.shutdown(); server.close(); throw e; - } + } audiosTranscripted.incrementAndGet(); audiosDuration.addAndGet(durationMillis); conversionTime.addAndGet(t1 - t0); - + logger.info(prefix + "Transcritpion done."); // removes from the beacon queue to prevent beacons in the middle of the @@ -617,7 +593,7 @@ public void run() { } }); } - + private static void startTrancribeThreads(AbstractTranscriptTask task) { executor.execute(new Runnable() { @Override @@ -628,12 +604,12 @@ public void run() { empty = toTranscribe.isEmpty(); } if (empty) { - try { + try { Thread.sleep(100); - + } catch (Exception e) { // TODO: handle exception - } + } continue; } try { @@ -646,10 +622,9 @@ public void run() { transcriptSemaphore.release(); } - } + } } }); } - } diff --git a/iped-engine/src/main/java/iped/engine/task/transcript/WhisperTranscriptTask.java b/iped-engine/src/main/java/iped/engine/task/transcript/WhisperTranscriptTask.java index f34893a203..aa519387d8 100644 --- a/iped-engine/src/main/java/iped/engine/task/transcript/WhisperTranscriptTask.java +++ b/iped-engine/src/main/java/iped/engine/task/transcript/WhisperTranscriptTask.java @@ -130,11 +130,11 @@ protected Server startServer0(int device) throws IOException { protected TextAndScore transcribeAudio(File tmpFile) throws Exception { return transcribeWavPart(tmpFile); } - + protected List transcribeAudios(ArrayList tmpFiles) throws Exception { ArrayList textAndScores = new ArrayList<>(); - for(int i=0;i transcribeAudios(ArrayList tmpFiles) throws E } StringBuilder filePaths = new StringBuilder(); - for(int i=0;i0) { - filePaths.append(","); - } + for (int i = 0; i < tmpFiles.size(); i++) { + if (i > 0) { + filePaths.append(","); + } filePaths.append(tmpFiles.get(i).getAbsolutePath().replace('\\', '/')); - - } + + } server.process.getOutputStream().write(filePaths.toString().getBytes("UTF-8")); server.process.getOutputStream().write(NEW_LINE); server.process.getOutputStream().flush(); @@ -165,10 +165,10 @@ protected List transcribeAudios(ArrayList tmpFiles) throws E throw new RuntimeException("Transcription failed, returned: " + line); } } - for(int i=0;i transcribeAudios(ArrayList tmpFiles) throws E return textAndScores; } - - @Override protected void logInputStream(InputStream is) { - List ignoreMsgs = Arrays.asList( - "With dispatcher enabled, this function is no-op. You can remove the function call.", - "torchvision is not available - cannot save figures", - "Lightning automatically upgraded your loaded checkpoint from", - "Model was trained with pyannote.audio 0.0.1, yours is", - "Model was trained with torch 1.10.0+cu102, yours is"); + List ignoreMsgs = Arrays.asList("With dispatcher enabled, this function is no-op. You can remove the function call.", "torchvision is not available - cannot save figures", + "Lightning automatically upgraded your loaded checkpoint from", "Model was trained with pyannote.audio 0.0.1, yours is", "Model was trained with torch 1.10.0+cu102, yours is"); Thread t = new Thread() { public void run() { byte[] buf = new byte[1024];