-
Notifications
You must be signed in to change notification settings - Fork 2.8k
/
Detect.java
139 lines (127 loc) · 5.54 KB
/
Detect.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
/*
* Copyright 2018 Google Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package beta.video;
import com.google.api.gax.longrunning.OperationFuture;
import com.google.cloud.videointelligence.v1p1beta1.AnnotateVideoProgress;
import com.google.cloud.videointelligence.v1p1beta1.AnnotateVideoRequest;
import com.google.cloud.videointelligence.v1p1beta1.AnnotateVideoResponse;
import com.google.cloud.videointelligence.v1p1beta1.Feature;
import com.google.cloud.videointelligence.v1p1beta1.SpeechRecognitionAlternative;
import com.google.cloud.videointelligence.v1p1beta1.SpeechTranscription;
import com.google.cloud.videointelligence.v1p1beta1.SpeechTranscriptionConfig;
import com.google.cloud.videointelligence.v1p1beta1.VideoAnnotationResults;
import com.google.cloud.videointelligence.v1p1beta1.VideoContext;
import com.google.cloud.videointelligence.v1p1beta1.VideoIntelligenceServiceClient;
import com.google.cloud.videointelligence.v1p1beta1.WordInfo;
import java.io.IOException;
import java.util.concurrent.TimeUnit;
public class Detect {

  /**
   * Detects video transcription using the Video Intelligence API.
   *
   * <p>Any exception raised while running the command is caught here; its message and stack trace
   * are printed to standard output.
   *
   * @param args specifies features to detect and the path to the video on Google Cloud Storage.
   */
  public static void main(String[] args) {
    try {
      argsHelper(args);
    } catch (Exception e) {
      System.out.println("Exception while running:\n" + e.getMessage() + "\n");
      e.printStackTrace(System.out);
    }
  }

  /**
   * Helper that handles the input passed to the program.
   *
   * <p>Expects exactly the arguments described by the usage text: a command followed by the path
   * of the video. When either is missing, or the command is not recognised, the usage text is
   * printed and no request is made.
   *
   * @param args specifies features to detect and the path to the video on Google Cloud Storage.
   * @throws Exception on any error raised while running the selected command.
   */
  public static void argsHelper(String[] args) throws Exception {
    // Both the command and the video path are required; without a path the request would be
    // sent with an empty input URI.
    if (args == null || args.length < 2) {
      printUsage();
      return;
    }

    String command = args[0];
    String path = args[1];

    if (command.equals("speech-transcription")) {
      speechTranscription(path);
    } else {
      // Report unsupported commands instead of exiting silently.
      System.out.printf("Unknown command: %s\n", command);
      printUsage();
    }
  }

  /** Prints the command-line usage of this sample to standard output. */
  private static void printUsage() {
    System.out.println("Usage:");
    System.out.printf(
        "\tjava %1$s \"<command>\" \"<path-to-video>\"\n"
            + "Commands:\n"
            + "\tspeech-transcription\n"
            + "Path:\n\tA URI for a Cloud Storage resource (gs://...)\n"
            + "Examples:\n"
            + "\tjava %1$s speech-transcription gs://YOUR_BUCKET/YOUR_VIDEO.mp4\n",
        Detect.class.getCanonicalName());
  }

  // [START video_speech_transcription_gcs_beta]
  /**
   * Transcribe speech from a video stored on GCS.
   *
   * <p>For every speech transcription in the response, prints the transcript and confidence of
   * its first alternative followed by the start time, end time and text of each word. Prints
   * "No transcription found" for a transcription that has no alternatives.
   *
   * @param gcsUri the path to the video file to analyze.
   * @throws Exception if the client cannot be created, the annotation operation fails, or the
   *     operation does not complete within 300 seconds.
   */
  public static void speechTranscription(String gcsUri) throws Exception {
    // Instantiate a com.google.cloud.videointelligence.v1p1beta1.VideoIntelligenceServiceClient
    try (VideoIntelligenceServiceClient client = VideoIntelligenceServiceClient.create()) {
      // Set the language code
      SpeechTranscriptionConfig config =
          SpeechTranscriptionConfig.newBuilder()
              .setLanguageCode("en-US")
              .setEnableAutomaticPunctuation(true)
              .build();

      // Set the video context with the above configuration
      VideoContext context = VideoContext.newBuilder().setSpeechTranscriptionConfig(config).build();

      // Create the request
      AnnotateVideoRequest request =
          AnnotateVideoRequest.newBuilder()
              .setInputUri(gcsUri)
              .addFeatures(Feature.SPEECH_TRANSCRIPTION)
              .setVideoContext(context)
              .build();

      // asynchronously perform speech transcription on videos
      OperationFuture<AnnotateVideoResponse, AnnotateVideoProgress> response =
          client.annotateVideoAsync(request);

      System.out.println("Waiting for operation to complete...");
      // Block until the operation finishes (at most 300 seconds), then display the results
      for (VideoAnnotationResults results :
          response.get(300, TimeUnit.SECONDS).getAnnotationResultsList()) {
        for (SpeechTranscription transcription : results.getSpeechTranscriptionsList()) {
          // Checking the count up front makes getAlternatives(0) safe, so no
          // IndexOutOfBoundsException handling is needed.
          if (transcription.getAlternativesCount() == 0) {
            System.out.println("No transcription found");
            continue;
          }

          // Print the transcription (only the first alternative is reported)
          SpeechRecognitionAlternative alternative = transcription.getAlternatives(0);
          System.out.printf("Transcript: %s\n", alternative.getTranscript());
          System.out.printf("Confidence: %.2f\n", alternative.getConfidence());

          System.out.println("Word level information:");
          for (WordInfo wordInfo : alternative.getWordsList()) {
            // Combine whole seconds and nanoseconds into fractional seconds
            double startTime =
                wordInfo.getStartTime().getSeconds() + wordInfo.getStartTime().getNanos() / 1e9;
            double endTime =
                wordInfo.getEndTime().getSeconds() + wordInfo.getEndTime().getNanos() / 1e9;
            System.out.printf(
                "\t%4.2fs - %4.2fs: %s\n", startTime, endTime, wordInfo.getWord());
          }
        }
      }
    }
  }
  // [END video_speech_transcription_gcs_beta]
}