diff --git a/com.ibm.streamsx.sttgateway/CHANGELOG.md b/com.ibm.streamsx.sttgateway/CHANGELOG.md
index 2054e7e..f1e0edf 100644
--- a/com.ibm.streamsx.sttgateway/CHANGELOG.md
+++ b/com.ibm.streamsx.sttgateway/CHANGELOG.md
@@ -1,5 +1,10 @@
# Changes
+## v2.3.2
+* Jan/10/2022
+* Fixed a problem where the call start date time values were not always correctly included in the STT result.
+* Added three new parameters to the WatsonSTT operator: speechDetectorSensitivity, backgroundAudioSuppression and characterInsertionBias.
+
## v2.3.1
* Sep/20/2021
* Dynamically change the maximum concurrent calls allowed value.
diff --git a/com.ibm.streamsx.sttgateway/com.ibm.streamsx.sttgateway.watson/WatsonSTT/WatsonSTT.xml b/com.ibm.streamsx.sttgateway/com.ibm.streamsx.sttgateway.watson/WatsonSTT/WatsonSTT.xml
index eb50c90..f6a0c8f 100644
--- a/com.ibm.streamsx.sttgateway/com.ibm.streamsx.sttgateway.watson/WatsonSTT/WatsonSTT.xml
+++ b/com.ibm.streamsx.sttgateway/com.ibm.streamsx.sttgateway.watson/WatsonSTT/WatsonSTT.xml
@@ -510,7 +510,7 @@
          <name>customizationWeight</name>
-          <description>This parameter specifies a relative weight for a custom language model as a float64 between 0.0 to 1.0 (Default is 0.0)</description>
+          <description>This parameter specifies a relative weight for a custom language model as a float64 from 0.0 to 1.0 (Default is 0.0)</description>
          <optional>true</optional>
          <rewriteAllowed>true</rewriteAllowed>
          <expressionMode>AttributeFree</expressionMode>
@@ -666,6 +666,36 @@
          <cardinality>1</cardinality>
+        <parameter>
+          <name>speechDetectorSensitivity</name>
+          <description>This parameter specifies a float64 value from 0.0 to 1.0 to adjust the sensitivity of speech activity detection (Default is 0.5)</description>
+          <optional>true</optional>
+          <rewriteAllowed>true</rewriteAllowed>
+          <expressionMode>AttributeFree</expressionMode>
+          <type>float64</type>
+          <cardinality>1</cardinality>
+        </parameter>
+
+        <parameter>
+          <name>backgroundAudioSuppression</name>
+          <description>This parameter specifies a float64 value from 0.0 to 1.0 to suppress side conversations or background noise (Default is 0.0)</description>
+          <optional>true</optional>
+          <rewriteAllowed>true</rewriteAllowed>
+          <expressionMode>AttributeFree</expressionMode>
+          <type>float64</type>
+          <cardinality>1</cardinality>
+        </parameter>
+
+        <parameter>
+          <name>characterInsertionBias</name>
+          <description>This parameter specifies a float64 value from -0.5 to 1.0 to change how prone the STT engine is to insert more transcribed characters (Default is 0.0)</description>
+          <optional>true</optional>
+          <rewriteAllowed>true</rewriteAllowed>
+          <expressionMode>AttributeFree</expressionMode>
+          <type>float64</type>
+          <cardinality>1</cardinality>
+        </parameter>
+
diff --git a/com.ibm.streamsx.sttgateway/com.ibm.streamsx.sttgateway.watson/WatsonSTT/WatsonSTT_cpp.cgt b/com.ibm.streamsx.sttgateway/com.ibm.streamsx.sttgateway.watson/WatsonSTT/WatsonSTT_cpp.cgt
index 7519566..3550835 100644
--- a/com.ibm.streamsx.sttgateway/com.ibm.streamsx.sttgateway.watson/WatsonSTT/WatsonSTT_cpp.cgt
+++ b/com.ibm.streamsx.sttgateway/com.ibm.streamsx.sttgateway.watson/WatsonSTT/WatsonSTT_cpp.cgt
@@ -8,7 +8,7 @@
/*
============================================================
First created on: Jul/01/2018
-Last modified on: Sep/12/2021
+Last modified on: Jan/04/2022
Please refer to the sttgateway-tech-brief.txt file in the
top-level directory of this toolkit to read about
@@ -459,6 +459,18 @@ https://cloud.ibm.com/docs/services/speech-to-text?topic=speech-to-text-websocke
my $sttLiveMetricsUpdateNeeded = $model->getParameterByName("sttLiveMetricsUpdateNeeded");
$sttLiveMetricsUpdateNeeded = $sttLiveMetricsUpdateNeeded ? $sttLiveMetricsUpdateNeeded->getValueAt(0)->getCppExpression() : 1;
+
+ my $speechDetectorSensitivity = $model->getParameterByName("speechDetectorSensitivity");
+ # Default: 0.5
+ $speechDetectorSensitivity = $speechDetectorSensitivity ? $speechDetectorSensitivity->getValueAt(0)->getCppExpression() : 0.5;
+
+ my $backgroundAudioSuppression = $model->getParameterByName("backgroundAudioSuppression");
+ # Default: 0.0
+ $backgroundAudioSuppression = $backgroundAudioSuppression ? $backgroundAudioSuppression->getValueAt(0)->getCppExpression() : 0.0;
+
+ my $characterInsertionBias = $model->getParameterByName("characterInsertionBias");
+ # Default: 0.0
+ $characterInsertionBias = $characterInsertionBias ? $characterInsertionBias->getValueAt(0)->getCppExpression() : 0.0;
%>
#include
@@ -501,7 +513,10 @@ MY_OPERATOR::MY_OPERATOR()
<%=$redactionNeeded%>,
<%=$keywordsSpottingThreshold%>,
<%=$keywordsToBeSpotted%>,
- <%=$isTranscriptionCompletedRequested%>
+ <%=$isTranscriptionCompletedRequested%>,
+ <%=$speechDetectorSensitivity%>,
+ <%=$backgroundAudioSuppression%>,
+ <%=$characterInsertionBias%>
}
)
{}
diff --git a/com.ibm.streamsx.sttgateway/impl/include/WatsonSTTConfig.hpp b/com.ibm.streamsx.sttgateway/impl/include/WatsonSTTConfig.hpp
index 0816d27..4c48fed 100644
--- a/com.ibm.streamsx.sttgateway/impl/include/WatsonSTTConfig.hpp
+++ b/com.ibm.streamsx.sttgateway/impl/include/WatsonSTTConfig.hpp
@@ -47,6 +47,9 @@ struct WatsonSTTConfig {
SPL::float64 keywordsSpottingThreshold;
const SPL::list keywordsToBeSpotted;
const bool isTranscriptionCompletedRequested;
+ SPL::float64 speechDetectorSensitivity;
+ SPL::float64 backgroundAudioSuppression;
+ SPL::float64 characterInsertionBias;
// Some definitions
//This time becomes effective, when the connectionAttemptsThreshold limit is exceeded
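
The generated constructor shown in WatsonSTT_cpp.cgt above appears to hand these values to the operator implementation as one braced initializer list, so the three new members must sit in WatsonSTTConfig in the same relative order in which the code generator emits them (after isTranscriptionCompletedRequested). A reduced C++ sketch of that pattern follows; MiniConfig and its members are illustrative stand-ins, not the real WatsonSTTConfig.

    #include <iostream>

    // Illustrative stand-in for the tail end of WatsonSTTConfig's member list.
    struct MiniConfig {
        bool isTranscriptionCompletedRequested;
        double speechDetectorSensitivity;
        double backgroundAudioSuppression;
        double characterInsertionBias;
    };

    int main() {
        // Aggregate initialization in the same order the code generator emits:
        // the three new values follow isTranscriptionCompletedRequested.
        MiniConfig config{false, 0.5, 0.0, 0.0};

        std::cout << "speechDetectorSensitivity=" << config.speechDetectorSensitivity
                  << ", backgroundAudioSuppression=" << config.backgroundAudioSuppression
                  << ", characterInsertionBias=" << config.characterInsertionBias
                  << std::endl;
        return 0;
    }
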
diff --git a/com.ibm.streamsx.sttgateway/impl/include/WatsonSTTImpl.hpp b/com.ibm.streamsx.sttgateway/impl/include/WatsonSTTImpl.hpp
index b2e473d..34324b4 100644
--- a/com.ibm.streamsx.sttgateway/impl/include/WatsonSTTImpl.hpp
+++ b/com.ibm.streamsx.sttgateway/impl/include/WatsonSTTImpl.hpp
@@ -232,6 +232,18 @@ WatsonSTTImpl::WatsonSTTImpl(OP & splOperator_,Conf config_)
throw std::runtime_error(STTGW_INVALID_PARAM_VALUE_3("WatsonSTT", Conf::keywordsSpottingThreshold, "keywordsSpottingThreshold"));
}
+ if (Conf::speechDetectorSensitivity < 0.0 || Conf::speechDetectorSensitivity > 1.0) {
+ throw std::runtime_error(STTGW_INVALID_PARAM_VALUE_3("WatsonSTT", Conf::speechDetectorSensitivity, "speechDetectorSensitivity"));
+ }
+
+ if (Conf::backgroundAudioSuppression < 0.0 || Conf::backgroundAudioSuppression > 1.0) {
+ throw std::runtime_error(STTGW_INVALID_PARAM_VALUE_3("WatsonSTT", Conf::backgroundAudioSuppression, "backgroundAudioSuppression"));
+ }
+
+ if (Conf::characterInsertionBias < -0.5 || Conf::characterInsertionBias > 1.0) {
+ throw std::runtime_error(STTGW_INVALID_PARAM_VALUE_3("WatsonSTT", Conf::characterInsertionBias, "characterInsertionBias"));
+ }
+
// If the keywords to be spotted list is empty, then disable keywords_spotting.
if (Conf::keywordsToBeSpotted.size() == 0) {
Conf::keywordsSpottingThreshold = 0.0;
@@ -246,9 +258,9 @@ WatsonSTTImpl::WatsonSTTImpl(OP & splOperator_,Conf config_)
}
// The parameters maxUtteranceAlternatives, wordAlternativesThreshold, keywordsSpottingThreshold, keywordsToBeSpotted
- // are not available in sttResultMose complete
+ // are not available in sttResultMode complete
// The COF getUtteranceNumber, isFinalizedUtterance, getConfidence, getUtteranceAlternatives
- // are not available in sttResultMose complete
+ // are not available in sttResultMode complete
// Update the operator metric.
sttOutputResultModeMetric->setValueNoLock(Conf::sttOutputResultMode);
@@ -284,6 +296,9 @@ WatsonSTTImpl::WatsonSTTImpl(OP & splOperator_,Conf config_)
<< "\nkeywordsSpottingThreshold = " << Conf::keywordsSpottingThreshold
<< "\nkeywordsToBeSpotted = " << Conf::keywordsToBeSpotted
<< "\nisTranscriptionCompletedRequested = " << Conf::isTranscriptionCompletedRequested
+ << "\nspeechDetectorSensitivity = " << Conf::speechDetectorSensitivity
+ << "\nbackgroundAudioSuppression = " << Conf::backgroundAudioSuppression
+ << "\ncharacterInsertionBias = " << Conf::characterInsertionBias
<< "\nconnectionState.wsState.is_lock_free() = " << Rec::wsState.is_lock_free()
<< "\nrecentOTuple.is_lock_free() = " << Rec::recentOTuple.is_lock_free()
<< "\n----------------------------------------------------------------" << std::endl;
diff --git a/com.ibm.streamsx.sttgateway/impl/include/WatsonSTTImplReceiver.hpp b/com.ibm.streamsx.sttgateway/impl/include/WatsonSTTImplReceiver.hpp
index 310bd3a..b7dabe7 100644
--- a/com.ibm.streamsx.sttgateway/impl/include/WatsonSTTImplReceiver.hpp
+++ b/com.ibm.streamsx.sttgateway/impl/include/WatsonSTTImplReceiver.hpp
@@ -578,6 +578,20 @@ void WatsonSTTImplReceiver::on_open(client* c, websocketpp::connection_h
msg += ", \"redaction\" : true";
}
+ // https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-detection
+ if (speechDetectorSensitivity >= 0.0) {
+ msg += ", \"speech_detector_sensitivity\" : " + boost::to_string(speechDetectorSensitivity);
+ }
+
+ // https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-detection
+ if (backgroundAudioSuppression >= 0.0) {
+ msg += ", \"background_audio_suppression\" : " + boost::to_string(backgroundAudioSuppression);
+ }
+
+ if (characterInsertionBias >= -0.5) {
+ msg += ", \"character_insertion_bias\" : " + boost::to_string(characterInsertionBias);
+ }
+
// https://cloud.ibm.com/docs/services/speech-to-text?topic=speech-to-text-output#keyword_spotting
if (keywordsSpottingThreshold > 0.0) {
msg += ", \"keywords_threshold\" : " + boost::to_string(keywordsSpottingThreshold);
diff --git a/com.ibm.streamsx.sttgateway/info.xml b/com.ibm.streamsx.sttgateway/info.xml
index 3e4b864..dc532b6 100644
--- a/com.ibm.streamsx.sttgateway/info.xml
+++ b/com.ibm.streamsx.sttgateway/info.xml
@@ -14,7 +14,7 @@
**Note:** This toolkit requires c++11 support.
- 2.3.1
+ 2.3.2
4.2.1.6
diff --git a/samples/VgwDataRouterToWatsonSTT/com.ibm.streamsx.sttgateway.sample.watsonstt/VgwDataRouterToWatsonSTT.spl b/samples/VgwDataRouterToWatsonSTT/com.ibm.streamsx.sttgateway.sample.watsonstt/VgwDataRouterToWatsonSTT.spl
index 31b45d6..31f576e 100644
--- a/samples/VgwDataRouterToWatsonSTT/com.ibm.streamsx.sttgateway.sample.watsonstt/VgwDataRouterToWatsonSTT.spl
+++ b/samples/VgwDataRouterToWatsonSTT/com.ibm.streamsx.sttgateway.sample.watsonstt/VgwDataRouterToWatsonSTT.spl
@@ -1,14 +1,14 @@
/*
==============================================
# Licensed Materials - Property of IBM
-# Copyright IBM Corp. 2018, 2021
+# Copyright IBM Corp. 2018, 2022
==============================================
*/
/*
==============================================
First created on: Nov/27/2020
-Last modified on: Sep/20/2021
+Last modified on: Jan/08/2022
IMPORTANT NOTE
--------------
@@ -846,6 +846,10 @@ public composite VgwDataRouterToWatsonSTT {
// After getting released, such UDP channels will become available for
// doing speech to text work for any new voice calls.
mutable map _vgwSessionToCompletedUdpChannelMap = {};
+ // This map tells us the call start date time string for a given vgwSessionId.
+ mutable map _vgwSessionToCallStartDateTime = {};
+ // This map tells us the call start time in epoch seconds for a given vgwSessionId.
+ mutable map _vgwSessionToCallStartTimeInEpochSeconds = {};
mutable BinarySpeech_t _oTuple = {};
mutable rstring _key = "";
}
@@ -994,6 +998,10 @@ public composite VgwDataRouterToWatsonSTT {
if(BSD.callStartTimeInEpochSeconds == 0l) {
BSD.callStartTimeInEpochSeconds = getSeconds(getTimestamp());
}
+
+ // Insert the call start date time values in the state variables.
+ insertM(_vgwSessionToCallStartDateTime, BSD.vgwSessionId, BSD.callStartDateTime);
+ insertM(_vgwSessionToCallStartTimeInEpochSeconds, BSD.vgwSessionId, BSD.callStartTimeInEpochSeconds);
rstring socsFileName = dataDirectory() + "/" +
BSD.vgwSessionId + "-call-started.txt";
@@ -1055,6 +1063,12 @@ public composite VgwDataRouterToWatsonSTT {
(rstring)BSD.totalSpeechDataBytesReceived +
", speechEngineId=" + (rstring)BSD.speechEngineId +
", speechResultProcessorId=" + (rstring)BSD.speechResultProcessorId);
+ // Copy the call start date time values into the tuple attributes.
+ if(has(_vgwSessionToCallStartDateTime, BSD.vgwSessionId) == true) {
+ BSD.callStartDateTime = _vgwSessionToCallStartDateTime[BSD.vgwSessionId];
+ BSD.callStartTimeInEpochSeconds = _vgwSessionToCallStartTimeInEpochSeconds[BSD.vgwSessionId];
+ }
+
// Submit this tuple.
submit(BSD, BSDF);
} else {
@@ -1209,6 +1223,12 @@ public composite VgwDataRouterToWatsonSTT {
// We are done. Remove it from the map as well.
removeM(_vgwSessionToCompletedUdpChannelMap, key2);
}
+
+ // Remove the call start date time values from the state variables.
+ if(has(_vgwSessionToCallStartDateTime, BSD.vgwSessionId) == true) {
+ removeM(_vgwSessionToCallStartDateTime, BSD.vgwSessionId);
+ removeM(_vgwSessionToCallStartTimeInEpochSeconds, BSD.vgwSessionId);
+ }
// At this time, the voice call for this VGW session id has ended.
// We can now write an "End of Call" indicator file in the
@@ -1353,6 +1373,15 @@ public composite IBMWatsonSpeechToText(input AudioBlobContent, AccessToken;
(boolean)getSubmissionTimeValue("smartFormattingNeeded", "false");
expression<boolean> $redactionNeeded :
(boolean)getSubmissionTimeValue("redactionNeeded", "false");
+ // Allowed value range for this is from 0.0 to 1.0.
+ expression<float64> $speechDetectorSensitivity :
+ (float64)getSubmissionTimeValue("speechDetectorSensitivity", "0.5");
+ // Allowed value range for this is from 0.0 to 1.0.
+ expression<float64> $backgroundAudioSuppression :
+ (float64)getSubmissionTimeValue("backgroundAudioSuppression", "0.0");
+ // Allowed value range for this is from -0.5 to 1.0.
+ expression<float64> $characterInsertionBias :
+ (float64)getSubmissionTimeValue("characterInsertionBias", "0.0");
expression<float64> $keywordsSpottingThreshold :
(float64)getSubmissionTimeValue("keywordsSpottingThreshold", "0.0");
expression<list<rstring>> $keywordsToBeSpotted :
@@ -1466,6 +1495,9 @@ public composite IBMWatsonSpeechToText(input AudioBlobContent, AccessToken;
wordAlternativesThreshold: $wordAlternativesThreshold;
smartFormattingNeeded: $smartFormattingNeeded;
redactionNeeded: $redactionNeeded;
+ speechDetectorSensitivity: $speechDetectorSensitivity;
+ backgroundAudioSuppression: $backgroundAudioSuppression;
+ characterInsertionBias: $characterInsertionBias;
keywordsSpottingThreshold: $keywordsSpottingThreshold;
keywordsToBeSpotted: $keywordsToBeSpotted;
websocketLoggingNeeded: $sttWebsocketLoggingNeeded;
@@ -1685,11 +1717,16 @@ public composite STTResultProcessor(input MyTranscriptionResult, BinarySpeechDat
MTR.callStartTimeInEpochSeconds = _callStartTimeInEpochSeconds;
}
- // If the user opted for include the time of the
+ // If the user opted for including the
// utterance result reception time, let us add it to
// the transcription result.
if($includeUtteranceResultReceptionTime == true) {
+ // This is the current time expressed in ctime format.
MTR.utteranceResultReceptionTime = ctime(getTimestamp());
+ // We will also record the utterance reception time expressed
+ // in seconds elapsed since the start of the call.
+ MTR.utteranceRxTime =
+ getSeconds(getTimestamp()) - MTR.callStartTimeInEpochSeconds;
}
// We will write the transcription results to
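
The call start date time fix in this sample follows a simple cache-and-release pattern: remember the values when the call starts, copy them onto the end-of-call summary tuple, and drop them when the call ends. The SPL maps above are the real implementation; the following C++ analogue (with an illustrative CallStart struct and made-up values) only sketches that lifecycle.

    #include <cstdint>
    #include <iostream>
    #include <map>
    #include <string>

    // Illustrative stand-in for the call start values cached per VGW session.
    struct CallStart {
        std::string callStartDateTime;
        int64_t callStartTimeInEpochSeconds;
    };

    int main() {
        std::map<std::string, CallStart> callStartByVgwSession;

        // 1) Call start: cache the values keyed by the VGW session id.
        callStartByVgwSession["5362954"] = {"2022-01-08 10:15:00", 1641636900};

        // 2) End-of-call summary tuple: copy the cached values back, if present.
        auto it = callStartByVgwSession.find("5362954");
        if (it != callStartByVgwSession.end()) {
            std::cout << "callStartDateTime=" << it->second.callStartDateTime
                      << ", callStartTimeInEpochSeconds="
                      << it->second.callStartTimeInEpochSeconds << std::endl;
        }

        // 3) Call end: remove the cached entry so the map does not grow without bound.
        callStartByVgwSession.erase("5362954");
        return 0;
    }
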
diff --git a/samples/VgwDataRouterToWatsonSTT/info.xml b/samples/VgwDataRouterToWatsonSTT/info.xml
index fa23913..2817911 100644
--- a/samples/VgwDataRouterToWatsonSTT/info.xml
+++ b/samples/VgwDataRouterToWatsonSTT/info.xml
@@ -4,13 +4,13 @@
VgwDataRouterToWatsonSTT
Example that showcases STT on Cloud and STT on CP4D
- 1.0.4
+ 1.0.5
4.2.1.6
com.ibm.streamsx.sttgateway
- [2.3.1,7.0.0]
+ [2.3.2,7.0.0]
com.ibm.streamsx.json
diff --git a/samples/VgwDataRouterToWatsonSTTMini/com.ibm.streamsx.sttgateway.sample.watsonstt/VgwDataRouterToWatsonSTT.spl b/samples/VgwDataRouterToWatsonSTTMini/com.ibm.streamsx.sttgateway.sample.watsonstt/VgwDataRouterToWatsonSTT.spl
index 913bb08..005d0af 100644
--- a/samples/VgwDataRouterToWatsonSTTMini/com.ibm.streamsx.sttgateway.sample.watsonstt/VgwDataRouterToWatsonSTT.spl
+++ b/samples/VgwDataRouterToWatsonSTTMini/com.ibm.streamsx.sttgateway.sample.watsonstt/VgwDataRouterToWatsonSTT.spl
@@ -1,14 +1,14 @@
/*
==============================================
# Licensed Materials - Property of IBM
-# Copyright IBM Corp. 2018, 2021
+# Copyright IBM Corp. 2018, 2022
==============================================
*/
/*
==============================================
First created on: Nov/27/2020
-Last modified on: Sep/20/2021
+Last modified on: Jan/08/2022
IMPORTANT NOTE
--------------
@@ -342,6 +342,9 @@ type MySTTResult_t = rstring vgwSessionId, boolean isCustomerSpeechData,
* @param wordAlternativesThreshold wordAlternativesThreshold
* @param smartFormattingNeeded smartFormattingNeeded
* @param redactionNeeded redactionNeeded
+ * @param speechDetectorSensitivity speechDetectorSensitivity
+ * @param backgroundAudioSuppression backgroundAudioSuppression
+ * @param characterInsertionBias characterInsertionBias
* @param keywordsSpottingThreshold keywordsSpottingThreshold
* @param keywordsToBeSpotted keywordsToBeSpotted
* @param sttWebsocketLoggingNeeded sttWebsocketLoggingNeeded
@@ -760,6 +763,10 @@ public composite VgwDataRouterToWatsonSTT {
// After getting released, such UDP channels will become available for
// doing speech to text work for any new voice calls.
mutable map _vgwSessionToCompletedUdpChannelMap = {};
+ // This map tells us the call start date time string for a given vgwSessionId.
+ mutable map _vgwSessionToCallStartDateTime = {};
+ // This map tells us the call start time in epoch seconds for a given vgwSessionId.
+ mutable map _vgwSessionToCallStartTimeInEpochSeconds = {};
mutable BinarySpeech_t _oTuple = {};
mutable rstring _key = "";
}
@@ -909,6 +916,10 @@ public composite VgwDataRouterToWatsonSTT {
BSD.callStartTimeInEpochSeconds = getSeconds(getTimestamp());
}
+ // Insert the call start date time values in the state variables.
+ insertM(_vgwSessionToCallStartDateTime, BSD.vgwSessionId, BSD.callStartDateTime);
+ insertM(_vgwSessionToCallStartTimeInEpochSeconds, BSD.vgwSessionId, BSD.callStartTimeInEpochSeconds);
+
rstring socsFileName = dataDirectory() + "/" +
BSD.vgwSessionId + "-call-started.txt";
uint64 fileHandle = fopen (socsFileName, "w+", err);
@@ -969,6 +980,12 @@ public composite VgwDataRouterToWatsonSTT {
(rstring)BSD.totalSpeechDataBytesReceived +
", speechEngineId=" + (rstring)BSD.speechEngineId +
", speechResultProcessorId=" + (rstring)BSD.speechResultProcessorId);
+ // Copy the call start date time values into the tuple attributes.
+ if(has(_vgwSessionToCallStartDateTime, BSD.vgwSessionId) == true) {
+ BSD.callStartDateTime = _vgwSessionToCallStartDateTime[BSD.vgwSessionId];
+ BSD.callStartTimeInEpochSeconds = _vgwSessionToCallStartTimeInEpochSeconds[BSD.vgwSessionId];
+ }
+
// Submit this tuple.
submit(BSD, BSDF);
} else {
@@ -1119,6 +1136,12 @@ public composite VgwDataRouterToWatsonSTT {
removeM(_vgwSessionToCompletedUdpChannelMap, key2);
}
+ // Remove the call start date time values from the state variables.
+ if(has(_vgwSessionToCallStartDateTime, BSD.vgwSessionId) == true) {
+ removeM(_vgwSessionToCallStartDateTime, BSD.vgwSessionId);
+ removeM(_vgwSessionToCallStartTimeInEpochSeconds, BSD.vgwSessionId);
+ }
+
// At this time, the voice call for this VGW session id has ended.
// We can now write an "End of Call" indicator file in the
// application's data directory. e-g: 5362954-call-completed.txt
@@ -1250,6 +1273,15 @@ public composite IBMWatsonSpeechToText(input AudioBlobContent, AccessToken;
(boolean)getSubmissionTimeValue("smartFormattingNeeded", "false");
expression<boolean> $redactionNeeded :
(boolean)getSubmissionTimeValue("redactionNeeded", "false");
+ // Allowed value range for this is from 0.0 to 1.0.
+ expression<float64> $speechDetectorSensitivity :
+ (float64)getSubmissionTimeValue("speechDetectorSensitivity", "0.5");
+ // Allowed value range for this is from 0.0 to 1.0.
+ expression<float64> $backgroundAudioSuppression :
+ (float64)getSubmissionTimeValue("backgroundAudioSuppression", "0.0");
+ // Allowed value range for this is from -0.5 to 1.0.
+ expression<float64> $characterInsertionBias :
+ (float64)getSubmissionTimeValue("characterInsertionBias", "0.0");
expression<float64> $keywordsSpottingThreshold :
(float64)getSubmissionTimeValue("keywordsSpottingThreshold", "0.0");
expression<list<rstring>> $keywordsToBeSpotted :
@@ -1363,6 +1395,9 @@ public composite IBMWatsonSpeechToText(input AudioBlobContent, AccessToken;
wordAlternativesThreshold: $wordAlternativesThreshold;
smartFormattingNeeded: $smartFormattingNeeded;
redactionNeeded: $redactionNeeded;
+ speechDetectorSensitivity: $speechDetectorSensitivity;
+ backgroundAudioSuppression: $backgroundAudioSuppression;
+ characterInsertionBias: $characterInsertionBias;
keywordsSpottingThreshold: $keywordsSpottingThreshold;
keywordsToBeSpotted: $keywordsToBeSpotted;
websocketLoggingNeeded: $sttWebsocketLoggingNeeded;
@@ -1505,7 +1540,7 @@ public composite STTResultProcessor(input MyTranscriptionResult) {
MTR.callStartTimeInEpochSeconds = _callStartTimeInEpochSeconds;
}
- // If the user opted for include the time of the
+ // If the user opted for including the
// utterance result reception time, let us add it to
// the transcription result.
if($includeUtteranceResultReceptionTime == true) {
@@ -1535,8 +1570,8 @@ public composite STTResultProcessor(input MyTranscriptionResult) {
// replace this operator with an Export operator.
// But, exporting from multiple speech processors may not be
// the right approach. A preferred and better approach would be
- // to use a WebSocketSink operator to send the utterances to be
- // received on the other end by a WebSocketSendReceive operator.
+ // to use a WebSocketSendReceive operator to send the utterances to be
+ // received on the other end by a WebSocketSource operator.
// That approach will scale well without putting more stress on the
// Streams instance since the data transfer is done outside the
// confines of the Streams runtime. If more details are needed about
diff --git a/samples/VgwDataRouterToWatsonSTTMini/info.xml b/samples/VgwDataRouterToWatsonSTTMini/info.xml
index 85bbe24..9556d23 100644
--- a/samples/VgwDataRouterToWatsonSTTMini/info.xml
+++ b/samples/VgwDataRouterToWatsonSTTMini/info.xml
@@ -3,13 +3,13 @@
VgwDataRouterToWatsonSTTMini
Example that showcases STT on Cloud and STT on CP4D
- 1.0.4
+ 1.0.5
4.2.1.6
com.ibm.streamsx.sttgateway
- [2.3.1,7.0.0]
+ [2.3.2,7.0.0]
com.ibm.streamsx.websocket
diff --git a/samples/VoiceGatewayToStreamsToWatsonSTT/com.ibm.streamsx.sttgateway.sample.watsonstt/VoiceGatewayToStreamsToWatsonSTT.spl b/samples/VoiceGatewayToStreamsToWatsonSTT/com.ibm.streamsx.sttgateway.sample.watsonstt/VoiceGatewayToStreamsToWatsonSTT.spl
index 8c7b74b..48306d9 100644
--- a/samples/VoiceGatewayToStreamsToWatsonSTT/com.ibm.streamsx.sttgateway.sample.watsonstt/VoiceGatewayToStreamsToWatsonSTT.spl
+++ b/samples/VoiceGatewayToStreamsToWatsonSTT/com.ibm.streamsx.sttgateway.sample.watsonstt/VoiceGatewayToStreamsToWatsonSTT.spl
@@ -1,14 +1,14 @@
/*
==============================================
# Licensed Materials - Property of IBM
-# Copyright IBM Corp. 2018, 2021
+# Copyright IBM Corp. 2018, 2022
==============================================
*/
/*
==============================================
First created on: Sep/25/2019
-Last modified on: Sep/20/2021
+Last modified on: Jan/08/2022
A) What does this example application do?
--------------------------------------
@@ -310,6 +310,9 @@ type MySTTResult_t = rstring vgwSessionId, boolean isCustomerSpeechData,
* @param wordAlternativesThreshold wordAlternativesThreshold
* @param smartFormattingNeeded smartFormattingNeeded
* @param redactionNeeded redactionNeeded
+ * @param speechDetectorSensitivity speechDetectorSensitivity
+ * @param backgroundAudioSuppression backgroundAudioSuppression
+ * @param characterInsertionBias characterInsertionBias
* @param keywordsSpottingThreshold keywordsSpottingThreshold
* @param keywordsToBeSpotted keywordsToBeSpotted
* @param sttWebsocketLoggingNeeded sttWebsocketLoggingNeeded
@@ -690,6 +693,10 @@ public composite VoiceGatewayToStreamsToWatsonSTT {
// After getting released, such UDP channels will become available for
// doing speech to text work for any new voice calls.
mutable map _vgwSessionToCompletedUdpChannelMap = {};
+ // This map tells us the call start date time string for a given vgwSessionId.
+ mutable map _vgwSessionToCallStartDateTime = {};
+ // This map tells us the call start time in epoch seconds for a given vgwSessionId.
+ mutable map _vgwSessionToCallStartTimeInEpochSeconds = {};
mutable BinarySpeech_t _oTuple = {};
mutable rstring _key = "";
}
@@ -839,6 +846,10 @@ public composite VoiceGatewayToStreamsToWatsonSTT {
BSD.callStartTimeInEpochSeconds = getSeconds(getTimestamp());
}
+ // Insert the call start date time values in the state variables.
+ insertM(_vgwSessionToCallStartDateTime, BSD.vgwSessionId, BSD.callStartDateTime);
+ insertM(_vgwSessionToCallStartTimeInEpochSeconds, BSD.vgwSessionId, BSD.callStartTimeInEpochSeconds);
+
rstring socsFileName = dataDirectory() + "/" +
BSD.vgwSessionId + "-call-started.txt";
uint64 fileHandle = fopen (socsFileName, "w+", err);
@@ -895,6 +906,12 @@ public composite VoiceGatewayToStreamsToWatsonSTT {
(rstring)BSD.totalSpeechDataBytesReceived +
", sttEngineId=" + (rstring)BSD.sttEngineId +
", sttResultProcessorId=" + (rstring)BSD.sttResultProcessorId);
+ // Copy the call start date time values into the tuple attributes.
+ if(has(_vgwSessionToCallStartDateTime, BSD.vgwSessionId) == true) {
+ BSD.callStartDateTime = _vgwSessionToCallStartDateTime[BSD.vgwSessionId];
+ BSD.callStartTimeInEpochSeconds = _vgwSessionToCallStartTimeInEpochSeconds[BSD.vgwSessionId];
+ }
+
// Submit this tuple.
submit(BSD, BSDF);
} else {
@@ -1048,6 +1065,12 @@ public composite VoiceGatewayToStreamsToWatsonSTT {
// We are done. Remove it from the map as well.
removeM(_vgwSessionToCompletedUdpChannelMap, key2);
}
+
+ // Remove the call start date time values from the state variables.
+ if(has(_vgwSessionToCallStartDateTime, BSD.vgwSessionId) == true) {
+ removeM(_vgwSessionToCallStartDateTime, BSD.vgwSessionId);
+ removeM(_vgwSessionToCallStartTimeInEpochSeconds, BSD.vgwSessionId);
+ }
// At this time, the voice call for this VGW session id has ended.
// We can now write an "End of Call" indicator file in the
@@ -1193,6 +1216,15 @@ public composite IBMWatsonSpeechToText(input AudioBlobContent, AccessToken;
(boolean)getSubmissionTimeValue("smartFormattingNeeded", "false");
expression<boolean> $redactionNeeded :
(boolean)getSubmissionTimeValue("redactionNeeded", "false");
+ // Allowed value range for this is from 0.0 to 1.0.
+ expression<float64> $speechDetectorSensitivity :
+ (float64)getSubmissionTimeValue("speechDetectorSensitivity", "0.5");
+ // Allowed value range for this is from 0.0 to 1.0.
+ expression<float64> $backgroundAudioSuppression :
+ (float64)getSubmissionTimeValue("backgroundAudioSuppression", "0.0");
+ // Allowed value range for this is from -0.5 to 1.0.
+ expression<float64> $characterInsertionBias :
+ (float64)getSubmissionTimeValue("characterInsertionBias", "0.0");
expression<float64> $keywordsSpottingThreshold :
(float64)getSubmissionTimeValue("keywordsSpottingThreshold", "0.0");
expression<list<rstring>> $keywordsToBeSpotted :
@@ -1306,6 +1338,9 @@ public composite IBMWatsonSpeechToText(input AudioBlobContent, AccessToken;
wordAlternativesThreshold: $wordAlternativesThreshold;
smartFormattingNeeded: $smartFormattingNeeded;
redactionNeeded: $redactionNeeded;
+ speechDetectorSensitivity: $speechDetectorSensitivity;
+ backgroundAudioSuppression: $backgroundAudioSuppression;
+ characterInsertionBias: $characterInsertionBias;
keywordsSpottingThreshold: $keywordsSpottingThreshold;
keywordsToBeSpotted: $keywordsToBeSpotted;
websocketLoggingNeeded: $sttWebsocketLoggingNeeded;
@@ -1525,10 +1560,11 @@ public composite STTResultProcessor(input MyTranscriptionResult, BinarySpeechDat
MTR.callStartTimeInEpochSeconds = _callStartTimeInEpochSeconds;
}
- // If the user opted for include the time of the
+ // If the user opted for including the
// utterance result reception time, let us add it to
// the transcription result.
if($includeUtteranceResultReceptionTime == true) {
+ // This is the current time expressed in ctime format.
MTR.utteranceResultReceptionTime = ctime(getTimestamp());
// We will also record the utterance reception time expressed
// in seconds elapsed since the start of the call.
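
As in the other samples, the utterance result reception time is recorded in two forms: a formatted timestamp and an offset in seconds since the call started. The sample computes this in SPL with ctime(), getSeconds() and getTimestamp(); the short C++ sketch below only illustrates the same arithmetic with an assumed call start value.

    #include <cstdint>
    #include <ctime>
    #include <iostream>

    int main() {
        // Assumed call start time in epoch seconds (cached when the call began).
        int64_t callStartTimeInEpochSeconds = 1641636900;

        // Current time when the utterance result arrives.
        std::time_t now = std::time(nullptr);

        // Formatted reception time, comparable to SPL's ctime(getTimestamp()).
        std::cout << "utteranceResultReceptionTime=" << std::ctime(&now);

        // Seconds elapsed since the start of the call, comparable to
        // getSeconds(getTimestamp()) - callStartTimeInEpochSeconds in the sample.
        int64_t utteranceRxTime = static_cast<int64_t>(now) - callStartTimeInEpochSeconds;
        std::cout << "utteranceRxTime=" << utteranceRxTime << std::endl;
        return 0;
    }
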
diff --git a/samples/VoiceGatewayToStreamsToWatsonSTT/info.xml b/samples/VoiceGatewayToStreamsToWatsonSTT/info.xml
index 9f531c2..15597c9 100644
--- a/samples/VoiceGatewayToStreamsToWatsonSTT/info.xml
+++ b/samples/VoiceGatewayToStreamsToWatsonSTT/info.xml
@@ -3,13 +3,13 @@
VoiceGatewayToStreamsToWatsonSTT
Example that showcases STT on Cloud and STT on CP4D
- 1.0.7
+ 1.0.8
4.2.1.6
com.ibm.streamsx.sttgateway
- [2.3.1,7.0.0)
+ [2.3.2,7.0.0)
com.ibm.streamsx.json
diff --git a/sttgateway-tech-brief.txt b/sttgateway-tech-brief.txt
index fd14e28..6320aa5 100644
--- a/sttgateway-tech-brief.txt
+++ b/sttgateway-tech-brief.txt
@@ -1,6 +1,6 @@
============================================================
First created on: July/01/2018
-Last modified on: September/20/2021
+Last modified on: January/10/2022
Purpose of this toolkit
-----------------------