Skip to content

Commit

Permalink
Changes done for v2.3.2.
Browse files Browse the repository at this point in the history
  • Loading branch information
nysenthil committed Jan 10, 2022
1 parent f17e1e3 commit cb7ae97
Show file tree
Hide file tree
Showing 14 changed files with 214 additions and 24 deletions.
5 changes: 5 additions & 0 deletions com.ibm.streamsx.sttgateway/CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
# Changes

## v2.3.2
* Jan/10/2022
* Fixed a problem where the call start date time values were not always correctly included in the STT result.
* Added three new parameters to the WatsonSTT operator: speechDetectorSensitivity, backgroundAudioSuppression, and characterInsertionBias.

## v2.3.1
* Sep/20/2021
* Dynamically change the maximum concurrent calls allowed value.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -510,7 +510,7 @@

<parameter>
<name>customizationWeight</name>
<description>This parameter specifies a relative weight for a custom language model as a float64 between 0.0 to 1.0 (Default is 0.0)</description>
<description>This parameter specifies a relative weight for a custom language model as a float64 from 0.0 to 1.0 (Default is 0.0)</description>
<optional>true</optional>
<rewriteAllowed>true</rewriteAllowed>
<expressionMode>AttributeFree</expressionMode>
Expand Down Expand Up @@ -666,6 +666,36 @@
<cardinality>1</cardinality>
</parameter>

<parameter>
<name>speechDetectorSensitivity</name>
<description>This parameter specifies a float64 value from 0.0 to 1.0 to adjust the sensitivity of speech activity detection (Default is 0.5)</description>
<optional>true</optional>
<rewriteAllowed>true</rewriteAllowed>
<expressionMode>AttributeFree</expressionMode>
<type>float64</type>
<cardinality>1</cardinality>
</parameter>

<parameter>
<name>backgroundAudioSuppression</name>
<description>This parameter specifies a float64 value from 0.0 to 1.0 to suppress side conversations or background noise (Default is 0.0)</description>
<optional>true</optional>
<rewriteAllowed>true</rewriteAllowed>
<expressionMode>AttributeFree</expressionMode>
<type>float64</type>
<cardinality>1</cardinality>
</parameter>

<parameter>
<name>characterInsertionBias</name>
<description>This parameter specifies a float64 value from -0.5 to 1.0 to change how prone the STT engine is to insert more transcribed characters (Default is 0.0)</description>
<optional>true</optional>
<rewriteAllowed>true</rewriteAllowed>
<expressionMode>AttributeFree</expressionMode>
<type>float64</type>
<cardinality>1</cardinality>
</parameter>

</parameters>
<inputPorts>
<inputPortSet>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
/*
============================================================
First created on: Jul/01/2018
Last modified on: Sep/12/2021
Last modified on: Jan/04/2022
Please refer to the sttgateway-tech-brief.txt file in the
top-level directory of this toolkit to read about
Expand Down Expand Up @@ -459,6 +459,18 @@ https://cloud.ibm.com/docs/services/speech-to-text?topic=speech-to-text-websocke
my $sttLiveMetricsUpdateNeeded = $model->getParameterByName("sttLiveMetricsUpdateNeeded");
$sttLiveMetricsUpdateNeeded = $sttLiveMetricsUpdateNeeded ? $sttLiveMetricsUpdateNeeded->getValueAt(0)->getCppExpression() : 1;
my $speechDetectorSensitivity = $model->getParameterByName("speechDetectorSensitivity");
# Default: 0.5
$speechDetectorSensitivity = $speechDetectorSensitivity ? $speechDetectorSensitivity->getValueAt(0)->getCppExpression() : 0.5;
my $backgroundAudioSuppression = $model->getParameterByName("backgroundAudioSuppression");
# Default: 0.0
$backgroundAudioSuppression = $backgroundAudioSuppression ? $backgroundAudioSuppression->getValueAt(0)->getCppExpression() : 0.0;
my $characterInsertionBias = $model->getParameterByName("characterInsertionBias");
# Default: 0.0
$characterInsertionBias = $characterInsertionBias ? $characterInsertionBias->getValueAt(0)->getCppExpression() : 0.0;
%>
#include <type_traits>
Expand Down Expand Up @@ -501,7 +513,10 @@ MY_OPERATOR::MY_OPERATOR()
<%=$redactionNeeded%>,
<%=$keywordsSpottingThreshold%>,
<%=$keywordsToBeSpotted%>,
<%=$isTranscriptionCompletedRequested%>
<%=$isTranscriptionCompletedRequested%>,
<%=$speechDetectorSensitivity%>,
<%=$backgroundAudioSuppression%>,
<%=$characterInsertionBias%>
}
)
{}
Expand Down
3 changes: 3 additions & 0 deletions com.ibm.streamsx.sttgateway/impl/include/WatsonSTTConfig.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,9 @@ struct WatsonSTTConfig {
SPL::float64 keywordsSpottingThreshold;
const SPL::list<SPL::rstring> keywordsToBeSpotted;
const bool isTranscriptionCompletedRequested;
SPL::float64 speechDetectorSensitivity;
SPL::float64 backgroundAudioSuppression;
SPL::float64 characterInsertionBias;

// Some definitions
// This time becomes effective when the connectionAttemptsThreshold limit is exceeded
Expand Down
19 changes: 17 additions & 2 deletions com.ibm.streamsx.sttgateway/impl/include/WatsonSTTImpl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -232,6 +232,18 @@ WatsonSTTImpl<OP, OT>::WatsonSTTImpl(OP & splOperator_,Conf config_)
throw std::runtime_error(STTGW_INVALID_PARAM_VALUE_3("WatsonSTT", Conf::keywordsSpottingThreshold, "keywordsSpottingThreshold"));
}

if (Conf::speechDetectorSensitivity < 0.0 || Conf::speechDetectorSensitivity > 1.0) {
throw std::runtime_error(STTGW_INVALID_PARAM_VALUE_3("WatsonSTT", Conf::speechDetectorSensitivity, "speechDetectorSensitivity"));
}

if (Conf::backgroundAudioSuppression < 0.0 || Conf::backgroundAudioSuppression > 1.0) {
throw std::runtime_error(STTGW_INVALID_PARAM_VALUE_3("WatsonSTT", Conf::backgroundAudioSuppression, "backgroundAudioSuppression"));
}

if (Conf::characterInsertionBias < -0.5 || Conf::characterInsertionBias > 1.0) {
throw std::runtime_error(STTGW_INVALID_PARAM_VALUE_3("WatsonSTT", Conf::characterInsertionBias, "characterInsertionBias"));
}

// If the keywords to be spotted list is empty, then disable keywords_spotting.
if (Conf::keywordsToBeSpotted.size() == 0) {
Conf::keywordsSpottingThreshold = 0.0;
Expand All @@ -246,9 +258,9 @@ WatsonSTTImpl<OP, OT>::WatsonSTTImpl(OP & splOperator_,Conf config_)
}

// The parameters maxUtteranceAlternatives, wordAlternativesThreshold, keywordsSpottingThreshold, keywordsToBeSpotted
// are not available in sttResultMose complete
// are not available in sttResultMode complete
// The COF getUtteranceNumber, isFinalizedUtterance, getConfidence, getUtteranceAlternatives
// are not available in sttResultMose complete
// are not available in sttResultMode complete

// Update the operator metric.
sttOutputResultModeMetric->setValueNoLock(Conf::sttOutputResultMode);
Expand Down Expand Up @@ -284,6 +296,9 @@ WatsonSTTImpl<OP, OT>::WatsonSTTImpl(OP & splOperator_,Conf config_)
<< "\nkeywordsSpottingThreshold = " << Conf::keywordsSpottingThreshold
<< "\nkeywordsToBeSpotted = " << Conf::keywordsToBeSpotted
<< "\nisTranscriptionCompletedRequested = " << Conf::isTranscriptionCompletedRequested
<< "\nspeechDetectorSensitivity = " << Conf::speechDetectorSensitivity
<< "\nbackgroundAudioSuppression = " << Conf::backgroundAudioSuppression
<< "\ncharacterInsertionBias = " << Conf::characterInsertionBias
<< "\nconnectionState.wsState.is_lock_free() = " << Rec::wsState.is_lock_free()
<< "\nrecentOTuple.is_lock_free() = " << Rec::recentOTuple.is_lock_free()
<< "\n----------------------------------------------------------------" << std::endl;
Expand Down
14 changes: 14 additions & 0 deletions com.ibm.streamsx.sttgateway/impl/include/WatsonSTTImplReceiver.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -578,6 +578,20 @@ void WatsonSTTImplReceiver<OP, OT>::on_open(client* c, websocketpp::connection_h
msg += ", \"redaction\" : true";
}

// https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-detection
if (speechDetectorSensitivity >= 0.0) {
msg += ", \"speech_detector_sensitivity\" : " + boost::to_string(speechDetectorSensitivity);
}

// https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-detection
if (backgroundAudioSuppression >= 0.0) {
msg += ", \"background_audio_suppression\" : " + boost::to_string(backgroundAudioSuppression);
}

// Add the character insertion bias to the start message.
// The documented lower bound for this parameter is -0.5 (not -5.0), so the
// guard uses that same bound to stay consistent with the operator's
// parameter range check.
// NOTE(review): presumably out-of-range values are already rejected when the
// operator is constructed, making this guard a safety net -- confirm.
if (characterInsertionBias >= -0.5) {
msg += ", \"character_insertion_bias\" : " + boost::to_string(characterInsertionBias);
}

// https://cloud.ibm.com/docs/services/speech-to-text?topic=speech-to-text-output#keyword_spotting
if (keywordsSpottingThreshold > 0.0) {
msg += ", \"keywords_threshold\" : " + boost::to_string(keywordsSpottingThreshold);
Expand Down
2 changes: 1 addition & 1 deletion com.ibm.streamsx.sttgateway/info.xml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

**Note:** This toolkit requires c++11 support.
</description>
<version>2.3.1</version>
<version>2.3.2</version>
<requiredProductVersion>4.2.1.6</requiredProductVersion>
</identity>
<dependencies>
Expand Down
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
/*
==============================================
# Licensed Materials - Property of IBM
# Copyright IBM Corp. 2018, 2021
# Copyright IBM Corp. 2018, 2022
==============================================
*/

/*
==============================================
First created on: Nov/27/2020
Last modified on: Sep/20/2021
Last modified on: Jan/08/2022

IMPORTANT NOTE
--------------
Expand Down Expand Up @@ -846,6 +846,10 @@ public composite VgwDataRouterToWatsonSTT {
// After getting released, such UDP channels will become available for
// doing speech to text work for any new voice calls.
mutable map<rstring, int32> _vgwSessionToCompletedUdpChannelMap = {};
// This map tells us the call start date time string for a given vgwSessionId.
mutable map<rstring, rstring> _vgwSessionToCallStartDateTime = {};
// This map tells us the call start time in epoch seconds for a given vgwSessionId.
mutable map<rstring, int64> _vgwSessionToCallStartTimeInEpochSeconds = {};
mutable BinarySpeech_t _oTuple = {};
mutable rstring _key = "";
}
Expand Down Expand Up @@ -994,6 +998,10 @@ public composite VgwDataRouterToWatsonSTT {
if(BSD.callStartTimeInEpochSeconds == 0l) {
BSD.callStartTimeInEpochSeconds = getSeconds(getTimestamp());
}

// Insert the call start date time values in the state variables.
insertM(_vgwSessionToCallStartDateTime, BSD.vgwSessionId, BSD.callStartDateTime);
insertM(_vgwSessionToCallStartTimeInEpochSeconds, BSD.vgwSessionId, BSD.callStartTimeInEpochSeconds);

rstring socsFileName = dataDirectory() + "/" +
BSD.vgwSessionId + "-call-started.txt";
Expand Down Expand Up @@ -1055,6 +1063,12 @@ public composite VgwDataRouterToWatsonSTT {
(rstring)BSD.totalSpeechDataBytesReceived +
", speechEngineId=" + (rstring)BSD.speechEngineId +
", speechResultProcessorId=" + (rstring)BSD.speechResultProcessorId);
// Set the call start date time values to the tuple attributes.
if(has(_vgwSessionToCallStartDateTime, BSD.vgwSessionId) == true) {
BSD.callStartDateTime = _vgwSessionToCallStartDateTime[BSD.vgwSessionId];
BSD.callStartTimeInEpochSeconds = _vgwSessionToCallStartTimeInEpochSeconds[BSD.vgwSessionId];
}

// Submit this tuple.
submit(BSD, BSDF);
} else {
Expand Down Expand Up @@ -1209,6 +1223,12 @@ public composite VgwDataRouterToWatsonSTT {
// We are done. Remove it from the map as well.
removeM(_vgwSessionToCompletedUdpChannelMap, key2);
}

// Remove the call start date time values from the state variables.
if(has(_vgwSessionToCallStartDateTime, BSD.vgwSessionId) == true) {
removeM(_vgwSessionToCallStartDateTime, BSD.vgwSessionId);
removeM(_vgwSessionToCallStartTimeInEpochSeconds, BSD.vgwSessionId);
}

// At this time, the voice call for this VGW session id has ended.
// We can now write an "End of Call" indicator file in the
Expand Down Expand Up @@ -1353,6 +1373,15 @@ public composite IBMWatsonSpeechToText(input AudioBlobContent, AccessToken;
(boolean)getSubmissionTimeValue("smartFormattingNeeded", "false");
expression<boolean> $redactionNeeded :
(boolean)getSubmissionTimeValue("redactionNeeded", "false");
// Allowed value range for this is from 0.0 to 1.0.
expression<float64> $speechDetectorSensitivity :
(float64)getSubmissionTimeValue("speechDetectorSensitivity", "0.5");
// Allowed value range for this is from 0.0 to 1.0.
expression<float64> $backgroundAudioSuppression :
(float64)getSubmissionTimeValue("backgroundAudioSuppression", "0.0");
// Allowed value range for this is from -0.5 to 1.0.
expression<float64> $characterInsertionBias :
(float64)getSubmissionTimeValue("characterInsertionBias", "0.0");
expression<float64> $keywordsSpottingThreshold :
(float64)getSubmissionTimeValue("keywordsSpottingThreshold", "0.0");
expression<list<rstring>> $keywordsToBeSpotted :
Expand Down Expand Up @@ -1466,6 +1495,9 @@ public composite IBMWatsonSpeechToText(input AudioBlobContent, AccessToken;
wordAlternativesThreshold: $wordAlternativesThreshold;
smartFormattingNeeded: $smartFormattingNeeded;
redactionNeeded: $redactionNeeded;
speechDetectorSensitivity: $speechDetectorSensitivity;
backgroundAudioSuppression: $backgroundAudioSuppression;
characterInsertionBias: $characterInsertionBias;
keywordsSpottingThreshold: $keywordsSpottingThreshold;
keywordsToBeSpotted: $keywordsToBeSpotted;
websocketLoggingNeeded: $sttWebsocketLoggingNeeded;
Expand Down Expand Up @@ -1685,11 +1717,16 @@ public composite STTResultProcessor(input MyTranscriptionResult, BinarySpeechDat
MTR.callStartTimeInEpochSeconds = _callStartTimeInEpochSeconds;
}

// If the user opted for include the time of the
// If the user opted to include the utterance result
// reception time, let us add it to
// the transcription result.
if($includeUtteranceResultReceptionTime == true) {
// This is the current time expressed in ctime format.
MTR.utteranceResultReceptionTime = ctime(getTimestamp());
// We will also compute the utterance reception time, expressed
// in seconds elapsed since the start of the call.
MTR.utteranceRxTime =
getSeconds(getTimestamp()) - MTR.callStartTimeInEpochSeconds;
}

// We will write the transcription results to
Expand Down
4 changes: 2 additions & 2 deletions samples/VgwDataRouterToWatsonSTT/info.xml
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,13 @@
<info:identity>
<info:name>VgwDataRouterToWatsonSTT</info:name>
<info:description>Example that showcases STT on Cloud and STT on CP4D</info:description>
<info:version>1.0.4</info:version>
<info:version>1.0.5</info:version>
<info:requiredProductVersion>4.2.1.6</info:requiredProductVersion>
</info:identity>
<info:dependencies>
<info:toolkit>
<common:name>com.ibm.streamsx.sttgateway</common:name>
<common:version>[2.3.1,7.0.0]</common:version>
<common:version>[2.3.2,7.0.0]</common:version>
</info:toolkit>
<info:toolkit>
<common:name>com.ibm.streamsx.json</common:name>
Expand Down
Loading

0 comments on commit cb7ae97

Please sign in to comment.