Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .bumpversion.cfg
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[bumpversion]
current_version = 10.0.0
current_version = 11.0.0
commit = True
message = Bump version: {current_version} → {new_version} [skip ci]

Expand Down
21 changes: 19 additions & 2 deletions ibm_watson/speech_to_text_v1.py
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,7 @@ def recognize(
end_of_phrase_silence_time: Optional[float] = None,
split_transcript_at_phrase_end: Optional[bool] = None,
speech_detector_sensitivity: Optional[float] = None,
sad_module: Optional[int] = None,
background_audio_suppression: Optional[float] = None,
low_latency: Optional[bool] = None,
character_insertion_bias: Optional[float] = None,
Expand Down Expand Up @@ -351,8 +352,9 @@ def recognize(
activity is detected in the stream. This can be used both in standard and
low latency mode. This feature enables client applications to know that
some words/speech has been detected and the service is in the process of
decoding. This can be used in lieu of interim results in standard mode. See
[Using speech recognition
decoding. This can be used in lieu of interim results in standard mode. Use
`sad_module: 2` to increase accuracy and performance in detecting speech
boundaries within the audio stream. See [Using speech recognition
parameters](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-service-features#features-parameters).
:param str language_customization_id: (optional) The customization ID
(GUID) of a custom language model that is to be used with the recognition
Expand Down Expand Up @@ -555,6 +557,12 @@ def recognize(
sensitivity](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-detection#detection-parameters-sensitivity)
and [Language model
support](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-detection#detection-support).
:param int sad_module: (optional) Detects speech boundaries within the
audio stream with better performance, improved noise suppression, faster
responsiveness, and increased accuracy.
Specify `sad_module: 2`.
See [Speech Activity Detection
(SAD)](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-detection#sad).
:param float background_audio_suppression: (optional) The level to which
the service is to suppress background audio based on its volume to prevent
it from being transcribed as speech. Use the parameter to suppress side
Expand Down Expand Up @@ -647,6 +655,7 @@ def recognize(
'end_of_phrase_silence_time': end_of_phrase_silence_time,
'split_transcript_at_phrase_end': split_transcript_at_phrase_end,
'speech_detector_sensitivity': speech_detector_sensitivity,
'sad_module': sad_module,
'background_audio_suppression': background_audio_suppression,
'low_latency': low_latency,
'character_insertion_bias': character_insertion_bias,
Expand Down Expand Up @@ -845,6 +854,7 @@ def create_job(
end_of_phrase_silence_time: Optional[float] = None,
split_transcript_at_phrase_end: Optional[bool] = None,
speech_detector_sensitivity: Optional[float] = None,
sad_module: Optional[int] = None,
background_audio_suppression: Optional[float] = None,
low_latency: Optional[bool] = None,
character_insertion_bias: Optional[float] = None,
Expand Down Expand Up @@ -1244,6 +1254,12 @@ def create_job(
sensitivity](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-detection#detection-parameters-sensitivity)
and [Language model
support](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-detection#detection-support).
:param int sad_module: (optional) Detects speech boundaries within the
audio stream with better performance, improved noise suppression, faster
responsiveness, and increased accuracy.
Specify `sad_module: 2`.
See [Speech Activity Detection
(SAD)](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-detection#sad).
:param float background_audio_suppression: (optional) The level to which
the service is to suppress background audio based on its volume to prevent
it from being transcribed as speech. Use the parameter to suppress side
Expand Down Expand Up @@ -1341,6 +1357,7 @@ def create_job(
'end_of_phrase_silence_time': end_of_phrase_silence_time,
'split_transcript_at_phrase_end': split_transcript_at_phrase_end,
'speech_detector_sensitivity': speech_detector_sensitivity,
'sad_module': sad_module,
'background_audio_suppression': background_audio_suppression,
'low_latency': low_latency,
'character_insertion_bias': character_insertion_bias,
Expand Down
8 changes: 8 additions & 0 deletions ibm_watson/speech_to_text_v1_adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ def recognize_using_websocket(self,
background_audio_suppression=None,
low_latency=None,
character_insertion_bias=None,
sad_module=None,
**kwargs):
"""
Sends audio for speech recognition using web sockets.
Expand Down Expand Up @@ -309,6 +310,12 @@ def recognize_using_websocket(self,
`Narrowband` models.
See [Character insertion
bias](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-parsing#insertion-bias).
:param int sad_module: (optional) Detects speech boundaries within the
audio stream with better performance, improved noise suppression, faster
responsiveness, and increased accuracy.
Specify `sad_module: 2`.
See [Speech Activity Detection
(SAD)](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-detection#sad).
:param dict headers: A `dict` containing the request headers
:return: A `dict` containing the `SpeechRecognitionResults` response.
:rtype: dict
Expand Down Expand Up @@ -377,6 +384,7 @@ def recognize_using_websocket(self,
'background_audio_suppression': background_audio_suppression,
'character_insertion_bias': character_insertion_bias,
'low_latency': low_latency,
'sad_module': sad_module,
}
options = {k: v for k, v in options.items() if v is not None}
request['options'] = options
Expand Down
27 changes: 27 additions & 0 deletions ibm_watson/text_to_speech_v1.py
Original file line number Diff line number Diff line change
Expand Up @@ -1808,22 +1808,29 @@ class Voice(str, Enum):
DE_DE_ERIKAV3VOICE = 'de-DE_ErikaV3Voice'
EN_AU_HEIDIEXPRESSIVE = 'en-AU_HeidiExpressive'
EN_AU_JACKEXPRESSIVE = 'en-AU_JackExpressive'
EN_CA_HANNAHNATURAL = 'en-CA_HannahNatural'
EN_GB_CHARLOTTEV3VOICE = 'en-GB_CharlotteV3Voice'
EN_GB_CHLOENATURAL = 'en-GB_ChloeNatural'
EN_GB_GEORGEEXPRESSIVE = 'en-GB_GeorgeExpressive'
EN_GB_JAMESV3VOICE = 'en-GB_JamesV3Voice'
EN_GB_GEORGENATURAL = 'en-GB_GeorgeNatural'
EN_GB_KATEV3VOICE = 'en-GB_KateV3Voice'
EN_US_ALLISONEXPRESSIVE = 'en-US_AllisonExpressive'
EN_US_ALLISONV3VOICE = 'en-US_AllisonV3Voice'
EN_US_ELLIENATURAL = 'en-US_EllieNatural'
EN_US_EMILYV3VOICE = 'en-US_EmilyV3Voice'
EN_US_EMMAEXPRESSIVE = 'en-US_EmmaExpressive'
EN_US_EMMANATURAL = 'en-US_EmmaNatural'
EN_US_ETHANNATURAL = 'en-US_EthanNatural'
EN_US_HENRYV3VOICE = 'en-US_HenryV3Voice'
EN_US_JACKSONNATURAL = 'en-US_JacksonNatural'
EN_US_KEVINV3VOICE = 'en-US_KevinV3Voice'
EN_US_LISAEXPRESSIVE = 'en-US_LisaExpressive'
EN_US_LISAV3VOICE = 'en-US_LisaV3Voice'
EN_US_MICHAELEXPRESSIVE = 'en-US_MichaelExpressive'
EN_US_MICHAELV3VOICE = 'en-US_MichaelV3Voice'
EN_US_OLIVIAV3VOICE = 'en-US_OliviaV3Voice'
EN_US_VICTORIANATURAL = 'en-US_VictoriaNatural'
ES_ES_ENRIQUEV3VOICE = 'es-ES_EnriqueV3Voice'
ES_ES_LAURAV3VOICE = 'es-ES_LauraV3Voice'
ES_LA_DANIELAEXPRESSIVE = 'es-LA_DanielaExpressive'
Expand All @@ -1836,8 +1843,10 @@ class Voice(str, Enum):
JA_JP_EMIV3VOICE = 'ja-JP_EmiV3Voice'
KO_KR_JINV3VOICE = 'ko-KR_JinV3Voice'
NL_NL_MERELV3VOICE = 'nl-NL_MerelV3Voice'
PT_BR_CAMILANATURAL = 'pt-BR_CamilaNatural'
PT_BR_ISABELAV3VOICE = 'pt-BR_IsabelaV3Voice'
PT_BR_LUCASEXPRESSIVE = 'pt-BR_LucasExpressive'
PT_BR_LUCASNATURAL = 'pt-BR_LucasNatural'


class SynthesizeEnums:
Expand Down Expand Up @@ -1887,22 +1896,29 @@ class Voice(str, Enum):
DE_DE_ERIKAV3VOICE = 'de-DE_ErikaV3Voice'
EN_AU_HEIDIEXPRESSIVE = 'en-AU_HeidiExpressive'
EN_AU_JACKEXPRESSIVE = 'en-AU_JackExpressive'
EN_CA_HANNAHNATURAL = 'en-CA_HannahNatural'
EN_GB_CHARLOTTEV3VOICE = 'en-GB_CharlotteV3Voice'
EN_GB_CHLOENATURAL = 'en-GB_ChloeNatural'
EN_GB_GEORGEEXPRESSIVE = 'en-GB_GeorgeExpressive'
EN_GB_JAMESV3VOICE = 'en-GB_JamesV3Voice'
EN_GB_GEORGENATURAL = 'en-GB_GeorgeNatural'
EN_GB_KATEV3VOICE = 'en-GB_KateV3Voice'
EN_US_ALLISONEXPRESSIVE = 'en-US_AllisonExpressive'
EN_US_ALLISONV3VOICE = 'en-US_AllisonV3Voice'
EN_US_ELLIENATURAL = 'en-US_EllieNatural'
EN_US_EMILYV3VOICE = 'en-US_EmilyV3Voice'
EN_US_EMMAEXPRESSIVE = 'en-US_EmmaExpressive'
EN_US_EMMANATURAL = 'en-US_EmmaNatural'
EN_US_ETHANNATURAL = 'en-US_EthanNatural'
EN_US_HENRYV3VOICE = 'en-US_HenryV3Voice'
EN_US_JACKSONNATURAL = 'en-US_JacksonNatural'
EN_US_KEVINV3VOICE = 'en-US_KevinV3Voice'
EN_US_LISAEXPRESSIVE = 'en-US_LisaExpressive'
EN_US_LISAV3VOICE = 'en-US_LisaV3Voice'
EN_US_MICHAELEXPRESSIVE = 'en-US_MichaelExpressive'
EN_US_MICHAELV3VOICE = 'en-US_MichaelV3Voice'
EN_US_OLIVIAV3VOICE = 'en-US_OliviaV3Voice'
EN_US_VICTORIANATURAL = 'en-US_VictoriaNatural'
ES_ES_ENRIQUEV3VOICE = 'es-ES_EnriqueV3Voice'
ES_ES_LAURAV3VOICE = 'es-ES_LauraV3Voice'
ES_LA_DANIELAEXPRESSIVE = 'es-LA_DanielaExpressive'
Expand All @@ -1915,8 +1931,10 @@ class Voice(str, Enum):
JA_JP_EMIV3VOICE = 'ja-JP_EmiV3Voice'
KO_KR_JINV3VOICE = 'ko-KR_JinV3Voice'
NL_NL_MERELV3VOICE = 'nl-NL_MerelV3Voice'
PT_BR_CAMILANATURAL = 'pt-BR_CamilaNatural'
PT_BR_ISABELAV3VOICE = 'pt-BR_IsabelaV3Voice'
PT_BR_LUCASEXPRESSIVE = 'pt-BR_LucasExpressive'
PT_BR_LUCASNATURAL = 'pt-BR_LucasNatural'

class SpellOutMode(str, Enum):
"""
Expand Down Expand Up @@ -1965,22 +1983,29 @@ class Voice(str, Enum):
DE_DE_ERIKAV3VOICE = 'de-DE_ErikaV3Voice'
EN_AU_HEIDIEXPRESSIVE = 'en-AU_HeidiExpressive'
EN_AU_JACKEXPRESSIVE = 'en-AU_JackExpressive'
EN_CA_HANNAHNATURAL = 'en-CA_HannahNatural'
EN_GB_CHARLOTTEV3VOICE = 'en-GB_CharlotteV3Voice'
EN_GB_CHLOENATURAL = 'en-GB_ChloeNatural'
EN_GB_GEORGEEXPRESSIVE = 'en-GB_GeorgeExpressive'
EN_GB_JAMESV3VOICE = 'en-GB_JamesV3Voice'
EN_GB_GEORGENATURAL = 'en-GB_GeorgeNatural'
EN_GB_KATEV3VOICE = 'en-GB_KateV3Voice'
EN_US_ALLISONEXPRESSIVE = 'en-US_AllisonExpressive'
EN_US_ALLISONV3VOICE = 'en-US_AllisonV3Voice'
EN_US_ELLIENATURAL = 'en-US_EllieNatural'
EN_US_EMILYV3VOICE = 'en-US_EmilyV3Voice'
EN_US_EMMAEXPRESSIVE = 'en-US_EmmaExpressive'
EN_US_EMMANATURAL = 'en-US_EmmaNatural'
EN_US_ETHANNATURAL = 'en-US_EthanNatural'
EN_US_HENRYV3VOICE = 'en-US_HenryV3Voice'
EN_US_JACKSONNATURAL = 'en-US_JacksonNatural'
EN_US_KEVINV3VOICE = 'en-US_KevinV3Voice'
EN_US_LISAEXPRESSIVE = 'en-US_LisaExpressive'
EN_US_LISAV3VOICE = 'en-US_LisaV3Voice'
EN_US_MICHAELEXPRESSIVE = 'en-US_MichaelExpressive'
EN_US_MICHAELV3VOICE = 'en-US_MichaelV3Voice'
EN_US_OLIVIAV3VOICE = 'en-US_OliviaV3Voice'
EN_US_VICTORIANATURAL = 'en-US_VictoriaNatural'
ES_ES_ENRIQUEV3VOICE = 'es-ES_EnriqueV3Voice'
ES_ES_LAURAV3VOICE = 'es-ES_LauraV3Voice'
ES_LA_DANIELAEXPRESSIVE = 'es-LA_DanielaExpressive'
Expand All @@ -1993,8 +2018,10 @@ class Voice(str, Enum):
JA_JP_EMIV3VOICE = 'ja-JP_EmiV3Voice'
KO_KR_JINV3VOICE = 'ko-KR_JinV3Voice'
NL_NL_MERELV3VOICE = 'nl-NL_MerelV3Voice'
PT_BR_CAMILANATURAL = 'pt-BR_CamilaNatural'
PT_BR_ISABELAV3VOICE = 'pt-BR_IsabelaV3Voice'
PT_BR_LUCASEXPRESSIVE = 'pt-BR_LucasExpressive'
PT_BR_LUCASNATURAL = 'pt-BR_LucasNatural'

class Format(str, Enum):
"""
Expand Down
2 changes: 1 addition & 1 deletion ibm_watson/version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = '10.0.0'
__version__ = '11.0.0'
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#!/usr/bin/env python
# (C) Copyright IBM Corp. 2015, 2020.
# (C) Copyright IBM Corp. 2015, 2025.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand All @@ -16,7 +16,7 @@
from setuptools import setup
from os import path

__version__ = '10.0.0'
__version__ = '11.0.0'

# read contents of README file
this_directory = path.abspath(path.dirname(__file__))
Expand Down
8 changes: 7 additions & 1 deletion test/unit/test_speech_to_text_v1.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# -*- coding: utf-8 -*-
# (C) Copyright IBM Corp. 2024.
# (C) Copyright IBM Corp. 2025.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -239,6 +239,7 @@ def test_recognize_all_params(self):
end_of_phrase_silence_time = 0.8
split_transcript_at_phrase_end = False
speech_detector_sensitivity = 0.5
sad_module = 1
background_audio_suppression = 0.0
low_latency = False
character_insertion_bias = 0.0
Expand Down Expand Up @@ -270,6 +271,7 @@ def test_recognize_all_params(self):
end_of_phrase_silence_time=end_of_phrase_silence_time,
split_transcript_at_phrase_end=split_transcript_at_phrase_end,
speech_detector_sensitivity=speech_detector_sensitivity,
sad_module=sad_module,
background_audio_suppression=background_audio_suppression,
low_latency=low_latency,
character_insertion_bias=character_insertion_bias,
Expand Down Expand Up @@ -302,6 +304,7 @@ def test_recognize_all_params(self):
assert 'audio_metrics={}'.format('true' if audio_metrics else 'false') in query_string
assert 'end_of_phrase_silence_time={}'.format(end_of_phrase_silence_time) in query_string
assert 'split_transcript_at_phrase_end={}'.format('true' if split_transcript_at_phrase_end else 'false') in query_string
assert 'sad_module={}'.format(sad_module) in query_string
assert 'low_latency={}'.format('true' if low_latency else 'false') in query_string
# Validate body params

Expand Down Expand Up @@ -663,6 +666,7 @@ def test_create_job_all_params(self):
end_of_phrase_silence_time = 0.8
split_transcript_at_phrase_end = False
speech_detector_sensitivity = 0.5
sad_module = 1
background_audio_suppression = 0.0
low_latency = False
character_insertion_bias = 0.0
Expand Down Expand Up @@ -699,6 +703,7 @@ def test_create_job_all_params(self):
end_of_phrase_silence_time=end_of_phrase_silence_time,
split_transcript_at_phrase_end=split_transcript_at_phrase_end,
speech_detector_sensitivity=speech_detector_sensitivity,
sad_module=sad_module,
background_audio_suppression=background_audio_suppression,
low_latency=low_latency,
character_insertion_bias=character_insertion_bias,
Expand Down Expand Up @@ -735,6 +740,7 @@ def test_create_job_all_params(self):
assert 'audio_metrics={}'.format('true' if audio_metrics else 'false') in query_string
assert 'end_of_phrase_silence_time={}'.format(end_of_phrase_silence_time) in query_string
assert 'split_transcript_at_phrase_end={}'.format('true' if split_transcript_at_phrase_end else 'false') in query_string
assert 'sad_module={}'.format(sad_module) in query_string
assert 'low_latency={}'.format('true' if low_latency else 'false') in query_string
# Validate body params

Expand Down