watson-developer-cloud · apaparazzi0329 · Nov 11, 2025 · Nov 10, 2025 · Nov 10, 2025 · Nov 10, 2025
diff --git a/.bumpversion.cfg b/.bumpversion.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 10.0.0
+current_version = 11.0.0
 commit = True
 message = Bump version: {current_version} → {new_version} [skip ci]
 

diff --git a/ibm_watson/speech_to_text_v1.py b/ibm_watson/speech_to_text_v1.py
@@ -218,6 +218,7 @@ def recognize(
         end_of_phrase_silence_time: Optional[float] = None,
         split_transcript_at_phrase_end: Optional[bool] = None,
         speech_detector_sensitivity: Optional[float] = None,
+        sad_module: Optional[int] = None,
         background_audio_suppression: Optional[float] = None,
         low_latency: Optional[bool] = None,
         character_insertion_bias: Optional[float] = None,
@@ -351,8 +352,9 @@ def recognize(
                activity is detected in the stream. This can be used both in standard and
                low latency mode. This feature enables client applications to know that
                some words/speech has been detected and the service is in the process of
-               decoding. This can be used in lieu of interim results in standard mode. See
-               [Using speech recognition
+               decoding. This can be used in lieu of interim results in standard mode. Use
+               `sad_module: 2` to increase accuracy and performance in detecting speech
+               boundaries within the audio stream. See [Using speech recognition
                parameters](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-service-features#features-parameters).
         :param str language_customization_id: (optional) The customization ID
                (GUID) of a custom language model that is to be used with the recognition
@@ -555,6 +557,12 @@ def recognize(
                sensitivity](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-detection#detection-parameters-sensitivity)
                and [Language model
                support](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-detection#detection-support).
+        :param int sad_module: (optional) Detects speech boundaries within the
+               audio stream with better performance, improved noise suppression, faster
+               responsiveness, and increased accuracy.
+               Specify `sad_module: 2`.
+               See [Speech Activity Detection
+               (SAD)](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-detection#sad).
         :param float background_audio_suppression: (optional) The level to which
                the service is to suppress background audio based on its volume to prevent
                it from being transcribed as speech. Use the parameter to suppress side
@@ -647,6 +655,7 @@ def recognize(
             'end_of_phrase_silence_time': end_of_phrase_silence_time,
             'split_transcript_at_phrase_end': split_transcript_at_phrase_end,
             'speech_detector_sensitivity': speech_detector_sensitivity,
+            'sad_module': sad_module,
             'background_audio_suppression': background_audio_suppression,
             'low_latency': low_latency,
             'character_insertion_bias': character_insertion_bias,
@@ -845,6 +854,7 @@ def create_job(
         end_of_phrase_silence_time: Optional[float] = None,
         split_transcript_at_phrase_end: Optional[bool] = None,
         speech_detector_sensitivity: Optional[float] = None,
+        sad_module: Optional[int] = None,
         background_audio_suppression: Optional[float] = None,
         low_latency: Optional[bool] = None,
         character_insertion_bias: Optional[float] = None,
@@ -1244,6 +1254,12 @@ def create_job(
                sensitivity](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-detection#detection-parameters-sensitivity)
                and [Language model
                support](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-detection#detection-support).
+        :param int sad_module: (optional) Detects speech boundaries within the
+               audio stream with better performance, improved noise suppression, faster
+               responsiveness, and increased accuracy.
+               Specify `sad_module: 2`.
+               See [Speech Activity Detection
+               (SAD)](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-detection#sad).
         :param float background_audio_suppression: (optional) The level to which
                the service is to suppress background audio based on its volume to prevent
                it from being transcribed as speech. Use the parameter to suppress side
@@ -1341,6 +1357,7 @@ def create_job(
             'end_of_phrase_silence_time': end_of_phrase_silence_time,
             'split_transcript_at_phrase_end': split_transcript_at_phrase_end,
             'speech_detector_sensitivity': speech_detector_sensitivity,
+            'sad_module': sad_module,
             'background_audio_suppression': background_audio_suppression,
             'low_latency': low_latency,
             'character_insertion_bias': character_insertion_bias,

diff --git a/ibm_watson/speech_to_text_v1_adapter.py b/ibm_watson/speech_to_text_v1_adapter.py
@@ -57,6 +57,7 @@ def recognize_using_websocket(self,
                                   background_audio_suppression=None,
                                   low_latency=None,
                                   character_insertion_bias=None,
+                                  sad_module=None,
                                   **kwargs):
         """
         Sends audio for speech recognition using web sockets.
@@ -309,6 +310,12 @@ def recognize_using_websocket(self,
                `Narrowband` models.
                See [Character insertion
                bias](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-parsing#insertion-bias).
+        :param int sad_module: (optional) Detects speech boundaries within the
+               audio stream with better performance, improved noise suppression, faster
+               responsiveness, and increased accuracy.
+               Specify `sad_module: 2`.
+               See [Speech Activity Detection
+               (SAD)](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-detection#sad).
         :param dict headers: A `dict` containing the request headers
         :return: A `dict` containing the `SpeechRecognitionResults` response.
         :rtype: dict
@@ -377,6 +384,7 @@ def recognize_using_websocket(self,
             'background_audio_suppression': background_audio_suppression,
             'character_insertion_bias': character_insertion_bias,
             'low_latency': low_latency,
+            'sad_module': sad_module,
         }
         options = {k: v for k, v in options.items() if v is not None}
         request['options'] = options

diff --git a/ibm_watson/text_to_speech_v1.py b/ibm_watson/text_to_speech_v1.py
@@ -1808,22 +1808,29 @@ class Voice(str, Enum):
         DE_DE_ERIKAV3VOICE = 'de-DE_ErikaV3Voice'
         EN_AU_HEIDIEXPRESSIVE = 'en-AU_HeidiExpressive'
         EN_AU_JACKEXPRESSIVE = 'en-AU_JackExpressive'
+        EN_CA_HANNAHNATURAL = 'en-CA_HannahNatural'
         EN_GB_CHARLOTTEV3VOICE = 'en-GB_CharlotteV3Voice'
+        EN_GB_CHLOENATURAL = 'en-GB_ChloeNatural'
         EN_GB_GEORGEEXPRESSIVE = 'en-GB_GeorgeExpressive'
         EN_GB_JAMESV3VOICE = 'en-GB_JamesV3Voice'
+        EN_GB_GEORGENATURAL = 'en-GB_GeorgeNatural'
         EN_GB_KATEV3VOICE = 'en-GB_KateV3Voice'
         EN_US_ALLISONEXPRESSIVE = 'en-US_AllisonExpressive'
         EN_US_ALLISONV3VOICE = 'en-US_AllisonV3Voice'
         EN_US_ELLIENATURAL = 'en-US_EllieNatural'
         EN_US_EMILYV3VOICE = 'en-US_EmilyV3Voice'
         EN_US_EMMAEXPRESSIVE = 'en-US_EmmaExpressive'
+        EN_US_EMMANATURAL = 'en-US_EmmaNatural'
+        EN_US_ETHANNATURAL = 'en-US_EthanNatural'
         EN_US_HENRYV3VOICE = 'en-US_HenryV3Voice'
+        EN_US_JACKSONNATURAL = 'en-US_JacksonNatural'
         EN_US_KEVINV3VOICE = 'en-US_KevinV3Voice'
         EN_US_LISAEXPRESSIVE = 'en-US_LisaExpressive'
         EN_US_LISAV3VOICE = 'en-US_LisaV3Voice'
         EN_US_MICHAELEXPRESSIVE = 'en-US_MichaelExpressive'
         EN_US_MICHAELV3VOICE = 'en-US_MichaelV3Voice'
         EN_US_OLIVIAV3VOICE = 'en-US_OliviaV3Voice'
+        EN_US_VICTORIANATURAL = 'en-US_VictoriaNatural'
         ES_ES_ENRIQUEV3VOICE = 'es-ES_EnriqueV3Voice'
         ES_ES_LAURAV3VOICE = 'es-ES_LauraV3Voice'
         ES_LA_DANIELAEXPRESSIVE = 'es-LA_DanielaExpressive'
@@ -1836,8 +1843,10 @@ class Voice(str, Enum):
         JA_JP_EMIV3VOICE = 'ja-JP_EmiV3Voice'
         KO_KR_JINV3VOICE = 'ko-KR_JinV3Voice'
         NL_NL_MERELV3VOICE = 'nl-NL_MerelV3Voice'
+        PT_BR_CAMILANATURAL = 'pt-BR_CamilaNatural'
         PT_BR_ISABELAV3VOICE = 'pt-BR_IsabelaV3Voice'
         PT_BR_LUCASEXPRESSIVE = 'pt-BR_LucasExpressive'
+        PT_BR_LUCASNATURAL = 'pt-BR_LucasNatural'
 
 
 class SynthesizeEnums:
@@ -1887,22 +1896,29 @@ class Voice(str, Enum):
         DE_DE_ERIKAV3VOICE = 'de-DE_ErikaV3Voice'
         EN_AU_HEIDIEXPRESSIVE = 'en-AU_HeidiExpressive'
         EN_AU_JACKEXPRESSIVE = 'en-AU_JackExpressive'
+        EN_CA_HANNAHNATURAL = 'en-CA_HannahNatural'
         EN_GB_CHARLOTTEV3VOICE = 'en-GB_CharlotteV3Voice'
+        EN_GB_CHLOENATURAL = 'en-GB_ChloeNatural'
         EN_GB_GEORGEEXPRESSIVE = 'en-GB_GeorgeExpressive'
         EN_GB_JAMESV3VOICE = 'en-GB_JamesV3Voice'
+        EN_GB_GEORGENATURAL = 'en-GB_GeorgeNatural'
         EN_GB_KATEV3VOICE = 'en-GB_KateV3Voice'
         EN_US_ALLISONEXPRESSIVE = 'en-US_AllisonExpressive'
         EN_US_ALLISONV3VOICE = 'en-US_AllisonV3Voice'
         EN_US_ELLIENATURAL = 'en-US_EllieNatural'
         EN_US_EMILYV3VOICE = 'en-US_EmilyV3Voice'
         EN_US_EMMAEXPRESSIVE = 'en-US_EmmaExpressive'
+        EN_US_EMMANATURAL = 'en-US_EmmaNatural'
+        EN_US_ETHANNATURAL = 'en-US_EthanNatural'
         EN_US_HENRYV3VOICE = 'en-US_HenryV3Voice'
+        EN_US_JACKSONNATURAL = 'en-US_JacksonNatural'
         EN_US_KEVINV3VOICE = 'en-US_KevinV3Voice'
         EN_US_LISAEXPRESSIVE = 'en-US_LisaExpressive'
         EN_US_LISAV3VOICE = 'en-US_LisaV3Voice'
         EN_US_MICHAELEXPRESSIVE = 'en-US_MichaelExpressive'
         EN_US_MICHAELV3VOICE = 'en-US_MichaelV3Voice'
         EN_US_OLIVIAV3VOICE = 'en-US_OliviaV3Voice'
+        EN_US_VICTORIANATURAL = 'en-US_VictoriaNatural'
         ES_ES_ENRIQUEV3VOICE = 'es-ES_EnriqueV3Voice'
         ES_ES_LAURAV3VOICE = 'es-ES_LauraV3Voice'
         ES_LA_DANIELAEXPRESSIVE = 'es-LA_DanielaExpressive'
@@ -1915,8 +1931,10 @@ class Voice(str, Enum):
         JA_JP_EMIV3VOICE = 'ja-JP_EmiV3Voice'
         KO_KR_JINV3VOICE = 'ko-KR_JinV3Voice'
         NL_NL_MERELV3VOICE = 'nl-NL_MerelV3Voice'
+        PT_BR_CAMILANATURAL = 'pt-BR_CamilaNatural'
         PT_BR_ISABELAV3VOICE = 'pt-BR_IsabelaV3Voice'
         PT_BR_LUCASEXPRESSIVE = 'pt-BR_LucasExpressive'
+        PT_BR_LUCASNATURAL = 'pt-BR_LucasNatural'
 
     class SpellOutMode(str, Enum):
         """
@@ -1965,22 +1983,29 @@ class Voice(str, Enum):
         DE_DE_ERIKAV3VOICE = 'de-DE_ErikaV3Voice'
         EN_AU_HEIDIEXPRESSIVE = 'en-AU_HeidiExpressive'
         EN_AU_JACKEXPRESSIVE = 'en-AU_JackExpressive'
+        EN_CA_HANNAHNATURAL = 'en-CA_HannahNatural'
         EN_GB_CHARLOTTEV3VOICE = 'en-GB_CharlotteV3Voice'
+        EN_GB_CHLOENATURAL = 'en-GB_ChloeNatural'
         EN_GB_GEORGEEXPRESSIVE = 'en-GB_GeorgeExpressive'
         EN_GB_JAMESV3VOICE = 'en-GB_JamesV3Voice'
+        EN_GB_GEORGENATURAL = 'en-GB_GeorgeNatural'
         EN_GB_KATEV3VOICE = 'en-GB_KateV3Voice'
         EN_US_ALLISONEXPRESSIVE = 'en-US_AllisonExpressive'
         EN_US_ALLISONV3VOICE = 'en-US_AllisonV3Voice'
         EN_US_ELLIENATURAL = 'en-US_EllieNatural'
         EN_US_EMILYV3VOICE = 'en-US_EmilyV3Voice'
         EN_US_EMMAEXPRESSIVE = 'en-US_EmmaExpressive'
+        EN_US_EMMANATURAL = 'en-US_EmmaNatural'
+        EN_US_ETHANNATURAL = 'en-US_EthanNatural'
         EN_US_HENRYV3VOICE = 'en-US_HenryV3Voice'
+        EN_US_JACKSONNATURAL = 'en-US_JacksonNatural'
         EN_US_KEVINV3VOICE = 'en-US_KevinV3Voice'
         EN_US_LISAEXPRESSIVE = 'en-US_LisaExpressive'
         EN_US_LISAV3VOICE = 'en-US_LisaV3Voice'
         EN_US_MICHAELEXPRESSIVE = 'en-US_MichaelExpressive'
         EN_US_MICHAELV3VOICE = 'en-US_MichaelV3Voice'
         EN_US_OLIVIAV3VOICE = 'en-US_OliviaV3Voice'
+        EN_US_VICTORIANATURAL = 'en-US_VictoriaNatural'
         ES_ES_ENRIQUEV3VOICE = 'es-ES_EnriqueV3Voice'
         ES_ES_LAURAV3VOICE = 'es-ES_LauraV3Voice'
         ES_LA_DANIELAEXPRESSIVE = 'es-LA_DanielaExpressive'
@@ -1993,8 +2018,10 @@ class Voice(str, Enum):
         JA_JP_EMIV3VOICE = 'ja-JP_EmiV3Voice'
         KO_KR_JINV3VOICE = 'ko-KR_JinV3Voice'
         NL_NL_MERELV3VOICE = 'nl-NL_MerelV3Voice'
+        PT_BR_CAMILANATURAL = 'pt-BR_CamilaNatural'
         PT_BR_ISABELAV3VOICE = 'pt-BR_IsabelaV3Voice'
         PT_BR_LUCASEXPRESSIVE = 'pt-BR_LucasExpressive'
+        PT_BR_LUCASNATURAL = 'pt-BR_LucasNatural'
 
     class Format(str, Enum):
         """

diff --git a/ibm_watson/version.py b/ibm_watson/version.py
@@ -1 +1 @@
-__version__ = '10.0.0'
+__version__ = '11.0.0'
diff --git a/setup.py b/setup.py
@@ -1,5 +1,5 @@
 #!/usr/bin/env python
-# (C) Copyright IBM Corp. 2015, 2020.
+# (C) Copyright IBM Corp. 2015, 2025.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -16,7 +16,7 @@
 from setuptools import setup
 from os import path
 
-__version__ = '10.0.0'
+__version__ = '11.0.0'
 
 # read contents of README file
 this_directory = path.abspath(path.dirname(__file__))

diff --git a/test/unit/test_speech_to_text_v1.py b/test/unit/test_speech_to_text_v1.py
@@ -1,5 +1,5 @@
 # -*- coding: utf-8 -*-
-# (C) Copyright IBM Corp. 2024.
+# (C) Copyright IBM Corp. 2025.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -239,6 +239,7 @@ def test_recognize_all_params(self):
         end_of_phrase_silence_time = 0.8
         split_transcript_at_phrase_end = False
         speech_detector_sensitivity = 0.5
+        sad_module = 1
         background_audio_suppression = 0.0
         low_latency = False
         character_insertion_bias = 0.0
@@ -270,6 +271,7 @@ def test_recognize_all_params(self):
             end_of_phrase_silence_time=end_of_phrase_silence_time,
             split_transcript_at_phrase_end=split_transcript_at_phrase_end,
             speech_detector_sensitivity=speech_detector_sensitivity,
+            sad_module=sad_module,
             background_audio_suppression=background_audio_suppression,
             low_latency=low_latency,
             character_insertion_bias=character_insertion_bias,
@@ -302,6 +304,7 @@ def test_recognize_all_params(self):
         assert 'audio_metrics={}'.format('true' if audio_metrics else 'false') in query_string
         assert 'end_of_phrase_silence_time={}'.format(end_of_phrase_silence_time) in query_string
         assert 'split_transcript_at_phrase_end={}'.format('true' if split_transcript_at_phrase_end else 'false') in query_string
+        assert 'sad_module={}'.format(sad_module) in query_string
         assert 'low_latency={}'.format('true' if low_latency else 'false') in query_string
         # Validate body params
 
@@ -663,6 +666,7 @@ def test_create_job_all_params(self):
         end_of_phrase_silence_time = 0.8
         split_transcript_at_phrase_end = False
         speech_detector_sensitivity = 0.5
+        sad_module = 1
         background_audio_suppression = 0.0
         low_latency = False
         character_insertion_bias = 0.0
@@ -699,6 +703,7 @@ def test_create_job_all_params(self):
             end_of_phrase_silence_time=end_of_phrase_silence_time,
             split_transcript_at_phrase_end=split_transcript_at_phrase_end,
             speech_detector_sensitivity=speech_detector_sensitivity,
+            sad_module=sad_module,
             background_audio_suppression=background_audio_suppression,
             low_latency=low_latency,
             character_insertion_bias=character_insertion_bias,
@@ -735,6 +740,7 @@ def test_create_job_all_params(self):
         assert 'audio_metrics={}'.format('true' if audio_metrics else 'false') in query_string
         assert 'end_of_phrase_silence_time={}'.format(end_of_phrase_silence_time) in query_string
         assert 'split_transcript_at_phrase_end={}'.format('true' if split_transcript_at_phrase_end else 'false') in query_string
+        assert 'sad_module={}'.format(sad_module) in query_string
         assert 'low_latency={}'.format('true' if low_latency else 'false') in query_string
         # Validate body params