diff --git a/.release-please-manifest.json b/.release-please-manifest.json
index 3549461d..c6a6955c 100644
--- a/.release-please-manifest.json
+++ b/.release-please-manifest.json
@@ -1,3 +1,3 @@
{
- ".": "3.5.1"
+ ".": "3.5.2"
}
\ No newline at end of file
diff --git a/.stats.yml b/.stats.yml
index 2aa16be8..e3897189 100644
--- a/.stats.yml
+++ b/.stats.yml
@@ -1,4 +1,4 @@
configured_endpoints: 118
-openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-16cb18bed32bae8c5840fb39a1bf664026cc40463ad0c487dcb0df1bd3d72db0.yml
-openapi_spec_hash: 4cb51b22f98dee1a90bc7add82d1d132
+openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-94b1e3cb0bdc616ff0c2f267c33dadd95f133b1f64e647aab6c64afb292b2793.yml
+openapi_spec_hash: 2395319ac9befd59b6536ae7f9564a05
config_hash: 930dac3aa861344867e4ac84f037b5df
diff --git a/CHANGELOG.md b/CHANGELOG.md
index ebbf3c40..964ceae4 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,17 @@
# Changelog
+## 3.5.2 (2025-09-12)
+
+Full Changelog: [v3.5.1...v3.5.2](https://github.com/openai/openai-java/compare/v3.5.1...v3.5.2)
+
+### Chores
+
+* **api:** Minor docs and type updates for realtime ([63ad148](https://github.com/openai/openai-java/commit/63ad148f1f998de1c9d66021037d7d5e04615022))
+* improve formatter performance ([be0acb7](https://github.com/openai/openai-java/commit/be0acb7a7779c4df1bf3e4678c40f4bd6d2b50d6))
+* **internal:** codegen related update ([18a0e64](https://github.com/openai/openai-java/commit/18a0e645fd3ea28957e55633f2a271287b56c312))
+* **internal:** codegen related update ([ab87009](https://github.com/openai/openai-java/commit/ab870095a749e00bcc2e864476553f782d098525))
+* **internal:** remove redundant deserializer symbols ([8c63a5b](https://github.com/openai/openai-java/commit/8c63a5b4983039ace361ab08b89a0c227a88cf90))
+
## 3.5.1 (2025-09-10)
Full Changelog: [v3.5.0...v3.5.1](https://github.com/openai/openai-java/compare/v3.5.0...v3.5.1)
diff --git a/README.md b/README.md
index 285497ba..c356ae81 100644
--- a/README.md
+++ b/README.md
@@ -2,8 +2,8 @@
-[](https://central.sonatype.com/artifact/com.openai/openai-java/3.5.1)
-[](https://javadoc.io/doc/com.openai/openai-java/3.5.1)
+[](https://central.sonatype.com/artifact/com.openai/openai-java/3.5.2)
+[](https://javadoc.io/doc/com.openai/openai-java/3.5.2)
@@ -11,7 +11,7 @@ The OpenAI Java SDK provides convenient access to the [OpenAI REST API](https://
-The REST API documentation can be found on [platform.openai.com](https://platform.openai.com/docs). Javadocs are available on [javadoc.io](https://javadoc.io/doc/com.openai/openai-java/3.5.1).
+The REST API documentation can be found on [platform.openai.com](https://platform.openai.com/docs). Javadocs are available on [javadoc.io](https://javadoc.io/doc/com.openai/openai-java/3.5.2).
@@ -24,7 +24,7 @@ The REST API documentation can be found on [platform.openai.com](https://platfor
### Gradle
```kotlin
-implementation("com.openai:openai-java:3.5.1")
+implementation("com.openai:openai-java:3.5.2")
```
### Maven
@@ -33,7 +33,7 @@ implementation("com.openai:openai-java:3.5.1")
com.openai
openai-java
- 3.5.1
+ 3.5.2
```
@@ -1342,7 +1342,7 @@ If you're using Spring Boot, then you can use the SDK's [Spring Boot starter](ht
#### Gradle
```kotlin
-implementation("com.openai:openai-java-spring-boot-starter:3.5.1")
+implementation("com.openai:openai-java-spring-boot-starter:3.5.2")
```
#### Maven
@@ -1351,7 +1351,7 @@ implementation("com.openai:openai-java-spring-boot-starter:3.5.1")
com.openai
openai-java-spring-boot-starter
- 3.5.1
+ 3.5.2
```
diff --git a/build.gradle.kts b/build.gradle.kts
index 60955f4e..f1e418a7 100644
--- a/build.gradle.kts
+++ b/build.gradle.kts
@@ -8,7 +8,7 @@ repositories {
allprojects {
group = "com.openai"
- version = "3.5.1" // x-release-please-version
+ version = "3.5.2" // x-release-please-version
}
subprojects {
diff --git a/openai-java-core/src/main/kotlin/com/openai/models/audio/transcriptions/TranscriptionCreateParams.kt b/openai-java-core/src/main/kotlin/com/openai/models/audio/transcriptions/TranscriptionCreateParams.kt
index 581c6cff..8c34a011 100644
--- a/openai-java-core/src/main/kotlin/com/openai/models/audio/transcriptions/TranscriptionCreateParams.kt
+++ b/openai-java-core/src/main/kotlin/com/openai/models/audio/transcriptions/TranscriptionCreateParams.kt
@@ -7,13 +7,8 @@ import com.fasterxml.jackson.annotation.JsonAnySetter
import com.fasterxml.jackson.annotation.JsonCreator
import com.fasterxml.jackson.annotation.JsonProperty
import com.fasterxml.jackson.core.JsonGenerator
-import com.fasterxml.jackson.core.ObjectCodec
-import com.fasterxml.jackson.databind.JsonNode
import com.fasterxml.jackson.databind.SerializerProvider
-import com.fasterxml.jackson.databind.annotation.JsonDeserialize
import com.fasterxml.jackson.databind.annotation.JsonSerialize
-import com.fasterxml.jackson.module.kotlin.jacksonTypeRef
-import com.openai.core.BaseDeserializer
import com.openai.core.BaseSerializer
import com.openai.core.Enum
import com.openai.core.ExcludeMissing
@@ -21,7 +16,6 @@ import com.openai.core.JsonField
import com.openai.core.JsonValue
import com.openai.core.MultipartField
import com.openai.core.Params
-import com.openai.core.allMaxBy
import com.openai.core.checkKnown
import com.openai.core.checkRequired
import com.openai.core.getOrThrow
@@ -1220,7 +1214,6 @@ private constructor(
* object can be provided to tweak VAD detection parameters manually. If unset, the audio is
* transcribed as a single block.
*/
- @JsonDeserialize(using = ChunkingStrategy.Deserializer::class)
@JsonSerialize(using = ChunkingStrategy.Serializer::class)
class ChunkingStrategy
private constructor(
@@ -1285,25 +1278,6 @@ private constructor(
false
}
- /**
- * Returns a score indicating how many valid values are contained in this object
- * recursively.
- *
- * Used for best match union deserialization.
- */
- @JvmSynthetic
- internal fun validity(): Int =
- accept(
- object : Visitor {
- override fun visitAuto(auto: JsonValue) =
- auto.let { if (it == JsonValue.from("auto")) 1 else 0 }
-
- override fun visitVadConfig(vadConfig: VadConfig) = 1
-
- override fun unknown(json: JsonValue?) = 0
- }
- )
-
override fun equals(other: Any?): Boolean {
if (this === other) {
return true
@@ -1361,36 +1335,6 @@ private constructor(
}
}
- internal class Deserializer : BaseDeserializer(ChunkingStrategy::class) {
-
- override fun ObjectCodec.deserialize(node: JsonNode): ChunkingStrategy {
- val json = JsonValue.fromJsonNode(node)
-
- val bestMatches =
- sequenceOf(
- tryDeserialize(node, jacksonTypeRef())
- ?.let { ChunkingStrategy(auto = it, _json = json) }
- ?.takeIf { it.isValid() },
- tryDeserialize(node, jacksonTypeRef())?.let {
- ChunkingStrategy(vadConfig = it, _json = json)
- },
- )
- .filterNotNull()
- .allMaxBy { it.validity() }
- .toList()
- return when (bestMatches.size) {
- // This can happen if what we're deserializing is completely incompatible with
- // all the possible variants (e.g. deserializing from array).
- 0 -> ChunkingStrategy(_json = json)
- 1 -> bestMatches.single()
- // If there's more than one match with the highest validity, then use the first
- // completely valid match, or simply the first match if none are completely
- // valid.
- else -> bestMatches.firstOrNull { it.isValid() } ?: bestMatches.first()
- }
- }
- }
-
internal class Serializer : BaseSerializer(ChunkingStrategy::class) {
override fun serialize(
diff --git a/openai-java-core/src/main/kotlin/com/openai/models/images/ImageEditParams.kt b/openai-java-core/src/main/kotlin/com/openai/models/images/ImageEditParams.kt
index 1ff743f1..b90acd9e 100644
--- a/openai-java-core/src/main/kotlin/com/openai/models/images/ImageEditParams.kt
+++ b/openai-java-core/src/main/kotlin/com/openai/models/images/ImageEditParams.kt
@@ -7,13 +7,8 @@ import com.fasterxml.jackson.annotation.JsonAnySetter
import com.fasterxml.jackson.annotation.JsonCreator
import com.fasterxml.jackson.annotation.JsonProperty
import com.fasterxml.jackson.core.JsonGenerator
-import com.fasterxml.jackson.core.ObjectCodec
-import com.fasterxml.jackson.databind.JsonNode
import com.fasterxml.jackson.databind.SerializerProvider
-import com.fasterxml.jackson.databind.annotation.JsonDeserialize
import com.fasterxml.jackson.databind.annotation.JsonSerialize
-import com.fasterxml.jackson.module.kotlin.jacksonTypeRef
-import com.openai.core.BaseDeserializer
import com.openai.core.BaseSerializer
import com.openai.core.Enum
import com.openai.core.ExcludeMissing
@@ -21,7 +16,6 @@ import com.openai.core.JsonField
import com.openai.core.JsonValue
import com.openai.core.MultipartField
import com.openai.core.Params
-import com.openai.core.allMaxBy
import com.openai.core.checkRequired
import com.openai.core.getOrThrow
import com.openai.core.http.Headers
@@ -1729,7 +1723,6 @@ private constructor(
* For `dall-e-2`, you can only provide one image, and it should be a square `png` file less
* than 4MB.
*/
- @JsonDeserialize(using = Image.Deserializer::class)
@JsonSerialize(using = Image.Serializer::class)
class Image
private constructor(
@@ -1784,25 +1777,6 @@ private constructor(
false
}
- /**
- * Returns a score indicating how many valid values are contained in this object
- * recursively.
- *
- * Used for best match union deserialization.
- */
- @JvmSynthetic
- internal fun validity(): Int =
- accept(
- object : Visitor {
- override fun visitInputStream(inputStream: InputStream) = 1
-
- override fun visitInputStreams(inputStreams: List) =
- inputStreams.size
-
- override fun unknown(json: JsonValue?) = 0
- }
- )
-
override fun equals(other: Any?): Boolean {
if (this === other) {
return true
@@ -1855,36 +1829,6 @@ private constructor(
}
}
- internal class Deserializer : BaseDeserializer(Image::class) {
-
- override fun ObjectCodec.deserialize(node: JsonNode): Image {
- val json = JsonValue.fromJsonNode(node)
-
- val bestMatches =
- sequenceOf(
- tryDeserialize(node, jacksonTypeRef())?.let {
- Image(inputStream = it, _json = json)
- },
- tryDeserialize(node, jacksonTypeRef>())?.let {
- Image(inputStreams = it, _json = json)
- },
- )
- .filterNotNull()
- .allMaxBy { it.validity() }
- .toList()
- return when (bestMatches.size) {
- // This can happen if what we're deserializing is completely incompatible with
- // all the possible variants (e.g. deserializing from object).
- 0 -> Image(_json = json)
- 1 -> bestMatches.single()
- // If there's more than one match with the highest validity, then use the first
- // completely valid match, or simply the first match if none are completely
- // valid.
- else -> bestMatches.firstOrNull { it.isValid() } ?: bestMatches.first()
- }
- }
- }
-
internal class Serializer : BaseSerializer(Image::class) {
override fun serialize(
diff --git a/openai-java-core/src/main/kotlin/com/openai/models/realtime/InputAudioBufferTimeoutTriggered.kt b/openai-java-core/src/main/kotlin/com/openai/models/realtime/InputAudioBufferTimeoutTriggered.kt
index f42c519a..393326bc 100644
--- a/openai-java-core/src/main/kotlin/com/openai/models/realtime/InputAudioBufferTimeoutTriggered.kt
+++ b/openai-java-core/src/main/kotlin/com/openai/models/realtime/InputAudioBufferTimeoutTriggered.kt
@@ -15,7 +15,21 @@ import com.openai.errors.OpenAIInvalidDataException
import java.util.Collections
import java.util.Objects
-/** Returned when the server VAD timeout is triggered for the input audio buffer. */
+/**
+ * Returned when the Server VAD timeout is triggered for the input audio buffer. This is configured
+ * with `idle_timeout_ms` in the `turn_detection` settings of the session, and it indicates that
+ * there hasn't been any speech detected for the configured duration.
+ *
+ * The `audio_start_ms` and `audio_end_ms` fields indicate the segment of audio after the last model
+ * response up to the triggering time, as an offset from the beginning of audio written to the input
+ * audio buffer. This means it demarcates the segment of audio that was silent and the difference
+ * between the start and end values will roughly match the configured timeout.
+ *
+ * The empty audio will be committed to the conversation as an `input_audio` item (there will be an
+ * `input_audio_buffer.committed` event) and a model response will be generated. There may be speech
+ * that didn't trigger VAD but is still detected by the model, so the model may respond with
+ * something relevant to the conversation or a prompt to continue speaking.
+ */
class InputAudioBufferTimeoutTriggered
private constructor(
private val audioEndMs: JsonField,
@@ -40,7 +54,8 @@ private constructor(
) : this(audioEndMs, audioStartMs, eventId, itemId, type, mutableMapOf())
/**
- * Millisecond offset where speech ended within the buffered audio.
+ * Millisecond offset of audio written to the input audio buffer at the time the timeout was
+ * triggered.
*
* @throws OpenAIInvalidDataException if the JSON field has an unexpected type or is
* unexpectedly missing or null (e.g. if the server responded with an unexpected value).
@@ -48,7 +63,8 @@ private constructor(
fun audioEndMs(): Long = audioEndMs.getRequired("audio_end_ms")
/**
- * Millisecond offset where speech started within the buffered audio.
+ * Millisecond offset of audio written to the input audio buffer that was after the playback
+ * time of the last model response.
*
* @throws OpenAIInvalidDataException if the JSON field has an unexpected type or is
* unexpectedly missing or null (e.g. if the server responded with an unexpected value).
@@ -165,7 +181,10 @@ private constructor(
inputAudioBufferTimeoutTriggered.additionalProperties.toMutableMap()
}
- /** Millisecond offset where speech ended within the buffered audio. */
+ /**
+ * Millisecond offset of audio written to the input audio buffer at the time the timeout was
+ * triggered.
+ */
fun audioEndMs(audioEndMs: Long) = audioEndMs(JsonField.of(audioEndMs))
/**
@@ -176,7 +195,10 @@ private constructor(
*/
fun audioEndMs(audioEndMs: JsonField) = apply { this.audioEndMs = audioEndMs }
- /** Millisecond offset where speech started within the buffered audio. */
+ /**
+ * Millisecond offset of audio written to the input audio buffer that was after the playback
+ * time of the last model response.
+ */
fun audioStartMs(audioStartMs: Long) = audioStartMs(JsonField.of(audioStartMs))
/**
diff --git a/openai-java-core/src/main/kotlin/com/openai/models/realtime/RealtimeAudioConfigInput.kt b/openai-java-core/src/main/kotlin/com/openai/models/realtime/RealtimeAudioConfigInput.kt
index 3f72a49f..accf1636 100644
--- a/openai-java-core/src/main/kotlin/com/openai/models/realtime/RealtimeAudioConfigInput.kt
+++ b/openai-java-core/src/main/kotlin/com/openai/models/realtime/RealtimeAudioConfigInput.kt
@@ -76,14 +76,16 @@ private constructor(
/**
* Configuration for turn detection, ether Server VAD or Semantic VAD. This can be set to `null`
- * to turn off, in which case the client must manually trigger model response. Server VAD means
- * that the model will detect the start and end of speech based on audio volume and respond at
- * the end of user speech. Semantic VAD is more advanced and uses a turn detection model (in
- * conjunction with VAD) to semantically estimate whether the user has finished speaking, then
- * dynamically sets a timeout based on this probability. For example, if user audio trails off
- * with "uhhm", the model will score a low probability of turn end and wait longer for the user
- * to continue speaking. This can be useful for more natural conversations, but may have a
- * higher latency.
+ * to turn off, in which case the client must manually trigger model response.
+ *
+ * Server VAD means that the model will detect the start and end of speech based on audio volume
+ * and respond at the end of user speech.
+ *
+ * Semantic VAD is more advanced and uses a turn detection model (in conjunction with VAD) to
+ * semantically estimate whether the user has finished speaking, then dynamically sets a timeout
+ * based on this probability. For example, if user audio trails off with "uhhm", the model will
+ * score a low probability of turn end and wait longer for the user to continue speaking. This
+ * can be useful for more natural conversations, but may have a higher latency.
*
* @throws OpenAIInvalidDataException if the JSON field has an unexpected type (e.g. if the
* server responded with an unexpected value).
@@ -230,17 +232,24 @@ private constructor(
/**
* Configuration for turn detection, ether Server VAD or Semantic VAD. This can be set to
- * `null` to turn off, in which case the client must manually trigger model response. Server
- * VAD means that the model will detect the start and end of speech based on audio volume
- * and respond at the end of user speech. Semantic VAD is more advanced and uses a turn
- * detection model (in conjunction with VAD) to semantically estimate whether the user has
- * finished speaking, then dynamically sets a timeout based on this probability. For
- * example, if user audio trails off with "uhhm", the model will score a low probability of
- * turn end and wait longer for the user to continue speaking. This can be useful for more
- * natural conversations, but may have a higher latency.
+ * `null` to turn off, in which case the client must manually trigger model response.
+ *
+ * Server VAD means that the model will detect the start and end of speech based on audio
+ * volume and respond at the end of user speech.
+ *
+ * Semantic VAD is more advanced and uses a turn detection model (in conjunction with VAD)
+ * to semantically estimate whether the user has finished speaking, then dynamically sets a
+ * timeout based on this probability. For example, if user audio trails off with "uhhm", the
+ * model will score a low probability of turn end and wait longer for the user to continue
+ * speaking. This can be useful for more natural conversations, but may have a higher
+ * latency.
*/
- fun turnDetection(turnDetection: RealtimeAudioInputTurnDetection) =
- turnDetection(JsonField.of(turnDetection))
+ fun turnDetection(turnDetection: RealtimeAudioInputTurnDetection?) =
+ turnDetection(JsonField.ofNullable(turnDetection))
+
+ /** Alias for calling [Builder.turnDetection] with `turnDetection.orElse(null)`. */
+ fun turnDetection(turnDetection: Optional) =
+ turnDetection(turnDetection.getOrNull())
/**
* Sets [Builder.turnDetection] to an arbitrary JSON value.
@@ -253,6 +262,20 @@ private constructor(
this.turnDetection = turnDetection
}
+ /**
+ * Alias for calling [turnDetection] with
+ * `RealtimeAudioInputTurnDetection.ofServerVad(serverVad)`.
+ */
+ fun turnDetection(serverVad: RealtimeAudioInputTurnDetection.ServerVad) =
+ turnDetection(RealtimeAudioInputTurnDetection.ofServerVad(serverVad))
+
+ /**
+ * Alias for calling [turnDetection] with
+ * `RealtimeAudioInputTurnDetection.ofSemanticVad(semanticVad)`.
+ */
+ fun turnDetection(semanticVad: RealtimeAudioInputTurnDetection.SemanticVad) =
+ turnDetection(RealtimeAudioInputTurnDetection.ofSemanticVad(semanticVad))
+
fun additionalProperties(additionalProperties: Map) = apply {
this.additionalProperties.clear()
putAllAdditionalProperties(additionalProperties)
diff --git a/openai-java-core/src/main/kotlin/com/openai/models/realtime/RealtimeAudioInputTurnDetection.kt b/openai-java-core/src/main/kotlin/com/openai/models/realtime/RealtimeAudioInputTurnDetection.kt
index 63985f43..8c403670 100644
--- a/openai-java-core/src/main/kotlin/com/openai/models/realtime/RealtimeAudioInputTurnDetection.kt
+++ b/openai-java-core/src/main/kotlin/com/openai/models/realtime/RealtimeAudioInputTurnDetection.kt
@@ -6,11 +6,21 @@ import com.fasterxml.jackson.annotation.JsonAnyGetter
import com.fasterxml.jackson.annotation.JsonAnySetter
import com.fasterxml.jackson.annotation.JsonCreator
import com.fasterxml.jackson.annotation.JsonProperty
+import com.fasterxml.jackson.core.JsonGenerator
+import com.fasterxml.jackson.core.ObjectCodec
+import com.fasterxml.jackson.databind.JsonNode
+import com.fasterxml.jackson.databind.SerializerProvider
+import com.fasterxml.jackson.databind.annotation.JsonDeserialize
+import com.fasterxml.jackson.databind.annotation.JsonSerialize
+import com.fasterxml.jackson.module.kotlin.jacksonTypeRef
+import com.openai.core.BaseDeserializer
+import com.openai.core.BaseSerializer
import com.openai.core.Enum
import com.openai.core.ExcludeMissing
import com.openai.core.JsonField
import com.openai.core.JsonMissing
import com.openai.core.JsonValue
+import com.openai.core.getOrThrow
import com.openai.errors.OpenAIInvalidDataException
import java.util.Collections
import java.util.Objects
@@ -19,584 +29,641 @@ import kotlin.jvm.optionals.getOrNull
/**
* Configuration for turn detection, ether Server VAD or Semantic VAD. This can be set to `null` to
- * turn off, in which case the client must manually trigger model response. Server VAD means that
- * the model will detect the start and end of speech based on audio volume and respond at the end of
- * user speech. Semantic VAD is more advanced and uses a turn detection model (in conjunction with
- * VAD) to semantically estimate whether the user has finished speaking, then dynamically sets a
- * timeout based on this probability. For example, if user audio trails off with "uhhm", the model
- * will score a low probability of turn end and wait longer for the user to continue speaking. This
- * can be useful for more natural conversations, but may have a higher latency.
+ * turn off, in which case the client must manually trigger model response.
+ *
+ * Server VAD means that the model will detect the start and end of speech based on audio volume and
+ * respond at the end of user speech.
+ *
+ * Semantic VAD is more advanced and uses a turn detection model (in conjunction with VAD) to
+ * semantically estimate whether the user has finished speaking, then dynamically sets a timeout
+ * based on this probability. For example, if user audio trails off with "uhhm", the model will
+ * score a low probability of turn end and wait longer for the user to continue speaking. This can
+ * be useful for more natural conversations, but may have a higher latency.
*/
+@JsonDeserialize(using = RealtimeAudioInputTurnDetection.Deserializer::class)
+@JsonSerialize(using = RealtimeAudioInputTurnDetection.Serializer::class)
class RealtimeAudioInputTurnDetection
private constructor(
- private val createResponse: JsonField,
- private val eagerness: JsonField,
- private val idleTimeoutMs: JsonField,
- private val interruptResponse: JsonField,
- private val prefixPaddingMs: JsonField,
- private val silenceDurationMs: JsonField,
- private val threshold: JsonField,
- private val type: JsonField,
- private val additionalProperties: MutableMap,
+ private val serverVad: ServerVad? = null,
+ private val semanticVad: SemanticVad? = null,
+ private val _json: JsonValue? = null,
) {
- @JsonCreator
- private constructor(
- @JsonProperty("create_response")
- @ExcludeMissing
- createResponse: JsonField = JsonMissing.of(),
- @JsonProperty("eagerness")
- @ExcludeMissing
- eagerness: JsonField = JsonMissing.of(),
- @JsonProperty("idle_timeout_ms")
- @ExcludeMissing
- idleTimeoutMs: JsonField = JsonMissing.of(),
- @JsonProperty("interrupt_response")
- @ExcludeMissing
- interruptResponse: JsonField = JsonMissing.of(),
- @JsonProperty("prefix_padding_ms")
- @ExcludeMissing
- prefixPaddingMs: JsonField = JsonMissing.of(),
- @JsonProperty("silence_duration_ms")
- @ExcludeMissing
- silenceDurationMs: JsonField = JsonMissing.of(),
- @JsonProperty("threshold") @ExcludeMissing threshold: JsonField = JsonMissing.of(),
- @JsonProperty("type") @ExcludeMissing type: JsonField = JsonMissing.of(),
- ) : this(
- createResponse,
- eagerness,
- idleTimeoutMs,
- interruptResponse,
- prefixPaddingMs,
- silenceDurationMs,
- threshold,
- type,
- mutableMapOf(),
- )
-
/**
- * Whether or not to automatically generate a response when a VAD stop event occurs.
- *
- * @throws OpenAIInvalidDataException if the JSON field has an unexpected type (e.g. if the
- * server responded with an unexpected value).
+ * Server-side voice activity detection (VAD) which flips on when user speech is detected and
+ * off after a period of silence.
*/
- fun createResponse(): Optional = createResponse.getOptional("create_response")
+ fun serverVad(): Optional = Optional.ofNullable(serverVad)
/**
- * Used only for `semantic_vad` mode. The eagerness of the model to respond. `low` will wait
- * longer for the user to continue speaking, `high` will respond more quickly. `auto` is the
- * default and is equivalent to `medium`. `low`, `medium`, and `high` have max timeouts of 8s,
- * 4s, and 2s respectively.
- *
- * @throws OpenAIInvalidDataException if the JSON field has an unexpected type (e.g. if the
- * server responded with an unexpected value).
+ * Server-side semantic turn detection which uses a model to determine when the user has
+ * finished speaking.
*/
- fun eagerness(): Optional = eagerness.getOptional("eagerness")
+ fun semanticVad(): Optional = Optional.ofNullable(semanticVad)
- /**
- * Optional idle timeout after which turn detection will auto-timeout when no additional audio
- * is received and emits a `timeout_triggered` event.
- *
- * @throws OpenAIInvalidDataException if the JSON field has an unexpected type (e.g. if the
- * server responded with an unexpected value).
- */
- fun idleTimeoutMs(): Optional = idleTimeoutMs.getOptional("idle_timeout_ms")
+ fun isServerVad(): Boolean = serverVad != null
- /**
- * Whether or not to automatically interrupt any ongoing response with output to the default
- * conversation (i.e. `conversation` of `auto`) when a VAD start event occurs.
- *
- * @throws OpenAIInvalidDataException if the JSON field has an unexpected type (e.g. if the
- * server responded with an unexpected value).
- */
- fun interruptResponse(): Optional = interruptResponse.getOptional("interrupt_response")
+ fun isSemanticVad(): Boolean = semanticVad != null
/**
- * Used only for `server_vad` mode. Amount of audio to include before the VAD detected speech
- * (in milliseconds). Defaults to 300ms.
- *
- * @throws OpenAIInvalidDataException if the JSON field has an unexpected type (e.g. if the
- * server responded with an unexpected value).
+ * Server-side voice activity detection (VAD) which flips on when user speech is detected and
+ * off after a period of silence.
*/
- fun prefixPaddingMs(): Optional = prefixPaddingMs.getOptional("prefix_padding_ms")
+ fun asServerVad(): ServerVad = serverVad.getOrThrow("serverVad")
/**
- * Used only for `server_vad` mode. Duration of silence to detect speech stop (in milliseconds).
- * Defaults to 500ms. With shorter values the model will respond more quickly, but may jump in
- * on short pauses from the user.
- *
- * @throws OpenAIInvalidDataException if the JSON field has an unexpected type (e.g. if the
- * server responded with an unexpected value).
+ * Server-side semantic turn detection which uses a model to determine when the user has
+ * finished speaking.
*/
- fun silenceDurationMs(): Optional = silenceDurationMs.getOptional("silence_duration_ms")
+ fun asSemanticVad(): SemanticVad = semanticVad.getOrThrow("semanticVad")
- /**
- * Used only for `server_vad` mode. Activation threshold for VAD (0.0 to 1.0), this defaults to
- * 0.5. A higher threshold will require louder audio to activate the model, and thus might
- * perform better in noisy environments.
- *
- * @throws OpenAIInvalidDataException if the JSON field has an unexpected type (e.g. if the
- * server responded with an unexpected value).
- */
- fun threshold(): Optional = threshold.getOptional("threshold")
+ fun _json(): Optional = Optional.ofNullable(_json)
- /**
- * Type of turn detection.
- *
- * @throws OpenAIInvalidDataException if the JSON field has an unexpected type (e.g. if the
- * server responded with an unexpected value).
- */
- fun type(): Optional = type.getOptional("type")
+ fun accept(visitor: Visitor): T =
+ when {
+ serverVad != null -> visitor.visitServerVad(serverVad)
+ semanticVad != null -> visitor.visitSemanticVad(semanticVad)
+ else -> visitor.unknown(_json)
+ }
- /**
- * Returns the raw JSON value of [createResponse].
- *
- * Unlike [createResponse], this method doesn't throw if the JSON field has an unexpected type.
- */
- @JsonProperty("create_response")
- @ExcludeMissing
- fun _createResponse(): JsonField = createResponse
+ private var validated: Boolean = false
- /**
- * Returns the raw JSON value of [eagerness].
- *
- * Unlike [eagerness], this method doesn't throw if the JSON field has an unexpected type.
- */
- @JsonProperty("eagerness") @ExcludeMissing fun _eagerness(): JsonField = eagerness
+ fun validate(): RealtimeAudioInputTurnDetection = apply {
+ if (validated) {
+ return@apply
+ }
- /**
- * Returns the raw JSON value of [idleTimeoutMs].
- *
- * Unlike [idleTimeoutMs], this method doesn't throw if the JSON field has an unexpected type.
- */
- @JsonProperty("idle_timeout_ms")
- @ExcludeMissing
- fun _idleTimeoutMs(): JsonField = idleTimeoutMs
+ accept(
+ object : Visitor {
+ override fun visitServerVad(serverVad: ServerVad) {
+ serverVad.validate()
+ }
- /**
- * Returns the raw JSON value of [interruptResponse].
- *
- * Unlike [interruptResponse], this method doesn't throw if the JSON field has an unexpected
- * type.
- */
- @JsonProperty("interrupt_response")
- @ExcludeMissing
- fun _interruptResponse(): JsonField = interruptResponse
+ override fun visitSemanticVad(semanticVad: SemanticVad) {
+ semanticVad.validate()
+ }
+ }
+ )
+ validated = true
+ }
- /**
- * Returns the raw JSON value of [prefixPaddingMs].
- *
- * Unlike [prefixPaddingMs], this method doesn't throw if the JSON field has an unexpected type.
- */
- @JsonProperty("prefix_padding_ms")
- @ExcludeMissing
- fun _prefixPaddingMs(): JsonField = prefixPaddingMs
+ fun isValid(): Boolean =
+ try {
+ validate()
+ true
+ } catch (e: OpenAIInvalidDataException) {
+ false
+ }
/**
- * Returns the raw JSON value of [silenceDurationMs].
+ * Returns a score indicating how many valid values are contained in this object recursively.
*
- * Unlike [silenceDurationMs], this method doesn't throw if the JSON field has an unexpected
- * type.
+ * Used for best match union deserialization.
*/
- @JsonProperty("silence_duration_ms")
- @ExcludeMissing
- fun _silenceDurationMs(): JsonField = silenceDurationMs
+ @JvmSynthetic
+ internal fun validity(): Int =
+ accept(
+ object : Visitor {
+ override fun visitServerVad(serverVad: ServerVad) = serverVad.validity()
- /**
- * Returns the raw JSON value of [threshold].
- *
- * Unlike [threshold], this method doesn't throw if the JSON field has an unexpected type.
- */
- @JsonProperty("threshold") @ExcludeMissing fun _threshold(): JsonField = threshold
+ override fun visitSemanticVad(semanticVad: SemanticVad) = semanticVad.validity()
- /**
- * Returns the raw JSON value of [type].
- *
- * Unlike [type], this method doesn't throw if the JSON field has an unexpected type.
- */
- @JsonProperty("type") @ExcludeMissing fun _type(): JsonField = type
+ override fun unknown(json: JsonValue?) = 0
+ }
+ )
+
+ override fun equals(other: Any?): Boolean {
+ if (this === other) {
+ return true
+ }
- @JsonAnySetter
- private fun putAdditionalProperty(key: String, value: JsonValue) {
- additionalProperties.put(key, value)
+ return other is RealtimeAudioInputTurnDetection &&
+ serverVad == other.serverVad &&
+ semanticVad == other.semanticVad
}
- @JsonAnyGetter
- @ExcludeMissing
- fun _additionalProperties(): Map =
- Collections.unmodifiableMap(additionalProperties)
+ override fun hashCode(): Int = Objects.hash(serverVad, semanticVad)
- fun toBuilder() = Builder().from(this)
+ override fun toString(): String =
+ when {
+ serverVad != null -> "RealtimeAudioInputTurnDetection{serverVad=$serverVad}"
+ semanticVad != null -> "RealtimeAudioInputTurnDetection{semanticVad=$semanticVad}"
+ _json != null -> "RealtimeAudioInputTurnDetection{_unknown=$_json}"
+ else -> throw IllegalStateException("Invalid RealtimeAudioInputTurnDetection")
+ }
companion object {
/**
- * Returns a mutable builder for constructing an instance of
- * [RealtimeAudioInputTurnDetection].
+ * Server-side voice activity detection (VAD) which flips on when user speech is detected
+ * and off after a period of silence.
*/
- @JvmStatic fun builder() = Builder()
- }
-
- /** A builder for [RealtimeAudioInputTurnDetection]. */
- class Builder internal constructor() {
+ @JvmStatic
+ fun ofServerVad(serverVad: ServerVad) =
+ RealtimeAudioInputTurnDetection(serverVad = serverVad)
- private var createResponse: JsonField = JsonMissing.of()
- private var eagerness: JsonField = JsonMissing.of()
- private var idleTimeoutMs: JsonField = JsonMissing.of()
- private var interruptResponse: JsonField = JsonMissing.of()
- private var prefixPaddingMs: JsonField = JsonMissing.of()
- private var silenceDurationMs: JsonField = JsonMissing.of()
- private var threshold: JsonField = JsonMissing.of()
- private var type: JsonField = JsonMissing.of()
- private var additionalProperties: MutableMap = mutableMapOf()
+ /**
+ * Server-side semantic turn detection which uses a model to determine when the user has
+ * finished speaking.
+ */
+ @JvmStatic
+ fun ofSemanticVad(semanticVad: SemanticVad) =
+ RealtimeAudioInputTurnDetection(semanticVad = semanticVad)
+ }
- @JvmSynthetic
- internal fun from(realtimeAudioInputTurnDetection: RealtimeAudioInputTurnDetection) =
- apply {
- createResponse = realtimeAudioInputTurnDetection.createResponse
- eagerness = realtimeAudioInputTurnDetection.eagerness
- idleTimeoutMs = realtimeAudioInputTurnDetection.idleTimeoutMs
- interruptResponse = realtimeAudioInputTurnDetection.interruptResponse
- prefixPaddingMs = realtimeAudioInputTurnDetection.prefixPaddingMs
- silenceDurationMs = realtimeAudioInputTurnDetection.silenceDurationMs
- threshold = realtimeAudioInputTurnDetection.threshold
- type = realtimeAudioInputTurnDetection.type
- additionalProperties =
- realtimeAudioInputTurnDetection.additionalProperties.toMutableMap()
- }
-
- /** Whether or not to automatically generate a response when a VAD stop event occurs. */
- fun createResponse(createResponse: Boolean) = createResponse(JsonField.of(createResponse))
+ /**
+ * An interface that defines how to map each variant of [RealtimeAudioInputTurnDetection] to a
+ * value of type [T].
+ */
+ interface Visitor {
/**
- * Sets [Builder.createResponse] to an arbitrary JSON value.
- *
- * You should usually call [Builder.createResponse] with a well-typed [Boolean] value
- * instead. This method is primarily for setting the field to an undocumented or not yet
- * supported value.
+ * Server-side voice activity detection (VAD) which flips on when user speech is detected
+ * and off after a period of silence.
*/
- fun createResponse(createResponse: JsonField) = apply {
- this.createResponse = createResponse
- }
+ fun visitServerVad(serverVad: ServerVad): T
/**
- * Used only for `semantic_vad` mode. The eagerness of the model to respond. `low` will wait
- * longer for the user to continue speaking, `high` will respond more quickly. `auto` is the
- * default and is equivalent to `medium`. `low`, `medium`, and `high` have max timeouts of
- * 8s, 4s, and 2s respectively.
+ * Server-side semantic turn detection which uses a model to determine when the user has
+ * finished speaking.
*/
- fun eagerness(eagerness: Eagerness) = eagerness(JsonField.of(eagerness))
+ fun visitSemanticVad(semanticVad: SemanticVad): T
/**
- * Sets [Builder.eagerness] to an arbitrary JSON value.
+ * Maps an unknown variant of [RealtimeAudioInputTurnDetection] to a value of type [T].
+ *
+ * An instance of [RealtimeAudioInputTurnDetection] can contain an unknown variant if it was
+ * deserialized from data that doesn't match any known variant. For example, if the SDK is
+ * on an older version than the API, then the API may respond with new variants that the SDK
+ * is unaware of.
*
- * You should usually call [Builder.eagerness] with a well-typed [Eagerness] value instead.
- * This method is primarily for setting the field to an undocumented or not yet supported
- * value.
+ * @throws OpenAIInvalidDataException in the default implementation.
*/
- fun eagerness(eagerness: JsonField) = apply { this.eagerness = eagerness }
+ fun unknown(json: JsonValue?): T {
+ throw OpenAIInvalidDataException("Unknown RealtimeAudioInputTurnDetection: $json")
+ }
+ }
+
+ internal class Deserializer :
+ BaseDeserializer(RealtimeAudioInputTurnDetection::class) {
+
+ override fun ObjectCodec.deserialize(node: JsonNode): RealtimeAudioInputTurnDetection {
+ val json = JsonValue.fromJsonNode(node)
+ val type = json.asObject().getOrNull()?.get("type")?.asString()?.getOrNull()
+
+ when (type) {
+ "server_vad" -> {
+ return tryDeserialize(node, jacksonTypeRef())?.let {
+ RealtimeAudioInputTurnDetection(serverVad = it, _json = json)
+ } ?: RealtimeAudioInputTurnDetection(_json = json)
+ }
+ "semantic_vad" -> {
+ return tryDeserialize(node, jacksonTypeRef())?.let {
+ RealtimeAudioInputTurnDetection(semanticVad = it, _json = json)
+ } ?: RealtimeAudioInputTurnDetection(_json = json)
+ }
+ }
+
+ return RealtimeAudioInputTurnDetection(_json = json)
+ }
+ }
+
+ internal class Serializer :
+ BaseSerializer(RealtimeAudioInputTurnDetection::class) {
+
+ override fun serialize(
+ value: RealtimeAudioInputTurnDetection,
+ generator: JsonGenerator,
+ provider: SerializerProvider,
+ ) {
+ when {
+ value.serverVad != null -> generator.writeObject(value.serverVad)
+ value.semanticVad != null -> generator.writeObject(value.semanticVad)
+ value._json != null -> generator.writeObject(value._json)
+ else -> throw IllegalStateException("Invalid RealtimeAudioInputTurnDetection")
+ }
+ }
+ }
+
+ /**
+ * Server-side voice activity detection (VAD) which flips on when user speech is detected and
+ * off after a period of silence.
+ */
+ class ServerVad
+ private constructor(
+ private val type: JsonValue,
+ private val createResponse: JsonField,
+ private val idleTimeoutMs: JsonField,
+ private val interruptResponse: JsonField,
+ private val prefixPaddingMs: JsonField,
+ private val silenceDurationMs: JsonField,
+ private val threshold: JsonField,
+ private val additionalProperties: MutableMap,
+ ) {
+
+ @JsonCreator
+ private constructor(
+ @JsonProperty("type") @ExcludeMissing type: JsonValue = JsonMissing.of(),
+ @JsonProperty("create_response")
+ @ExcludeMissing
+ createResponse: JsonField = JsonMissing.of(),
+ @JsonProperty("idle_timeout_ms")
+ @ExcludeMissing
+ idleTimeoutMs: JsonField = JsonMissing.of(),
+ @JsonProperty("interrupt_response")
+ @ExcludeMissing
+ interruptResponse: JsonField = JsonMissing.of(),
+ @JsonProperty("prefix_padding_ms")
+ @ExcludeMissing
+ prefixPaddingMs: JsonField = JsonMissing.of(),
+ @JsonProperty("silence_duration_ms")
+ @ExcludeMissing
+ silenceDurationMs: JsonField = JsonMissing.of(),
+ @JsonProperty("threshold")
+ @ExcludeMissing
+ threshold: JsonField = JsonMissing.of(),
+ ) : this(
+ type,
+ createResponse,
+ idleTimeoutMs,
+ interruptResponse,
+ prefixPaddingMs,
+ silenceDurationMs,
+ threshold,
+ mutableMapOf(),
+ )
/**
- * Optional idle timeout after which turn detection will auto-timeout when no additional
- * audio is received and emits a `timeout_triggered` event.
+ * Type of turn detection, `server_vad` to turn on simple Server VAD.
+ *
+ * Expected to always return the following:
+ * ```java
+ * JsonValue.from("server_vad")
+ * ```
+ *
+ * However, this method can be useful for debugging and logging (e.g. if the server
+ * responded with an unexpected value).
*/
- fun idleTimeoutMs(idleTimeoutMs: Long?) = idleTimeoutMs(JsonField.ofNullable(idleTimeoutMs))
+ @JsonProperty("type") @ExcludeMissing fun _type(): JsonValue = type
/**
- * Alias for [Builder.idleTimeoutMs].
+ * Whether or not to automatically generate a response when a VAD stop event occurs.
*
- * This unboxed primitive overload exists for backwards compatibility.
+ * @throws OpenAIInvalidDataException if the JSON field has an unexpected type (e.g. if the
+ * server responded with an unexpected value).
*/
- fun idleTimeoutMs(idleTimeoutMs: Long) = idleTimeoutMs(idleTimeoutMs as Long?)
-
- /** Alias for calling [Builder.idleTimeoutMs] with `idleTimeoutMs.orElse(null)`. */
- fun idleTimeoutMs(idleTimeoutMs: Optional) = idleTimeoutMs(idleTimeoutMs.getOrNull())
+ fun createResponse(): Optional = createResponse.getOptional("create_response")
/**
- * Sets [Builder.idleTimeoutMs] to an arbitrary JSON value.
+ * Optional timeout after which a model response will be triggered automatically. This is
+ * useful for situations in which a long pause from the user is unexpected, such as a phone
+ * call. The model will effectively prompt the user to continue the conversation based on
+ * the current context.
*
- * You should usually call [Builder.idleTimeoutMs] with a well-typed [Long] value instead.
- * This method is primarily for setting the field to an undocumented or not yet supported
- * value.
+ * The timeout value will be applied after the last model response's audio has finished
+ * playing, i.e. it's set to the `response.done` time plus audio playback duration.
+ *
+ * An `input_audio_buffer.timeout_triggered` event (plus events associated with the
+ * Response) will be emitted when the timeout is reached. Idle timeout is currently only
+ * supported for `server_vad` mode.
+ *
+ * @throws OpenAIInvalidDataException if the JSON field has an unexpected type (e.g. if the
+ * server responded with an unexpected value).
*/
- fun idleTimeoutMs(idleTimeoutMs: JsonField) = apply {
- this.idleTimeoutMs = idleTimeoutMs
- }
+ fun idleTimeoutMs(): Optional = idleTimeoutMs.getOptional("idle_timeout_ms")
/**
* Whether or not to automatically interrupt any ongoing response with output to the default
* conversation (i.e. `conversation` of `auto`) when a VAD start event occurs.
- */
- fun interruptResponse(interruptResponse: Boolean) =
- interruptResponse(JsonField.of(interruptResponse))
-
- /**
- * Sets [Builder.interruptResponse] to an arbitrary JSON value.
*
- * You should usually call [Builder.interruptResponse] with a well-typed [Boolean] value
- * instead. This method is primarily for setting the field to an undocumented or not yet
- * supported value.
+ * @throws OpenAIInvalidDataException if the JSON field has an unexpected type (e.g. if the
+ * server responded with an unexpected value).
*/
- fun interruptResponse(interruptResponse: JsonField) = apply {
- this.interruptResponse = interruptResponse
- }
+ fun interruptResponse(): Optional =
+ interruptResponse.getOptional("interrupt_response")
/**
* Used only for `server_vad` mode. Amount of audio to include before the VAD detected
* speech (in milliseconds). Defaults to 300ms.
- */
- fun prefixPaddingMs(prefixPaddingMs: Long) = prefixPaddingMs(JsonField.of(prefixPaddingMs))
-
- /**
- * Sets [Builder.prefixPaddingMs] to an arbitrary JSON value.
*
- * You should usually call [Builder.prefixPaddingMs] with a well-typed [Long] value instead.
- * This method is primarily for setting the field to an undocumented or not yet supported
- * value.
+ * @throws OpenAIInvalidDataException if the JSON field has an unexpected type (e.g. if the
+ * server responded with an unexpected value).
*/
- fun prefixPaddingMs(prefixPaddingMs: JsonField) = apply {
- this.prefixPaddingMs = prefixPaddingMs
- }
+ fun prefixPaddingMs(): Optional = prefixPaddingMs.getOptional("prefix_padding_ms")
/**
* Used only for `server_vad` mode. Duration of silence to detect speech stop (in
* milliseconds). Defaults to 500ms. With shorter values the model will respond more
* quickly, but may jump in on short pauses from the user.
- */
- fun silenceDurationMs(silenceDurationMs: Long) =
- silenceDurationMs(JsonField.of(silenceDurationMs))
-
- /**
- * Sets [Builder.silenceDurationMs] to an arbitrary JSON value.
*
- * You should usually call [Builder.silenceDurationMs] with a well-typed [Long] value
- * instead. This method is primarily for setting the field to an undocumented or not yet
- * supported value.
+ * @throws OpenAIInvalidDataException if the JSON field has an unexpected type (e.g. if the
+ * server responded with an unexpected value).
*/
- fun silenceDurationMs(silenceDurationMs: JsonField) = apply {
- this.silenceDurationMs = silenceDurationMs
- }
+ fun silenceDurationMs(): Optional =
+ silenceDurationMs.getOptional("silence_duration_ms")
/**
* Used only for `server_vad` mode. Activation threshold for VAD (0.0 to 1.0), this defaults
* to 0.5. A higher threshold will require louder audio to activate the model, and thus
* might perform better in noisy environments.
+ *
+ * @throws OpenAIInvalidDataException if the JSON field has an unexpected type (e.g. if the
+ * server responded with an unexpected value).
*/
- fun threshold(threshold: Double) = threshold(JsonField.of(threshold))
+ fun threshold(): Optional = threshold.getOptional("threshold")
+
+ /**
+ * Returns the raw JSON value of [createResponse].
+ *
+ * Unlike [createResponse], this method doesn't throw if the JSON field has an unexpected
+ * type.
+ */
+ @JsonProperty("create_response")
+ @ExcludeMissing
+ fun _createResponse(): JsonField = createResponse
/**
- * Sets [Builder.threshold] to an arbitrary JSON value.
+ * Returns the raw JSON value of [idleTimeoutMs].
*
- * You should usually call [Builder.threshold] with a well-typed [Double] value instead.
- * This method is primarily for setting the field to an undocumented or not yet supported
- * value.
+ * Unlike [idleTimeoutMs], this method doesn't throw if the JSON field has an unexpected
+ * type.
*/
- fun threshold(threshold: JsonField) = apply { this.threshold = threshold }
+ @JsonProperty("idle_timeout_ms")
+ @ExcludeMissing
+ fun _idleTimeoutMs(): JsonField = idleTimeoutMs
- /** Type of turn detection. */
- fun type(type: Type) = type(JsonField.of(type))
+ /**
+ * Returns the raw JSON value of [interruptResponse].
+ *
+ * Unlike [interruptResponse], this method doesn't throw if the JSON field has an unexpected
+ * type.
+ */
+ @JsonProperty("interrupt_response")
+ @ExcludeMissing
+ fun _interruptResponse(): JsonField = interruptResponse
/**
- * Sets [Builder.type] to an arbitrary JSON value.
+ * Returns the raw JSON value of [prefixPaddingMs].
*
- * You should usually call [Builder.type] with a well-typed [Type] value instead. This
- * method is primarily for setting the field to an undocumented or not yet supported value.
+ * Unlike [prefixPaddingMs], this method doesn't throw if the JSON field has an unexpected
+ * type.
*/
- fun type(type: JsonField) = apply { this.type = type }
+ @JsonProperty("prefix_padding_ms")
+ @ExcludeMissing
+ fun _prefixPaddingMs(): JsonField = prefixPaddingMs
- fun additionalProperties(additionalProperties: Map) = apply {
- this.additionalProperties.clear()
- putAllAdditionalProperties(additionalProperties)
- }
+ /**
+ * Returns the raw JSON value of [silenceDurationMs].
+ *
+ * Unlike [silenceDurationMs], this method doesn't throw if the JSON field has an unexpected
+ * type.
+ */
+ @JsonProperty("silence_duration_ms")
+ @ExcludeMissing
+ fun _silenceDurationMs(): JsonField = silenceDurationMs
- fun putAdditionalProperty(key: String, value: JsonValue) = apply {
+ /**
+ * Returns the raw JSON value of [threshold].
+ *
+ * Unlike [threshold], this method doesn't throw if the JSON field has an unexpected type.
+ */
+ @JsonProperty("threshold") @ExcludeMissing fun _threshold(): JsonField = threshold
+
+ @JsonAnySetter
+ private fun putAdditionalProperty(key: String, value: JsonValue) {
additionalProperties.put(key, value)
}
- fun putAllAdditionalProperties(additionalProperties: Map) = apply {
- this.additionalProperties.putAll(additionalProperties)
- }
+ @JsonAnyGetter
+ @ExcludeMissing
+ fun _additionalProperties(): Map =
+ Collections.unmodifiableMap(additionalProperties)
+
+ fun toBuilder() = Builder().from(this)
- fun removeAdditionalProperty(key: String) = apply { additionalProperties.remove(key) }
+ companion object {
- fun removeAllAdditionalProperties(keys: Set) = apply {
- keys.forEach(::removeAdditionalProperty)
+ /** Returns a mutable builder for constructing an instance of [ServerVad]. */
+ @JvmStatic fun builder() = Builder()
}
- /**
- * Returns an immutable instance of [RealtimeAudioInputTurnDetection].
- *
- * Further updates to this [Builder] will not mutate the returned instance.
- */
- fun build(): RealtimeAudioInputTurnDetection =
- RealtimeAudioInputTurnDetection(
- createResponse,
- eagerness,
- idleTimeoutMs,
- interruptResponse,
- prefixPaddingMs,
- silenceDurationMs,
- threshold,
- type,
- additionalProperties.toMutableMap(),
- )
- }
+ /** A builder for [ServerVad]. */
+ class Builder internal constructor() {
+
+ private var type: JsonValue = JsonValue.from("server_vad")
+ private var createResponse: JsonField = JsonMissing.of()
+ private var idleTimeoutMs: JsonField = JsonMissing.of()
+ private var interruptResponse: JsonField = JsonMissing.of()
+ private var prefixPaddingMs: JsonField = JsonMissing.of()
+ private var silenceDurationMs: JsonField = JsonMissing.of()
+ private var threshold: JsonField = JsonMissing.of()
+ private var additionalProperties: MutableMap = mutableMapOf()
+
+ @JvmSynthetic
+ internal fun from(serverVad: ServerVad) = apply {
+ type = serverVad.type
+ createResponse = serverVad.createResponse
+ idleTimeoutMs = serverVad.idleTimeoutMs
+ interruptResponse = serverVad.interruptResponse
+ prefixPaddingMs = serverVad.prefixPaddingMs
+ silenceDurationMs = serverVad.silenceDurationMs
+ threshold = serverVad.threshold
+ additionalProperties = serverVad.additionalProperties.toMutableMap()
+ }
- private var validated: Boolean = false
+ /**
+ * Sets the field to an arbitrary JSON value.
+ *
+ * It is usually unnecessary to call this method because the field defaults to the
+ * following:
+ * ```java
+ * JsonValue.from("server_vad")
+ * ```
+ *
+ * This method is primarily for setting the field to an undocumented or not yet
+ * supported value.
+ */
+ fun type(type: JsonValue) = apply { this.type = type }
- fun validate(): RealtimeAudioInputTurnDetection = apply {
- if (validated) {
- return@apply
- }
+ /** Whether or not to automatically generate a response when a VAD stop event occurs. */
+ fun createResponse(createResponse: Boolean) =
+ createResponse(JsonField.of(createResponse))
- createResponse()
- eagerness().ifPresent { it.validate() }
- idleTimeoutMs()
- interruptResponse()
- prefixPaddingMs()
- silenceDurationMs()
- threshold()
- type().ifPresent { it.validate() }
- validated = true
- }
+ /**
+ * Sets [Builder.createResponse] to an arbitrary JSON value.
+ *
+ * You should usually call [Builder.createResponse] with a well-typed [Boolean] value
+ * instead. This method is primarily for setting the field to an undocumented or not yet
+ * supported value.
+ */
+ fun createResponse(createResponse: JsonField) = apply {
+ this.createResponse = createResponse
+ }
- fun isValid(): Boolean =
- try {
- validate()
- true
- } catch (e: OpenAIInvalidDataException) {
- false
- }
+ /**
+ * Optional timeout after which a model response will be triggered automatically. This
+ * is useful for situations in which a long pause from the user is unexpected, such as a
+ * phone call. The model will effectively prompt the user to continue the conversation
+ * based on the current context.
+ *
+ * The timeout value will be applied after the last model response's audio has finished
+ * playing, i.e. it's set to the `response.done` time plus audio playback duration.
+ *
+ * An `input_audio_buffer.timeout_triggered` event (plus events associated with the
+ * Response) will be emitted when the timeout is reached. Idle timeout is currently only
+ * supported for `server_vad` mode.
+ */
+ fun idleTimeoutMs(idleTimeoutMs: Long?) =
+ idleTimeoutMs(JsonField.ofNullable(idleTimeoutMs))
- /**
- * Returns a score indicating how many valid values are contained in this object recursively.
- *
- * Used for best match union deserialization.
- */
- @JvmSynthetic
- internal fun validity(): Int =
- (if (createResponse.asKnown().isPresent) 1 else 0) +
- (eagerness.asKnown().getOrNull()?.validity() ?: 0) +
- (if (idleTimeoutMs.asKnown().isPresent) 1 else 0) +
- (if (interruptResponse.asKnown().isPresent) 1 else 0) +
- (if (prefixPaddingMs.asKnown().isPresent) 1 else 0) +
- (if (silenceDurationMs.asKnown().isPresent) 1 else 0) +
- (if (threshold.asKnown().isPresent) 1 else 0) +
- (type.asKnown().getOrNull()?.validity() ?: 0)
+ /**
+ * Alias for [Builder.idleTimeoutMs].
+ *
+ * This unboxed primitive overload exists for backwards compatibility.
+ */
+ fun idleTimeoutMs(idleTimeoutMs: Long) = idleTimeoutMs(idleTimeoutMs as Long?)
- /**
- * Used only for `semantic_vad` mode. The eagerness of the model to respond. `low` will wait
- * longer for the user to continue speaking, `high` will respond more quickly. `auto` is the
- * default and is equivalent to `medium`. `low`, `medium`, and `high` have max timeouts of 8s,
- * 4s, and 2s respectively.
- */
- class Eagerness @JsonCreator private constructor(private val value: JsonField) : Enum {
+ /** Alias for calling [Builder.idleTimeoutMs] with `idleTimeoutMs.orElse(null)`. */
+ fun idleTimeoutMs(idleTimeoutMs: Optional) =
+ idleTimeoutMs(idleTimeoutMs.getOrNull())
- /**
- * Returns this class instance's raw value.
- *
- * This is usually only useful if this instance was deserialized from data that doesn't
- * match any known member, and you want to know that value. For example, if the SDK is on an
- * older version than the API, then the API may respond with new members that the SDK is
- * unaware of.
- */
- @com.fasterxml.jackson.annotation.JsonValue fun _value(): JsonField = value
+ /**
+ * Sets [Builder.idleTimeoutMs] to an arbitrary JSON value.
+ *
+ * You should usually call [Builder.idleTimeoutMs] with a well-typed [Long] value
+ * instead. This method is primarily for setting the field to an undocumented or not yet
+ * supported value.
+ */
+ fun idleTimeoutMs(idleTimeoutMs: JsonField) = apply {
+ this.idleTimeoutMs = idleTimeoutMs
+ }
- companion object {
+ /**
+ * Whether or not to automatically interrupt any ongoing response with output to the
+ * default conversation (i.e. `conversation` of `auto`) when a VAD start event occurs.
+ */
+ fun interruptResponse(interruptResponse: Boolean) =
+ interruptResponse(JsonField.of(interruptResponse))
- @JvmField val LOW = of("low")
+ /**
+ * Sets [Builder.interruptResponse] to an arbitrary JSON value.
+ *
+ * You should usually call [Builder.interruptResponse] with a well-typed [Boolean] value
+ * instead. This method is primarily for setting the field to an undocumented or not yet
+ * supported value.
+ */
+ fun interruptResponse(interruptResponse: JsonField) = apply {
+ this.interruptResponse = interruptResponse
+ }
- @JvmField val MEDIUM = of("medium")
+ /**
+ * Used only for `server_vad` mode. Amount of audio to include before the VAD detected
+ * speech (in milliseconds). Defaults to 300ms.
+ */
+ fun prefixPaddingMs(prefixPaddingMs: Long) =
+ prefixPaddingMs(JsonField.of(prefixPaddingMs))
- @JvmField val HIGH = of("high")
+ /**
+ * Sets [Builder.prefixPaddingMs] to an arbitrary JSON value.
+ *
+ * You should usually call [Builder.prefixPaddingMs] with a well-typed [Long] value
+ * instead. This method is primarily for setting the field to an undocumented or not yet
+ * supported value.
+ */
+ fun prefixPaddingMs(prefixPaddingMs: JsonField) = apply {
+ this.prefixPaddingMs = prefixPaddingMs
+ }
- @JvmField val AUTO = of("auto")
+ /**
+ * Used only for `server_vad` mode. Duration of silence to detect speech stop (in
+ * milliseconds). Defaults to 500ms. With shorter values the model will respond more
+ * quickly, but may jump in on short pauses from the user.
+ */
+ fun silenceDurationMs(silenceDurationMs: Long) =
+ silenceDurationMs(JsonField.of(silenceDurationMs))
- @JvmStatic fun of(value: String) = Eagerness(JsonField.of(value))
- }
+ /**
+ * Sets [Builder.silenceDurationMs] to an arbitrary JSON value.
+ *
+ * You should usually call [Builder.silenceDurationMs] with a well-typed [Long] value
+ * instead. This method is primarily for setting the field to an undocumented or not yet
+ * supported value.
+ */
+ fun silenceDurationMs(silenceDurationMs: JsonField) = apply {
+ this.silenceDurationMs = silenceDurationMs
+ }
- /** An enum containing [Eagerness]'s known values. */
- enum class Known {
- LOW,
- MEDIUM,
- HIGH,
- AUTO,
- }
+ /**
+ * Used only for `server_vad` mode. Activation threshold for VAD (0.0 to 1.0), this
+ * defaults to 0.5. A higher threshold will require louder audio to activate the model,
+ * and thus might perform better in noisy environments.
+ */
+ fun threshold(threshold: Double) = threshold(JsonField.of(threshold))
- /**
- * An enum containing [Eagerness]'s known values, as well as an [_UNKNOWN] member.
- *
- * An instance of [Eagerness] can contain an unknown value in a couple of cases:
- * - It was deserialized from data that doesn't match any known member. For example, if the
- * SDK is on an older version than the API, then the API may respond with new members that
- * the SDK is unaware of.
- * - It was constructed with an arbitrary value using the [of] method.
- */
- enum class Value {
- LOW,
- MEDIUM,
- HIGH,
- AUTO,
/**
- * An enum member indicating that [Eagerness] was instantiated with an unknown value.
+ * Sets [Builder.threshold] to an arbitrary JSON value.
+ *
+ * You should usually call [Builder.threshold] with a well-typed [Double] value instead.
+ * This method is primarily for setting the field to an undocumented or not yet
+ * supported value.
*/
- _UNKNOWN,
- }
+ fun threshold(threshold: JsonField) = apply { this.threshold = threshold }
- /**
- * Returns an enum member corresponding to this class instance's value, or [Value._UNKNOWN]
- * if the class was instantiated with an unknown value.
- *
- * Use the [known] method instead if you're certain the value is always known or if you want
- * to throw for the unknown case.
- */
- fun value(): Value =
- when (this) {
- LOW -> Value.LOW
- MEDIUM -> Value.MEDIUM
- HIGH -> Value.HIGH
- AUTO -> Value.AUTO
- else -> Value._UNKNOWN
+ fun additionalProperties(additionalProperties: Map) = apply {
+ this.additionalProperties.clear()
+ putAllAdditionalProperties(additionalProperties)
}
- /**
- * Returns an enum member corresponding to this class instance's value.
- *
- * Use the [value] method instead if you're uncertain the value is always known and don't
- * want to throw for the unknown case.
- *
- * @throws OpenAIInvalidDataException if this class instance's value is a not a known
- * member.
- */
- fun known(): Known =
- when (this) {
- LOW -> Known.LOW
- MEDIUM -> Known.MEDIUM
- HIGH -> Known.HIGH
- AUTO -> Known.AUTO
- else -> throw OpenAIInvalidDataException("Unknown Eagerness: $value")
+ fun putAdditionalProperty(key: String, value: JsonValue) = apply {
+ additionalProperties.put(key, value)
}
- /**
- * Returns this class instance's primitive wire representation.
- *
- * This differs from the [toString] method because that method is primarily for debugging
- * and generally doesn't throw.
- *
- * @throws OpenAIInvalidDataException if this class instance's value does not have the
- * expected primitive type.
- */
- fun asString(): String =
- _value().asString().orElseThrow { OpenAIInvalidDataException("Value is not a String") }
+ fun putAllAdditionalProperties(additionalProperties: Map) = apply {
+ this.additionalProperties.putAll(additionalProperties)
+ }
+
+ fun removeAdditionalProperty(key: String) = apply { additionalProperties.remove(key) }
+
+ fun removeAllAdditionalProperties(keys: Set) = apply {
+ keys.forEach(::removeAdditionalProperty)
+ }
+
+ /**
+ * Returns an immutable instance of [ServerVad].
+ *
+ * Further updates to this [Builder] will not mutate the returned instance.
+ */
+ fun build(): ServerVad =
+ ServerVad(
+ type,
+ createResponse,
+ idleTimeoutMs,
+ interruptResponse,
+ prefixPaddingMs,
+ silenceDurationMs,
+ threshold,
+ additionalProperties.toMutableMap(),
+ )
+ }
private var validated: Boolean = false
- fun validate(): Eagerness = apply {
+ fun validate(): ServerVad = apply {
if (validated) {
return@apply
}
- known()
+ _type().let {
+ if (it != JsonValue.from("server_vad")) {
+ throw OpenAIInvalidDataException("'type' is invalid, received $it")
+ }
+ }
+ createResponse()
+ idleTimeoutMs()
+ interruptResponse()
+ prefixPaddingMs()
+ silenceDurationMs()
+ threshold()
validated = true
}
@@ -614,115 +681,298 @@ private constructor(
*
* Used for best match union deserialization.
*/
- @JvmSynthetic internal fun validity(): Int = if (value() == Value._UNKNOWN) 0 else 1
+ @JvmSynthetic
+ internal fun validity(): Int =
+ type.let { if (it == JsonValue.from("server_vad")) 1 else 0 } +
+ (if (createResponse.asKnown().isPresent) 1 else 0) +
+ (if (idleTimeoutMs.asKnown().isPresent) 1 else 0) +
+ (if (interruptResponse.asKnown().isPresent) 1 else 0) +
+ (if (prefixPaddingMs.asKnown().isPresent) 1 else 0) +
+ (if (silenceDurationMs.asKnown().isPresent) 1 else 0) +
+ (if (threshold.asKnown().isPresent) 1 else 0)
override fun equals(other: Any?): Boolean {
if (this === other) {
return true
}
- return other is Eagerness && value == other.value
+ return other is ServerVad &&
+ type == other.type &&
+ createResponse == other.createResponse &&
+ idleTimeoutMs == other.idleTimeoutMs &&
+ interruptResponse == other.interruptResponse &&
+ prefixPaddingMs == other.prefixPaddingMs &&
+ silenceDurationMs == other.silenceDurationMs &&
+ threshold == other.threshold &&
+ additionalProperties == other.additionalProperties
}
- override fun hashCode() = value.hashCode()
+ private val hashCode: Int by lazy {
+ Objects.hash(
+ type,
+ createResponse,
+ idleTimeoutMs,
+ interruptResponse,
+ prefixPaddingMs,
+ silenceDurationMs,
+ threshold,
+ additionalProperties,
+ )
+ }
- override fun toString() = value.toString()
+ override fun hashCode(): Int = hashCode
+
+ override fun toString() =
+ "ServerVad{type=$type, createResponse=$createResponse, idleTimeoutMs=$idleTimeoutMs, interruptResponse=$interruptResponse, prefixPaddingMs=$prefixPaddingMs, silenceDurationMs=$silenceDurationMs, threshold=$threshold, additionalProperties=$additionalProperties}"
}
- /** Type of turn detection. */
- class Type @JsonCreator private constructor(private val value: JsonField) : Enum {
+ /**
+ * Server-side semantic turn detection which uses a model to determine when the user has
+ * finished speaking.
+ */
+ class SemanticVad
+ private constructor(
+ private val type: JsonValue,
+ private val createResponse: JsonField,
+ private val eagerness: JsonField,
+ private val interruptResponse: JsonField,
+ private val additionalProperties: MutableMap,
+ ) {
+
+ @JsonCreator
+ private constructor(
+ @JsonProperty("type") @ExcludeMissing type: JsonValue = JsonMissing.of(),
+ @JsonProperty("create_response")
+ @ExcludeMissing
+ createResponse: JsonField = JsonMissing.of(),
+ @JsonProperty("eagerness")
+ @ExcludeMissing
+ eagerness: JsonField = JsonMissing.of(),
+ @JsonProperty("interrupt_response")
+ @ExcludeMissing
+ interruptResponse: JsonField = JsonMissing.of(),
+ ) : this(type, createResponse, eagerness, interruptResponse, mutableMapOf())
/**
- * Returns this class instance's raw value.
+ * Type of turn detection, `semantic_vad` to turn on Semantic VAD.
+ *
+ * Expected to always return the following:
+ * ```java
+ * JsonValue.from("semantic_vad")
+ * ```
*
- * This is usually only useful if this instance was deserialized from data that doesn't
- * match any known member, and you want to know that value. For example, if the SDK is on an
- * older version than the API, then the API may respond with new members that the SDK is
- * unaware of.
+ * However, this method can be useful for debugging and logging (e.g. if the server
+ * responded with an unexpected value).
*/
- @com.fasterxml.jackson.annotation.JsonValue fun _value(): JsonField = value
-
- companion object {
-
- @JvmField val SERVER_VAD = of("server_vad")
-
- @JvmField val SEMANTIC_VAD = of("semantic_vad")
-
- @JvmStatic fun of(value: String) = Type(JsonField.of(value))
- }
-
- /** An enum containing [Type]'s known values. */
- enum class Known {
- SERVER_VAD,
- SEMANTIC_VAD,
- }
+ @JsonProperty("type") @ExcludeMissing fun _type(): JsonValue = type
/**
- * An enum containing [Type]'s known values, as well as an [_UNKNOWN] member.
+ * Whether or not to automatically generate a response when a VAD stop event occurs.
*
- * An instance of [Type] can contain an unknown value in a couple of cases:
- * - It was deserialized from data that doesn't match any known member. For example, if the
- * SDK is on an older version than the API, then the API may respond with new members that
- * the SDK is unaware of.
- * - It was constructed with an arbitrary value using the [of] method.
+ * @throws OpenAIInvalidDataException if the JSON field has an unexpected type (e.g. if the
+ * server responded with an unexpected value).
*/
- enum class Value {
- SERVER_VAD,
- SEMANTIC_VAD,
- /** An enum member indicating that [Type] was instantiated with an unknown value. */
- _UNKNOWN,
- }
+ fun createResponse(): Optional = createResponse.getOptional("create_response")
/**
- * Returns an enum member corresponding to this class instance's value, or [Value._UNKNOWN]
- * if the class was instantiated with an unknown value.
+ * Used only for `semantic_vad` mode. The eagerness of the model to respond. `low` will wait
+ * longer for the user to continue speaking, `high` will respond more quickly. `auto` is the
+ * default and is equivalent to `medium`. `low`, `medium`, and `high` have max timeouts of
+ * 8s, 4s, and 2s respectively.
*
- * Use the [known] method instead if you're certain the value is always known or if you want
- * to throw for the unknown case.
+ * @throws OpenAIInvalidDataException if the JSON field has an unexpected type (e.g. if the
+ * server responded with an unexpected value).
*/
- fun value(): Value =
- when (this) {
- SERVER_VAD -> Value.SERVER_VAD
- SEMANTIC_VAD -> Value.SEMANTIC_VAD
- else -> Value._UNKNOWN
- }
+ fun eagerness(): Optional = eagerness.getOptional("eagerness")
/**
- * Returns an enum member corresponding to this class instance's value.
+ * Whether or not to automatically interrupt any ongoing response with output to the default
+ * conversation (i.e. `conversation` of `auto`) when a VAD start event occurs.
*
- * Use the [value] method instead if you're uncertain the value is always known and don't
- * want to throw for the unknown case.
+ * @throws OpenAIInvalidDataException if the JSON field has an unexpected type (e.g. if the
+ * server responded with an unexpected value).
+ */
+ fun interruptResponse(): Optional =
+ interruptResponse.getOptional("interrupt_response")
+
+ /**
+ * Returns the raw JSON value of [createResponse].
*
- * @throws OpenAIInvalidDataException if this class instance's value is a not a known
- * member.
+ * Unlike [createResponse], this method doesn't throw if the JSON field has an unexpected
+ * type.
*/
- fun known(): Known =
- when (this) {
- SERVER_VAD -> Known.SERVER_VAD
- SEMANTIC_VAD -> Known.SEMANTIC_VAD
- else -> throw OpenAIInvalidDataException("Unknown Type: $value")
- }
+ @JsonProperty("create_response")
+ @ExcludeMissing
+ fun _createResponse(): JsonField = createResponse
/**
- * Returns this class instance's primitive wire representation.
+ * Returns the raw JSON value of [eagerness].
*
- * This differs from the [toString] method because that method is primarily for debugging
- * and generally doesn't throw.
+ * Unlike [eagerness], this method doesn't throw if the JSON field has an unexpected type.
+ */
+ @JsonProperty("eagerness")
+ @ExcludeMissing
+ fun _eagerness(): JsonField = eagerness
+
+ /**
+ * Returns the raw JSON value of [interruptResponse].
*
- * @throws OpenAIInvalidDataException if this class instance's value does not have the
- * expected primitive type.
+ * Unlike [interruptResponse], this method doesn't throw if the JSON field has an unexpected
+ * type.
*/
- fun asString(): String =
- _value().asString().orElseThrow { OpenAIInvalidDataException("Value is not a String") }
+ @JsonProperty("interrupt_response")
+ @ExcludeMissing
+ fun _interruptResponse(): JsonField = interruptResponse
+
+ @JsonAnySetter
+ private fun putAdditionalProperty(key: String, value: JsonValue) {
+ additionalProperties.put(key, value)
+ }
+
+ @JsonAnyGetter
+ @ExcludeMissing
+ fun _additionalProperties(): Map =
+ Collections.unmodifiableMap(additionalProperties)
+
+ fun toBuilder() = Builder().from(this)
+
+ companion object {
+
+ /** Returns a mutable builder for constructing an instance of [SemanticVad]. */
+ @JvmStatic fun builder() = Builder()
+ }
+
+ /** A builder for [SemanticVad]. */
+ class Builder internal constructor() {
+
+ private var type: JsonValue = JsonValue.from("semantic_vad")
+ private var createResponse: JsonField = JsonMissing.of()
+ private var eagerness: JsonField = JsonMissing.of()
+ private var interruptResponse: JsonField = JsonMissing.of()
+ private var additionalProperties: MutableMap = mutableMapOf()
+
+ @JvmSynthetic
+ internal fun from(semanticVad: SemanticVad) = apply {
+ type = semanticVad.type
+ createResponse = semanticVad.createResponse
+ eagerness = semanticVad.eagerness
+ interruptResponse = semanticVad.interruptResponse
+ additionalProperties = semanticVad.additionalProperties.toMutableMap()
+ }
+
+ /**
+ * Sets the field to an arbitrary JSON value.
+ *
+ * It is usually unnecessary to call this method because the field defaults to the
+ * following:
+ * ```java
+ * JsonValue.from("semantic_vad")
+ * ```
+ *
+ * This method is primarily for setting the field to an undocumented or not yet
+ * supported value.
+ */
+ fun type(type: JsonValue) = apply { this.type = type }
+
+ /** Whether or not to automatically generate a response when a VAD stop event occurs. */
+ fun createResponse(createResponse: Boolean) =
+ createResponse(JsonField.of(createResponse))
+
+ /**
+ * Sets [Builder.createResponse] to an arbitrary JSON value.
+ *
+ * You should usually call [Builder.createResponse] with a well-typed [Boolean] value
+ * instead. This method is primarily for setting the field to an undocumented or not yet
+ * supported value.
+ */
+ fun createResponse(createResponse: JsonField) = apply {
+ this.createResponse = createResponse
+ }
+
+ /**
+ * Used only for `semantic_vad` mode. The eagerness of the model to respond. `low` will
+ * wait longer for the user to continue speaking, `high` will respond more quickly.
+ * `auto` is the default and is equivalent to `medium`. `low`, `medium`, and `high` have
+ * max timeouts of 8s, 4s, and 2s respectively.
+ */
+ fun eagerness(eagerness: Eagerness) = eagerness(JsonField.of(eagerness))
+
+ /**
+ * Sets [Builder.eagerness] to an arbitrary JSON value.
+ *
+ * You should usually call [Builder.eagerness] with a well-typed [Eagerness] value
+ * instead. This method is primarily for setting the field to an undocumented or not yet
+ * supported value.
+ */
+ fun eagerness(eagerness: JsonField) = apply { this.eagerness = eagerness }
+
+ /**
+ * Whether or not to automatically interrupt any ongoing response with output to the
+ * default conversation (i.e. `conversation` of `auto`) when a VAD start event occurs.
+ */
+ fun interruptResponse(interruptResponse: Boolean) =
+ interruptResponse(JsonField.of(interruptResponse))
+
+ /**
+ * Sets [Builder.interruptResponse] to an arbitrary JSON value.
+ *
+ * You should usually call [Builder.interruptResponse] with a well-typed [Boolean] value
+ * instead. This method is primarily for setting the field to an undocumented or not yet
+ * supported value.
+ */
+ fun interruptResponse(interruptResponse: JsonField) = apply {
+ this.interruptResponse = interruptResponse
+ }
+
+ fun additionalProperties(additionalProperties: Map) = apply {
+ this.additionalProperties.clear()
+ putAllAdditionalProperties(additionalProperties)
+ }
+
+ fun putAdditionalProperty(key: String, value: JsonValue) = apply {
+ additionalProperties.put(key, value)
+ }
+
+ fun putAllAdditionalProperties(additionalProperties: Map) = apply {
+ this.additionalProperties.putAll(additionalProperties)
+ }
+
+ fun removeAdditionalProperty(key: String) = apply { additionalProperties.remove(key) }
+
+ fun removeAllAdditionalProperties(keys: Set) = apply {
+ keys.forEach(::removeAdditionalProperty)
+ }
+
+ /**
+ * Returns an immutable instance of [SemanticVad].
+ *
+ * Further updates to this [Builder] will not mutate the returned instance.
+ */
+ fun build(): SemanticVad =
+ SemanticVad(
+ type,
+ createResponse,
+ eagerness,
+ interruptResponse,
+ additionalProperties.toMutableMap(),
+ )
+ }
private var validated: Boolean = false
- fun validate(): Type = apply {
+ fun validate(): SemanticVad = apply {
if (validated) {
return@apply
}
- known()
+ _type().let {
+ if (it != JsonValue.from("semantic_vad")) {
+ throw OpenAIInvalidDataException("'type' is invalid, received $it")
+ }
+ }
+ createResponse()
+ eagerness().ifPresent { it.validate() }
+ interruptResponse()
validated = true
}
@@ -740,54 +990,182 @@ private constructor(
*
* Used for best match union deserialization.
*/
- @JvmSynthetic internal fun validity(): Int = if (value() == Value._UNKNOWN) 0 else 1
+ @JvmSynthetic
+ internal fun validity(): Int =
+ type.let { if (it == JsonValue.from("semantic_vad")) 1 else 0 } +
+ (if (createResponse.asKnown().isPresent) 1 else 0) +
+ (eagerness.asKnown().getOrNull()?.validity() ?: 0) +
+ (if (interruptResponse.asKnown().isPresent) 1 else 0)
+
+ /**
+ * Used only for `semantic_vad` mode. The eagerness of the model to respond. `low` will wait
+ * longer for the user to continue speaking, `high` will respond more quickly. `auto` is the
+ * default and is equivalent to `medium`. `low`, `medium`, and `high` have max timeouts of
+ * 8s, 4s, and 2s respectively.
+ */
+ class Eagerness @JsonCreator private constructor(private val value: JsonField) :
+ Enum {
+
+ /**
+ * Returns this class instance's raw value.
+ *
+ * This is usually only useful if this instance was deserialized from data that doesn't
+ * match any known member, and you want to know that value. For example, if the SDK is
+ * on an older version than the API, then the API may respond with new members that the
+ * SDK is unaware of.
+ */
+ @com.fasterxml.jackson.annotation.JsonValue fun _value(): JsonField = value
+
+ companion object {
+
+ @JvmField val LOW = of("low")
+
+ @JvmField val MEDIUM = of("medium")
+
+ @JvmField val HIGH = of("high")
+
+ @JvmField val AUTO = of("auto")
+
+ @JvmStatic fun of(value: String) = Eagerness(JsonField.of(value))
+ }
+
+ /** An enum containing [Eagerness]'s known values. */
+ enum class Known {
+ LOW,
+ MEDIUM,
+ HIGH,
+ AUTO,
+ }
+
+ /**
+ * An enum containing [Eagerness]'s known values, as well as an [_UNKNOWN] member.
+ *
+ * An instance of [Eagerness] can contain an unknown value in a couple of cases:
+ * - It was deserialized from data that doesn't match any known member. For example, if
+ * the SDK is on an older version than the API, then the API may respond with new
+ * members that the SDK is unaware of.
+ * - It was constructed with an arbitrary value using the [of] method.
+ */
+ enum class Value {
+ LOW,
+ MEDIUM,
+ HIGH,
+ AUTO,
+ /**
+ * An enum member indicating that [Eagerness] was instantiated with an unknown
+ * value.
+ */
+ _UNKNOWN,
+ }
+
+ /**
+ * Returns an enum member corresponding to this class instance's value, or
+ * [Value._UNKNOWN] if the class was instantiated with an unknown value.
+ *
+ * Use the [known] method instead if you're certain the value is always known or if you
+ * want to throw for the unknown case.
+ */
+ fun value(): Value =
+ when (this) {
+ LOW -> Value.LOW
+ MEDIUM -> Value.MEDIUM
+ HIGH -> Value.HIGH
+ AUTO -> Value.AUTO
+ else -> Value._UNKNOWN
+ }
+
+ /**
+ * Returns an enum member corresponding to this class instance's value.
+ *
+ * Use the [value] method instead if you're uncertain the value is always known and
+ * don't want to throw for the unknown case.
+ *
+ * @throws OpenAIInvalidDataException if this class instance's value is not a known
+ * member.
+ */
+ fun known(): Known =
+ when (this) {
+ LOW -> Known.LOW
+ MEDIUM -> Known.MEDIUM
+ HIGH -> Known.HIGH
+ AUTO -> Known.AUTO
+ else -> throw OpenAIInvalidDataException("Unknown Eagerness: $value")
+ }
+
+ /**
+ * Returns this class instance's primitive wire representation.
+ *
+ * This differs from the [toString] method because that method is primarily for
+ * debugging and generally doesn't throw.
+ *
+ * @throws OpenAIInvalidDataException if this class instance's value does not have the
+ * expected primitive type.
+ */
+ fun asString(): String =
+ _value().asString().orElseThrow {
+ OpenAIInvalidDataException("Value is not a String")
+ }
+
+ private var validated: Boolean = false
+
+ fun validate(): Eagerness = apply {
+ if (validated) {
+ return@apply
+ }
+
+ known()
+ validated = true
+ }
+
+ fun isValid(): Boolean =
+ try {
+ validate()
+ true
+ } catch (e: OpenAIInvalidDataException) {
+ false
+ }
+
+ /**
+ * Returns a score indicating how many valid values are contained in this object
+ * recursively.
+ *
+ * Used for best match union deserialization.
+ */
+ @JvmSynthetic internal fun validity(): Int = if (value() == Value._UNKNOWN) 0 else 1
+
+ override fun equals(other: Any?): Boolean {
+ if (this === other) {
+ return true
+ }
+
+ return other is Eagerness && value == other.value
+ }
+
+ override fun hashCode() = value.hashCode()
+
+ override fun toString() = value.toString()
+ }
override fun equals(other: Any?): Boolean {
if (this === other) {
return true
}
- return other is Type && value == other.value
+ return other is SemanticVad &&
+ type == other.type &&
+ createResponse == other.createResponse &&
+ eagerness == other.eagerness &&
+ interruptResponse == other.interruptResponse &&
+ additionalProperties == other.additionalProperties
}
- override fun hashCode() = value.hashCode()
-
- override fun toString() = value.toString()
- }
-
- override fun equals(other: Any?): Boolean {
- if (this === other) {
- return true
+ private val hashCode: Int by lazy {
+ Objects.hash(type, createResponse, eagerness, interruptResponse, additionalProperties)
}
- return other is RealtimeAudioInputTurnDetection &&
- createResponse == other.createResponse &&
- eagerness == other.eagerness &&
- idleTimeoutMs == other.idleTimeoutMs &&
- interruptResponse == other.interruptResponse &&
- prefixPaddingMs == other.prefixPaddingMs &&
- silenceDurationMs == other.silenceDurationMs &&
- threshold == other.threshold &&
- type == other.type &&
- additionalProperties == other.additionalProperties
- }
+ override fun hashCode(): Int = hashCode
- private val hashCode: Int by lazy {
- Objects.hash(
- createResponse,
- eagerness,
- idleTimeoutMs,
- interruptResponse,
- prefixPaddingMs,
- silenceDurationMs,
- threshold,
- type,
- additionalProperties,
- )
+ override fun toString() =
+ "SemanticVad{type=$type, createResponse=$createResponse, eagerness=$eagerness, interruptResponse=$interruptResponse, additionalProperties=$additionalProperties}"
}
-
- override fun hashCode(): Int = hashCode
-
- override fun toString() =
- "RealtimeAudioInputTurnDetection{createResponse=$createResponse, eagerness=$eagerness, idleTimeoutMs=$idleTimeoutMs, interruptResponse=$interruptResponse, prefixPaddingMs=$prefixPaddingMs, silenceDurationMs=$silenceDurationMs, threshold=$threshold, type=$type, additionalProperties=$additionalProperties}"
}
diff --git a/openai-java-core/src/main/kotlin/com/openai/models/realtime/RealtimeServerEvent.kt b/openai-java-core/src/main/kotlin/com/openai/models/realtime/RealtimeServerEvent.kt
index 0cad1e4d..a8a56aa9 100644
--- a/openai-java-core/src/main/kotlin/com/openai/models/realtime/RealtimeServerEvent.kt
+++ b/openai-java-core/src/main/kotlin/com/openai/models/realtime/RealtimeServerEvent.kt
@@ -371,7 +371,21 @@ private constructor(
fun conversationItemDone(): Optional =
Optional.ofNullable(conversationItemDone)
- /** Returned when the server VAD timeout is triggered for the input audio buffer. */
+ /**
+ * Returned when the Server VAD timeout is triggered for the input audio buffer. This is
+ * configured with `idle_timeout_ms` in the `turn_detection` settings of the session, and it
+ * indicates that there hasn't been any speech detected for the configured duration.
+ *
+ * The `audio_start_ms` and `audio_end_ms` fields indicate the segment of audio after the last
+ * model response up to the triggering time, as an offset from the beginning of audio written to
+ * the input audio buffer. This means it demarcates the segment of audio that was silent and the
+ * difference between the start and end values will roughly match the configured timeout.
+ *
+ * The empty audio will be committed to the conversation as an `input_audio` item (there will be
+ * an `input_audio_buffer.committed` event) and a model response will be generated. There may be
+ * speech that didn't trigger VAD but is still detected by the model, so the model may respond
+ * with something relevant to the conversation or a prompt to continue speaking.
+ */
fun inputAudioBufferTimeoutTriggered(): Optional =
Optional.ofNullable(inputAudioBufferTimeoutTriggered)
@@ -794,7 +808,21 @@ private constructor(
fun asConversationItemDone(): ConversationItemDone =
conversationItemDone.getOrThrow("conversationItemDone")
- /** Returned when the server VAD timeout is triggered for the input audio buffer. */
+ /**
+ * Returned when the Server VAD timeout is triggered for the input audio buffer. This is
+ * configured with `idle_timeout_ms` in the `turn_detection` settings of the session, and it
+ * indicates that there hasn't been any speech detected for the configured duration.
+ *
+ * The `audio_start_ms` and `audio_end_ms` fields indicate the segment of audio after the last
+ * model response up to the triggering time, as an offset from the beginning of audio written to
+ * the input audio buffer. This means it demarcates the segment of audio that was silent and the
+ * difference between the start and end values will roughly match the configured timeout.
+ *
+ * The empty audio will be committed to the conversation as an `input_audio` item (there will be
+ * an `input_audio_buffer.committed` event) and a model response will be generated. There may be
+ * speech that didn't trigger VAD but is still detected by the model, so the model may respond
+ * with something relevant to the conversation or a prompt to continue speaking.
+ */
fun asInputAudioBufferTimeoutTriggered(): InputAudioBufferTimeoutTriggered =
inputAudioBufferTimeoutTriggered.getOrThrow("inputAudioBufferTimeoutTriggered")
@@ -1955,7 +1983,23 @@ private constructor(
fun ofConversationItemDone(conversationItemDone: ConversationItemDone) =
RealtimeServerEvent(conversationItemDone = conversationItemDone)
- /** Returned when the server VAD timeout is triggered for the input audio buffer. */
+ /**
+ * Returned when the Server VAD timeout is triggered for the input audio buffer. This is
+ * configured with `idle_timeout_ms` in the `turn_detection` settings of the session, and it
+ * indicates that there hasn't been any speech detected for the configured duration.
+ *
+ * The `audio_start_ms` and `audio_end_ms` fields indicate the segment of audio after the
+ * last model response up to the triggering time, as an offset from the beginning of audio
+ * written to the input audio buffer. This means it demarcates the segment of audio that was
+ * silent and the difference between the start and end values will roughly match the
+ * configured timeout.
+ *
+ * The empty audio will be committed to the conversation as an `input_audio` item (there
+ * will be an `input_audio_buffer.committed` event) and a model response will be generated.
+ * There may be speech that didn't trigger VAD but is still detected by the model, so the
+ * model may respond with something relevant to the conversation or a prompt to continue
+ * speaking.
+ */
@JvmStatic
fun ofInputAudioBufferTimeoutTriggered(
inputAudioBufferTimeoutTriggered: InputAudioBufferTimeoutTriggered
@@ -2301,7 +2345,23 @@ private constructor(
*/
fun visitConversationItemDone(conversationItemDone: ConversationItemDone): T
- /** Returned when the server VAD timeout is triggered for the input audio buffer. */
+ /**
+ * Returned when the Server VAD timeout is triggered for the input audio buffer. This is
+ * configured with `idle_timeout_ms` in the `turn_detection` settings of the session, and it
+ * indicates that there hasn't been any speech detected for the configured duration.
+ *
+ * The `audio_start_ms` and `audio_end_ms` fields indicate the segment of audio after the
+ * last model response up to the triggering time, as an offset from the beginning of audio
+ * written to the input audio buffer. This means it demarcates the segment of audio that was
+ * silent and the difference between the start and end values will roughly match the
+ * configured timeout.
+ *
+ * The empty audio will be committed to the conversation as an `input_audio` item (there
+ * will be an `input_audio_buffer.committed` event) and a model response will be generated.
+ * There may be speech that didn't trigger VAD but is still detected by the model, so the
+ * model may respond with something relevant to the conversation or a prompt to continue
+ * speaking.
+ */
fun visitInputAudioBufferTimeoutTriggered(
inputAudioBufferTimeoutTriggered: InputAudioBufferTimeoutTriggered
): T
diff --git a/openai-java-core/src/main/kotlin/com/openai/models/realtime/RealtimeSession.kt b/openai-java-core/src/main/kotlin/com/openai/models/realtime/RealtimeSession.kt
index cf15084c..f0675950 100644
--- a/openai-java-core/src/main/kotlin/com/openai/models/realtime/RealtimeSession.kt
+++ b/openai-java-core/src/main/kotlin/com/openai/models/realtime/RealtimeSession.kt
@@ -31,7 +31,7 @@ import java.util.Objects
import java.util.Optional
import kotlin.jvm.optionals.getOrNull
-/** Realtime session object. */
+/** Realtime session object for the beta interface. */
class RealtimeSession
private constructor(
private val id: JsonField,
@@ -311,14 +311,16 @@ private constructor(
/**
* Configuration for turn detection, ether Server VAD or Semantic VAD. This can be set to `null`
- * to turn off, in which case the client must manually trigger model response. Server VAD means
- * that the model will detect the start and end of speech based on audio volume and respond at
- * the end of user speech. Semantic VAD is more advanced and uses a turn detection model (in
- * conjunction with VAD) to semantically estimate whether the user has finished speaking, then
- * dynamically sets a timeout based on this probability. For example, if user audio trails off
- * with "uhhm", the model will score a low probability of turn end and wait longer for the user
- * to continue speaking. This can be useful for more natural conversations, but may have a
- * higher latency.
+ * to turn off, in which case the client must manually trigger model response.
+ *
+ * Server VAD means that the model will detect the start and end of speech based on audio volume
+ * and respond at the end of user speech.
+ *
+ * Semantic VAD is more advanced and uses a turn detection model (in conjunction with VAD) to
+ * semantically estimate whether the user has finished speaking, then dynamically sets a timeout
+ * based on this probability. For example, if user audio trails off with "uhhm", the model will
+ * score a low probability of turn end and wait longer for the user to continue speaking. This
+ * can be useful for more natural conversations, but may have a higher latency.
*
* @throws OpenAIInvalidDataException if the JSON field has an unexpected type (e.g. if the
* server responded with an unexpected value).
@@ -930,14 +932,17 @@ private constructor(
/**
* Configuration for turn detection, ether Server VAD or Semantic VAD. This can be set to
- * `null` to turn off, in which case the client must manually trigger model response. Server
- * VAD means that the model will detect the start and end of speech based on audio volume
- * and respond at the end of user speech. Semantic VAD is more advanced and uses a turn
- * detection model (in conjunction with VAD) to semantically estimate whether the user has
- * finished speaking, then dynamically sets a timeout based on this probability. For
- * example, if user audio trails off with "uhhm", the model will score a low probability of
- * turn end and wait longer for the user to continue speaking. This can be useful for more
- * natural conversations, but may have a higher latency.
+ * `null` to turn off, in which case the client must manually trigger model response.
+ *
+ * Server VAD means that the model will detect the start and end of speech based on audio
+ * volume and respond at the end of user speech.
+ *
+ * Semantic VAD is more advanced and uses a turn detection model (in conjunction with VAD)
+ * to semantically estimate whether the user has finished speaking, then dynamically sets a
+ * timeout based on this probability. For example, if user audio trails off with "uhhm", the
+ * model will score a low probability of turn end and wait longer for the user to continue
+ * speaking. This can be useful for more natural conversations, but may have a higher
+ * latency.
*/
fun turnDetection(turnDetection: TurnDetection?) =
turnDetection(JsonField.ofNullable(turnDetection))
@@ -957,6 +962,14 @@ private constructor(
this.turnDetection = turnDetection
}
+ /** Alias for calling [turnDetection] with `TurnDetection.ofServerVad(serverVad)`. */
+ fun turnDetection(serverVad: TurnDetection.ServerVad) =
+ turnDetection(TurnDetection.ofServerVad(serverVad))
+
+ /** Alias for calling [turnDetection] with `TurnDetection.ofSemanticVad(semanticVad)`. */
+ fun turnDetection(semanticVad: TurnDetection.SemanticVad) =
+ turnDetection(TurnDetection.ofSemanticVad(semanticVad))
+
/**
* The voice the model uses to respond. Voice cannot be changed during the session once the
* model has responded with audio at least once. Current voice options are `alloy`, `ash`,
@@ -2675,596 +2688,647 @@ private constructor(
/**
* Configuration for turn detection, ether Server VAD or Semantic VAD. This can be set to `null`
- * to turn off, in which case the client must manually trigger model response. Server VAD means
- * that the model will detect the start and end of speech based on audio volume and respond at
- * the end of user speech. Semantic VAD is more advanced and uses a turn detection model (in
- * conjunction with VAD) to semantically estimate whether the user has finished speaking, then
- * dynamically sets a timeout based on this probability. For example, if user audio trails off
- * with "uhhm", the model will score a low probability of turn end and wait longer for the user
- * to continue speaking. This can be useful for more natural conversations, but may have a
- * higher latency.
+ * to turn off, in which case the client must manually trigger model response.
+ *
+ * Server VAD means that the model will detect the start and end of speech based on audio volume
+ * and respond at the end of user speech.
+ *
+ * Semantic VAD is more advanced and uses a turn detection model (in conjunction with VAD) to
+ * semantically estimate whether the user has finished speaking, then dynamically sets a timeout
+ * based on this probability. For example, if user audio trails off with "uhhm", the model will
+ * score a low probability of turn end and wait longer for the user to continue speaking. This
+ * can be useful for more natural conversations, but may have a higher latency.
*/
+ @JsonDeserialize(using = TurnDetection.Deserializer::class)
+ @JsonSerialize(using = TurnDetection.Serializer::class)
class TurnDetection
private constructor(
- private val createResponse: JsonField,
- private val eagerness: JsonField,
- private val idleTimeoutMs: JsonField,
- private val interruptResponse: JsonField,
- private val prefixPaddingMs: JsonField,
- private val silenceDurationMs: JsonField,
- private val threshold: JsonField,
- private val type: JsonField,
- private val additionalProperties: MutableMap,
+ private val serverVad: ServerVad? = null,
+ private val semanticVad: SemanticVad? = null,
+ private val _json: JsonValue? = null,
) {
- @JsonCreator
- private constructor(
- @JsonProperty("create_response")
- @ExcludeMissing
- createResponse: JsonField = JsonMissing.of(),
- @JsonProperty("eagerness")
- @ExcludeMissing
- eagerness: JsonField = JsonMissing.of(),
- @JsonProperty("idle_timeout_ms")
- @ExcludeMissing
- idleTimeoutMs: JsonField = JsonMissing.of(),
- @JsonProperty("interrupt_response")
- @ExcludeMissing
- interruptResponse: JsonField = JsonMissing.of(),
- @JsonProperty("prefix_padding_ms")
- @ExcludeMissing
- prefixPaddingMs: JsonField = JsonMissing.of(),
- @JsonProperty("silence_duration_ms")
- @ExcludeMissing
- silenceDurationMs: JsonField = JsonMissing.of(),
- @JsonProperty("threshold")
- @ExcludeMissing
- threshold: JsonField = JsonMissing.of(),
- @JsonProperty("type") @ExcludeMissing type: JsonField = JsonMissing.of(),
- ) : this(
- createResponse,
- eagerness,
- idleTimeoutMs,
- interruptResponse,
- prefixPaddingMs,
- silenceDurationMs,
- threshold,
- type,
- mutableMapOf(),
- )
-
/**
- * Whether or not to automatically generate a response when a VAD stop event occurs.
- *
- * @throws OpenAIInvalidDataException if the JSON field has an unexpected type (e.g. if the
- * server responded with an unexpected value).
+ * Server-side voice activity detection (VAD) which flips on when user speech is detected
+ * and off after a period of silence.
*/
- fun createResponse(): Optional = createResponse.getOptional("create_response")
+ fun serverVad(): Optional = Optional.ofNullable(serverVad)
/**
- * Used only for `semantic_vad` mode. The eagerness of the model to respond. `low` will wait
- * longer for the user to continue speaking, `high` will respond more quickly. `auto` is the
- * default and is equivalent to `medium`.
- *
- * @throws OpenAIInvalidDataException if the JSON field has an unexpected type (e.g. if the
- * server responded with an unexpected value).
+ * Server-side semantic turn detection which uses a model to determine when the user has
+ * finished speaking.
*/
- fun eagerness(): Optional = eagerness.getOptional("eagerness")
+ fun semanticVad(): Optional = Optional.ofNullable(semanticVad)
- /**
- * Optional idle timeout after which turn detection will auto-timeout when no additional
- * audio is received.
- *
- * @throws OpenAIInvalidDataException if the JSON field has an unexpected type (e.g. if the
- * server responded with an unexpected value).
- */
- fun idleTimeoutMs(): Optional = idleTimeoutMs.getOptional("idle_timeout_ms")
+ fun isServerVad(): Boolean = serverVad != null
- /**
- * Whether or not to automatically interrupt any ongoing response with output to the default
- * conversation (i.e. `conversation` of `auto`) when a VAD start event occurs.
- *
- * @throws OpenAIInvalidDataException if the JSON field has an unexpected type (e.g. if the
- * server responded with an unexpected value).
- */
- fun interruptResponse(): Optional =
- interruptResponse.getOptional("interrupt_response")
+ fun isSemanticVad(): Boolean = semanticVad != null
/**
- * Used only for `server_vad` mode. Amount of audio to include before the VAD detected
- * speech (in milliseconds). Defaults to 300ms.
- *
- * @throws OpenAIInvalidDataException if the JSON field has an unexpected type (e.g. if the
- * server responded with an unexpected value).
+ * Server-side voice activity detection (VAD) which flips on when user speech is detected
+ * and off after a period of silence.
*/
- fun prefixPaddingMs(): Optional = prefixPaddingMs.getOptional("prefix_padding_ms")
+ fun asServerVad(): ServerVad = serverVad.getOrThrow("serverVad")
/**
- * Used only for `server_vad` mode. Duration of silence to detect speech stop (in
- * milliseconds). Defaults to 500ms. With shorter values the model will respond more
- * quickly, but may jump in on short pauses from the user.
- *
- * @throws OpenAIInvalidDataException if the JSON field has an unexpected type (e.g. if the
- * server responded with an unexpected value).
+ * Server-side semantic turn detection which uses a model to determine when the user has
+ * finished speaking.
*/
- fun silenceDurationMs(): Optional =
- silenceDurationMs.getOptional("silence_duration_ms")
+ fun asSemanticVad(): SemanticVad = semanticVad.getOrThrow("semanticVad")
- /**
- * Used only for `server_vad` mode. Activation threshold for VAD (0.0 to 1.0), this defaults
- * to 0.5. A higher threshold will require louder audio to activate the model, and thus
- * might perform better in noisy environments.
- *
- * @throws OpenAIInvalidDataException if the JSON field has an unexpected type (e.g. if the
- * server responded with an unexpected value).
- */
- fun threshold(): Optional = threshold.getOptional("threshold")
+ fun _json(): Optional = Optional.ofNullable(_json)
- /**
- * Type of turn detection.
- *
- * @throws OpenAIInvalidDataException if the JSON field has an unexpected type (e.g. if the
- * server responded with an unexpected value).
- */
- fun type(): Optional = type.getOptional("type")
+ fun accept(visitor: Visitor): T =
+ when {
+ serverVad != null -> visitor.visitServerVad(serverVad)
+ semanticVad != null -> visitor.visitSemanticVad(semanticVad)
+ else -> visitor.unknown(_json)
+ }
- /**
- * Returns the raw JSON value of [createResponse].
- *
- * Unlike [createResponse], this method doesn't throw if the JSON field has an unexpected
- * type.
- */
- @JsonProperty("create_response")
- @ExcludeMissing
- fun _createResponse(): JsonField = createResponse
+ private var validated: Boolean = false
- /**
- * Returns the raw JSON value of [eagerness].
- *
- * Unlike [eagerness], this method doesn't throw if the JSON field has an unexpected type.
- */
- @JsonProperty("eagerness")
- @ExcludeMissing
- fun _eagerness(): JsonField = eagerness
+ fun validate(): TurnDetection = apply {
+ if (validated) {
+ return@apply
+ }
- /**
- * Returns the raw JSON value of [idleTimeoutMs].
- *
- * Unlike [idleTimeoutMs], this method doesn't throw if the JSON field has an unexpected
- * type.
- */
- @JsonProperty("idle_timeout_ms")
- @ExcludeMissing
- fun _idleTimeoutMs(): JsonField = idleTimeoutMs
+ accept(
+ object : Visitor {
+ override fun visitServerVad(serverVad: ServerVad) {
+ serverVad.validate()
+ }
- /**
- * Returns the raw JSON value of [interruptResponse].
- *
- * Unlike [interruptResponse], this method doesn't throw if the JSON field has an unexpected
- * type.
- */
- @JsonProperty("interrupt_response")
- @ExcludeMissing
- fun _interruptResponse(): JsonField = interruptResponse
+ override fun visitSemanticVad(semanticVad: SemanticVad) {
+ semanticVad.validate()
+ }
+ }
+ )
+ validated = true
+ }
- /**
- * Returns the raw JSON value of [prefixPaddingMs].
- *
- * Unlike [prefixPaddingMs], this method doesn't throw if the JSON field has an unexpected
- * type.
- */
- @JsonProperty("prefix_padding_ms")
- @ExcludeMissing
- fun _prefixPaddingMs(): JsonField = prefixPaddingMs
+ fun isValid(): Boolean =
+ try {
+ validate()
+ true
+ } catch (e: OpenAIInvalidDataException) {
+ false
+ }
/**
- * Returns the raw JSON value of [silenceDurationMs].
+ * Returns a score indicating how many valid values are contained in this object
+ * recursively.
*
- * Unlike [silenceDurationMs], this method doesn't throw if the JSON field has an unexpected
- * type.
+ * Used for best match union deserialization.
*/
- @JsonProperty("silence_duration_ms")
- @ExcludeMissing
- fun _silenceDurationMs(): JsonField = silenceDurationMs
+ @JvmSynthetic
+ internal fun validity(): Int =
+ accept(
+ object : Visitor {
+ override fun visitServerVad(serverVad: ServerVad) = serverVad.validity()
- /**
- * Returns the raw JSON value of [threshold].
- *
- * Unlike [threshold], this method doesn't throw if the JSON field has an unexpected type.
- */
- @JsonProperty("threshold") @ExcludeMissing fun _threshold(): JsonField = threshold
+ override fun visitSemanticVad(semanticVad: SemanticVad) = semanticVad.validity()
- /**
- * Returns the raw JSON value of [type].
- *
- * Unlike [type], this method doesn't throw if the JSON field has an unexpected type.
- */
- @JsonProperty("type") @ExcludeMissing fun _type(): JsonField = type
+ override fun unknown(json: JsonValue?) = 0
+ }
+ )
- @JsonAnySetter
- private fun putAdditionalProperty(key: String, value: JsonValue) {
- additionalProperties.put(key, value)
+ override fun equals(other: Any?): Boolean {
+ if (this === other) {
+ return true
+ }
+
+ return other is TurnDetection &&
+ serverVad == other.serverVad &&
+ semanticVad == other.semanticVad
}
- @JsonAnyGetter
- @ExcludeMissing
- fun _additionalProperties(): Map =
- Collections.unmodifiableMap(additionalProperties)
+ override fun hashCode(): Int = Objects.hash(serverVad, semanticVad)
- fun toBuilder() = Builder().from(this)
+ override fun toString(): String =
+ when {
+ serverVad != null -> "TurnDetection{serverVad=$serverVad}"
+ semanticVad != null -> "TurnDetection{semanticVad=$semanticVad}"
+ _json != null -> "TurnDetection{_unknown=$_json}"
+ else -> throw IllegalStateException("Invalid TurnDetection")
+ }
companion object {
- /** Returns a mutable builder for constructing an instance of [TurnDetection]. */
- @JvmStatic fun builder() = Builder()
- }
-
- /** A builder for [TurnDetection]. */
- class Builder internal constructor() {
+ /**
+ * Server-side voice activity detection (VAD) which flips on when user speech is
+ * detected and off after a period of silence.
+ */
+ @JvmStatic fun ofServerVad(serverVad: ServerVad) = TurnDetection(serverVad = serverVad)
- private var createResponse: JsonField = JsonMissing.of()
- private var eagerness: JsonField = JsonMissing.of()
- private var idleTimeoutMs: JsonField = JsonMissing.of()
- private var interruptResponse: JsonField = JsonMissing.of()
- private var prefixPaddingMs: JsonField = JsonMissing.of()
- private var silenceDurationMs: JsonField = JsonMissing.of()
- private var threshold: JsonField = JsonMissing.of()
- private var type: JsonField = JsonMissing.of()
- private var additionalProperties: MutableMap = mutableMapOf()
+ /**
+ * Server-side semantic turn detection which uses a model to determine when the user has
+ * finished speaking.
+ */
+ @JvmStatic
+ fun ofSemanticVad(semanticVad: SemanticVad) = TurnDetection(semanticVad = semanticVad)
+ }
- @JvmSynthetic
- internal fun from(turnDetection: TurnDetection) = apply {
- createResponse = turnDetection.createResponse
- eagerness = turnDetection.eagerness
- idleTimeoutMs = turnDetection.idleTimeoutMs
- interruptResponse = turnDetection.interruptResponse
- prefixPaddingMs = turnDetection.prefixPaddingMs
- silenceDurationMs = turnDetection.silenceDurationMs
- threshold = turnDetection.threshold
- type = turnDetection.type
- additionalProperties = turnDetection.additionalProperties.toMutableMap()
- }
-
- /** Whether or not to automatically generate a response when a VAD stop event occurs. */
- fun createResponse(createResponse: Boolean) =
- createResponse(JsonField.of(createResponse))
+ /**
+ * An interface that defines how to map each variant of [TurnDetection] to a value of type
+ * [T].
+ */
+ interface Visitor {
/**
- * Sets [Builder.createResponse] to an arbitrary JSON value.
- *
- * You should usually call [Builder.createResponse] with a well-typed [Boolean] value
- * instead. This method is primarily for setting the field to an undocumented or not yet
- * supported value.
+ * Server-side voice activity detection (VAD) which flips on when user speech is
+ * detected and off after a period of silence.
*/
- fun createResponse(createResponse: JsonField) = apply {
- this.createResponse = createResponse
- }
+ fun visitServerVad(serverVad: ServerVad): T
/**
- * Used only for `semantic_vad` mode. The eagerness of the model to respond. `low` will
- * wait longer for the user to continue speaking, `high` will respond more quickly.
- * `auto` is the default and is equivalent to `medium`.
+ * Server-side semantic turn detection which uses a model to determine when the user has
+ * finished speaking.
*/
- fun eagerness(eagerness: Eagerness) = eagerness(JsonField.of(eagerness))
+ fun visitSemanticVad(semanticVad: SemanticVad): T
/**
- * Sets [Builder.eagerness] to an arbitrary JSON value.
+ * Maps an unknown variant of [TurnDetection] to a value of type [T].
*
- * You should usually call [Builder.eagerness] with a well-typed [Eagerness] value
- * instead. This method is primarily for setting the field to an undocumented or not yet
- * supported value.
+ * An instance of [TurnDetection] can contain an unknown variant if it was deserialized
+ * from data that doesn't match any known variant. For example, if the SDK is on an
+ * older version than the API, then the API may respond with new variants that the SDK
+ * is unaware of.
+ *
+ * @throws OpenAIInvalidDataException in the default implementation.
*/
- fun eagerness(eagerness: JsonField) = apply { this.eagerness = eagerness }
+ fun unknown(json: JsonValue?): T {
+ throw OpenAIInvalidDataException("Unknown TurnDetection: $json")
+ }
+ }
+
+ internal class Deserializer : BaseDeserializer(TurnDetection::class) {
+
+ override fun ObjectCodec.deserialize(node: JsonNode): TurnDetection {
+ val json = JsonValue.fromJsonNode(node)
+ val type = json.asObject().getOrNull()?.get("type")?.asString()?.getOrNull()
+
+ when (type) {
+ "server_vad" -> {
+ return tryDeserialize(node, jacksonTypeRef())?.let {
+ TurnDetection(serverVad = it, _json = json)
+ } ?: TurnDetection(_json = json)
+ }
+ "semantic_vad" -> {
+ return tryDeserialize(node, jacksonTypeRef())?.let {
+ TurnDetection(semanticVad = it, _json = json)
+ } ?: TurnDetection(_json = json)
+ }
+ }
+
+ return TurnDetection(_json = json)
+ }
+ }
+
+ internal class Serializer : BaseSerializer(TurnDetection::class) {
+
+ override fun serialize(
+ value: TurnDetection,
+ generator: JsonGenerator,
+ provider: SerializerProvider,
+ ) {
+ when {
+ value.serverVad != null -> generator.writeObject(value.serverVad)
+ value.semanticVad != null -> generator.writeObject(value.semanticVad)
+ value._json != null -> generator.writeObject(value._json)
+ else -> throw IllegalStateException("Invalid TurnDetection")
+ }
+ }
+ }
+
+ /**
+ * Server-side voice activity detection (VAD) which flips on when user speech is detected
+ * and off after a period of silence.
+ */
+ class ServerVad
+ private constructor(
+ private val type: JsonValue,
+ private val createResponse: JsonField,
+ private val idleTimeoutMs: JsonField,
+ private val interruptResponse: JsonField,
+ private val prefixPaddingMs: JsonField,
+ private val silenceDurationMs: JsonField,
+ private val threshold: JsonField,
+ private val additionalProperties: MutableMap,
+ ) {
+
+ @JsonCreator
+ private constructor(
+ @JsonProperty("type") @ExcludeMissing type: JsonValue = JsonMissing.of(),
+ @JsonProperty("create_response")
+ @ExcludeMissing
+ createResponse: JsonField = JsonMissing.of(),
+ @JsonProperty("idle_timeout_ms")
+ @ExcludeMissing
+ idleTimeoutMs: JsonField = JsonMissing.of(),
+ @JsonProperty("interrupt_response")
+ @ExcludeMissing
+ interruptResponse: JsonField = JsonMissing.of(),
+ @JsonProperty("prefix_padding_ms")
+ @ExcludeMissing
+ prefixPaddingMs: JsonField = JsonMissing.of(),
+ @JsonProperty("silence_duration_ms")
+ @ExcludeMissing
+ silenceDurationMs: JsonField = JsonMissing.of(),
+ @JsonProperty("threshold")
+ @ExcludeMissing
+ threshold: JsonField = JsonMissing.of(),
+ ) : this(
+ type,
+ createResponse,
+ idleTimeoutMs,
+ interruptResponse,
+ prefixPaddingMs,
+ silenceDurationMs,
+ threshold,
+ mutableMapOf(),
+ )
/**
- * Optional idle timeout after which turn detection will auto-timeout when no additional
- * audio is received.
+ * Type of turn detection, `server_vad` to turn on simple Server VAD.
+ *
+ * Expected to always return the following:
+ * ```java
+ * JsonValue.from("server_vad")
+ * ```
+ *
+ * However, this method can be useful for debugging and logging (e.g. if the server
+ * responded with an unexpected value).
*/
- fun idleTimeoutMs(idleTimeoutMs: Long?) =
- idleTimeoutMs(JsonField.ofNullable(idleTimeoutMs))
+ @JsonProperty("type") @ExcludeMissing fun _type(): JsonValue = type
/**
- * Alias for [Builder.idleTimeoutMs].
+ * Whether or not to automatically generate a response when a VAD stop event occurs.
*
- * This unboxed primitive overload exists for backwards compatibility.
+ * @throws OpenAIInvalidDataException if the JSON field has an unexpected type (e.g. if
+ * the server responded with an unexpected value).
*/
- fun idleTimeoutMs(idleTimeoutMs: Long) = idleTimeoutMs(idleTimeoutMs as Long?)
-
- /** Alias for calling [Builder.idleTimeoutMs] with `idleTimeoutMs.orElse(null)`. */
- fun idleTimeoutMs(idleTimeoutMs: Optional) =
- idleTimeoutMs(idleTimeoutMs.getOrNull())
+ fun createResponse(): Optional = createResponse.getOptional("create_response")
/**
- * Sets [Builder.idleTimeoutMs] to an arbitrary JSON value.
+ * Optional timeout after which a model response will be triggered automatically. This
+ * is useful for situations in which a long pause from the user is unexpected, such as a
+ * phone call. The model will effectively prompt the user to continue the conversation
+ * based on the current context.
*
- * You should usually call [Builder.idleTimeoutMs] with a well-typed [Long] value
- * instead. This method is primarily for setting the field to an undocumented or not yet
- * supported value.
+ * The timeout value will be applied after the last model response's audio has finished
+ * playing, i.e. it's set to the `response.done` time plus audio playback duration.
+ *
+ * An `input_audio_buffer.timeout_triggered` event (plus events associated with the
+ * Response) will be emitted when the timeout is reached. Idle timeout is currently only
+ * supported for `server_vad` mode.
+ *
+ * @throws OpenAIInvalidDataException if the JSON field has an unexpected type (e.g. if
+ * the server responded with an unexpected value).
*/
- fun idleTimeoutMs(idleTimeoutMs: JsonField) = apply {
- this.idleTimeoutMs = idleTimeoutMs
- }
+ fun idleTimeoutMs(): Optional = idleTimeoutMs.getOptional("idle_timeout_ms")
/**
* Whether or not to automatically interrupt any ongoing response with output to the
* default conversation (i.e. `conversation` of `auto`) when a VAD start event occurs.
- */
- fun interruptResponse(interruptResponse: Boolean) =
- interruptResponse(JsonField.of(interruptResponse))
-
- /**
- * Sets [Builder.interruptResponse] to an arbitrary JSON value.
*
- * You should usually call [Builder.interruptResponse] with a well-typed [Boolean] value
- * instead. This method is primarily for setting the field to an undocumented or not yet
- * supported value.
+ * @throws OpenAIInvalidDataException if the JSON field has an unexpected type (e.g. if
+ * the server responded with an unexpected value).
*/
- fun interruptResponse(interruptResponse: JsonField) = apply {
- this.interruptResponse = interruptResponse
- }
+ fun interruptResponse(): Optional =
+ interruptResponse.getOptional("interrupt_response")
/**
* Used only for `server_vad` mode. Amount of audio to include before the VAD detected
* speech (in milliseconds). Defaults to 300ms.
- */
- fun prefixPaddingMs(prefixPaddingMs: Long) =
- prefixPaddingMs(JsonField.of(prefixPaddingMs))
-
- /**
- * Sets [Builder.prefixPaddingMs] to an arbitrary JSON value.
*
- * You should usually call [Builder.prefixPaddingMs] with a well-typed [Long] value
- * instead. This method is primarily for setting the field to an undocumented or not yet
- * supported value.
+ * @throws OpenAIInvalidDataException if the JSON field has an unexpected type (e.g. if
+ * the server responded with an unexpected value).
*/
- fun prefixPaddingMs(prefixPaddingMs: JsonField) = apply {
- this.prefixPaddingMs = prefixPaddingMs
- }
+ fun prefixPaddingMs(): Optional = prefixPaddingMs.getOptional("prefix_padding_ms")
/**
* Used only for `server_vad` mode. Duration of silence to detect speech stop (in
* milliseconds). Defaults to 500ms. With shorter values the model will respond more
* quickly, but may jump in on short pauses from the user.
- */
- fun silenceDurationMs(silenceDurationMs: Long) =
- silenceDurationMs(JsonField.of(silenceDurationMs))
-
- /**
- * Sets [Builder.silenceDurationMs] to an arbitrary JSON value.
*
- * You should usually call [Builder.silenceDurationMs] with a well-typed [Long] value
- * instead. This method is primarily for setting the field to an undocumented or not yet
- * supported value.
+ * @throws OpenAIInvalidDataException if the JSON field has an unexpected type (e.g. if
+ * the server responded with an unexpected value).
*/
- fun silenceDurationMs(silenceDurationMs: JsonField) = apply {
- this.silenceDurationMs = silenceDurationMs
- }
+ fun silenceDurationMs(): Optional =
+ silenceDurationMs.getOptional("silence_duration_ms")
/**
* Used only for `server_vad` mode. Activation threshold for VAD (0.0 to 1.0), this
* defaults to 0.5. A higher threshold will require louder audio to activate the model,
* and thus might perform better in noisy environments.
+ *
+ * @throws OpenAIInvalidDataException if the JSON field has an unexpected type (e.g. if
+ * the server responded with an unexpected value).
*/
- fun threshold(threshold: Double) = threshold(JsonField.of(threshold))
+ fun threshold(): Optional = threshold.getOptional("threshold")
/**
- * Sets [Builder.threshold] to an arbitrary JSON value.
+ * Returns the raw JSON value of [createResponse].
*
- * You should usually call [Builder.threshold] with a well-typed [Double] value instead.
- * This method is primarily for setting the field to an undocumented or not yet
- * supported value.
+ * Unlike [createResponse], this method doesn't throw if the JSON field has an
+ * unexpected type.
*/
- fun threshold(threshold: JsonField) = apply { this.threshold = threshold }
-
- /** Type of turn detection. */
- fun type(type: Type) = type(JsonField.of(type))
+ @JsonProperty("create_response")
+ @ExcludeMissing
+ fun _createResponse(): JsonField = createResponse
/**
- * Sets [Builder.type] to an arbitrary JSON value.
+ * Returns the raw JSON value of [idleTimeoutMs].
*
- * You should usually call [Builder.type] with a well-typed [Type] value instead. This
- * method is primarily for setting the field to an undocumented or not yet supported
- * value.
+ * Unlike [idleTimeoutMs], this method doesn't throw if the JSON field has an unexpected
+ * type.
*/
- fun type(type: JsonField) = apply { this.type = type }
+ @JsonProperty("idle_timeout_ms")
+ @ExcludeMissing
+ fun _idleTimeoutMs(): JsonField = idleTimeoutMs
- fun additionalProperties(additionalProperties: Map) = apply {
- this.additionalProperties.clear()
- putAllAdditionalProperties(additionalProperties)
- }
+ /**
+ * Returns the raw JSON value of [interruptResponse].
+ *
+ * Unlike [interruptResponse], this method doesn't throw if the JSON field has an
+ * unexpected type.
+ */
+ @JsonProperty("interrupt_response")
+ @ExcludeMissing
+ fun _interruptResponse(): JsonField = interruptResponse
- fun putAdditionalProperty(key: String, value: JsonValue) = apply {
+ /**
+ * Returns the raw JSON value of [prefixPaddingMs].
+ *
+ * Unlike [prefixPaddingMs], this method doesn't throw if the JSON field has an
+ * unexpected type.
+ */
+ @JsonProperty("prefix_padding_ms")
+ @ExcludeMissing
+ fun _prefixPaddingMs(): JsonField = prefixPaddingMs
+
+ /**
+ * Returns the raw JSON value of [silenceDurationMs].
+ *
+ * Unlike [silenceDurationMs], this method doesn't throw if the JSON field has an
+ * unexpected type.
+ */
+ @JsonProperty("silence_duration_ms")
+ @ExcludeMissing
+ fun _silenceDurationMs(): JsonField = silenceDurationMs
+
+ /**
+ * Returns the raw JSON value of [threshold].
+ *
+ * Unlike [threshold], this method doesn't throw if the JSON field has an unexpected
+ * type.
+ */
+ @JsonProperty("threshold")
+ @ExcludeMissing
+ fun _threshold(): JsonField = threshold
+
+ @JsonAnySetter
+ private fun putAdditionalProperty(key: String, value: JsonValue) {
additionalProperties.put(key, value)
}
- fun putAllAdditionalProperties(additionalProperties: Map) = apply {
- this.additionalProperties.putAll(additionalProperties)
- }
+ @JsonAnyGetter
+ @ExcludeMissing
+ fun _additionalProperties(): Map =
+ Collections.unmodifiableMap(additionalProperties)
- fun removeAdditionalProperty(key: String) = apply { additionalProperties.remove(key) }
+ fun toBuilder() = Builder().from(this)
- fun removeAllAdditionalProperties(keys: Set) = apply {
- keys.forEach(::removeAdditionalProperty)
+ companion object {
+
+ /** Returns a mutable builder for constructing an instance of [ServerVad]. */
+ @JvmStatic fun builder() = Builder()
}
- /**
- * Returns an immutable instance of [TurnDetection].
- *
- * Further updates to this [Builder] will not mutate the returned instance.
- */
- fun build(): TurnDetection =
- TurnDetection(
- createResponse,
- eagerness,
- idleTimeoutMs,
- interruptResponse,
- prefixPaddingMs,
- silenceDurationMs,
- threshold,
- type,
- additionalProperties.toMutableMap(),
- )
- }
+ /** A builder for [ServerVad]. */
+ class Builder internal constructor() {
- private var validated: Boolean = false
+ private var type: JsonValue = JsonValue.from("server_vad")
+ private var createResponse: JsonField = JsonMissing.of()
+ private var idleTimeoutMs: JsonField = JsonMissing.of()
+ private var interruptResponse: JsonField = JsonMissing.of()
+ private var prefixPaddingMs: JsonField