From 63ad148f1f998de1c9d66021037d7d5e04615022 Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Thu, 11 Sep 2025 18:06:34 +0000 Subject: [PATCH 1/7] chore(api): Minor docs and type updates for realtime --- .stats.yml | 4 +- .../InputAudioBufferTimeoutTriggered.kt | 32 +- .../realtime/RealtimeAudioConfigInput.kt | 59 +- .../RealtimeAudioInputTurnDetection.kt | 1478 +++++++++------ .../models/realtime/RealtimeServerEvent.kt | 68 +- .../openai/models/realtime/RealtimeSession.kt | 1552 ++++++++++------ .../RealtimeTranscriptionSessionAudioInput.kt | 68 +- ...scriptionSessionAudioInputTurnDetection.kt | 1505 +++++++++------ .../RealtimeSessionCreateResponse.kt | 1611 ++++++++++------- .../com/openai/models/responses/Response.kt | 24 +- .../models/responses/ResponseCreateParams.kt | 40 +- .../realtime/RealtimeAudioConfigInputTest.kt | 32 +- .../realtime/RealtimeAudioConfigTest.kt | 18 +- .../RealtimeAudioInputTurnDetectionTest.kt | 103 +- .../realtime/RealtimeClientEventTest.kt | 22 +- .../realtime/RealtimeServerEventTest.kt | 44 +- .../RealtimeSessionCreateRequestTest.kt | 18 +- .../models/realtime/RealtimeSessionTest.kt | 32 +- ...ltimeTranscriptionSessionAudioInputTest.kt | 36 +- ...ptionSessionAudioInputTurnDetectionTest.kt | 120 +- .../RealtimeTranscriptionSessionAudioTest.kt | 34 +- ...meTranscriptionSessionCreateRequestTest.kt | 42 +- .../realtime/SessionCreatedEventTest.kt | 30 +- .../models/realtime/SessionUpdateEventTest.kt | 30 +- .../realtime/SessionUpdatedEventTest.kt | 30 +- .../ClientSecretCreateParamsTest.kt | 28 +- .../ClientSecretCreateResponseTest.kt | 45 +- .../RealtimeSessionCreateResponseTest.kt | 40 +- .../realtime/ClientSecretServiceAsyncTest.kt | 12 +- .../realtime/ClientSecretServiceTest.kt | 12 +- 30 files changed, 4364 insertions(+), 2805 deletions(-) diff --git a/.stats.yml b/.stats.yml index 2aa16be8..5388f246 100644 --- a/.stats.yml +++ b/.stats.yml @@ -1,4 +1,4 @@ configured_endpoints: 118 -openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-16cb18bed32bae8c5840fb39a1bf664026cc40463ad0c487dcb0df1bd3d72db0.yml -openapi_spec_hash: 4cb51b22f98dee1a90bc7add82d1d132 +openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-c829f9e7f51d4946dae7b02eb37eb857b538a464cf54c7ced5eff1b1c93e07db.yml +openapi_spec_hash: 1b2eaba46b264bcec8831bc496543649 config_hash: 930dac3aa861344867e4ac84f037b5df diff --git a/openai-java-core/src/main/kotlin/com/openai/models/realtime/InputAudioBufferTimeoutTriggered.kt b/openai-java-core/src/main/kotlin/com/openai/models/realtime/InputAudioBufferTimeoutTriggered.kt index f42c519a..393326bc 100644 --- a/openai-java-core/src/main/kotlin/com/openai/models/realtime/InputAudioBufferTimeoutTriggered.kt +++ b/openai-java-core/src/main/kotlin/com/openai/models/realtime/InputAudioBufferTimeoutTriggered.kt @@ -15,7 +15,21 @@ import com.openai.errors.OpenAIInvalidDataException import java.util.Collections import java.util.Objects -/** Returned when the server VAD timeout is triggered for the input audio buffer. */ +/** + * Returned when the Server VAD timeout is triggered for the input audio buffer. This is configured + * with `idle_timeout_ms` in the `turn_detection` settings of the session, and it indicates that + * there hasn't been any speech detected for the configured duration. + * + * The `audio_start_ms` and `audio_end_ms` fields indicate the segment of audio after the last model + * response up to the triggering time, as an offset from the beginning of audio written to the input + * audio buffer. This means it demarcates the segment of audio that was silent and the difference + * between the start and end values will roughly match the configured timeout. + * + * The empty audio will be committed to the conversation as an `input_audio` item (there will be a + * `input_audio_buffer.committed` event) and a model response will be generated. There may be speech + * that didn't trigger VAD but is still detected by the model, so the model may respond with + * something relevant to the conversation or a prompt to continue speaking. + */ class InputAudioBufferTimeoutTriggered private constructor( private val audioEndMs: JsonField, @@ -40,7 +54,8 @@ private constructor( ) : this(audioEndMs, audioStartMs, eventId, itemId, type, mutableMapOf()) /** - * Millisecond offset where speech ended within the buffered audio. + * Millisecond offset of audio written to the input audio buffer at the time the timeout was + * triggered. * * @throws OpenAIInvalidDataException if the JSON field has an unexpected type or is * unexpectedly missing or null (e.g. if the server responded with an unexpected value). @@ -48,7 +63,8 @@ private constructor( fun audioEndMs(): Long = audioEndMs.getRequired("audio_end_ms") /** - * Millisecond offset where speech started within the buffered audio. + * Millisecond offset of audio written to the input audio buffer that was after the playback + * time of the last model response. * * @throws OpenAIInvalidDataException if the JSON field has an unexpected type or is * unexpectedly missing or null (e.g. if the server responded with an unexpected value). @@ -165,7 +181,10 @@ private constructor( inputAudioBufferTimeoutTriggered.additionalProperties.toMutableMap() } - /** Millisecond offset where speech ended within the buffered audio. */ + /** + * Millisecond offset of audio written to the input audio buffer at the time the timeout was + * triggered. + */ fun audioEndMs(audioEndMs: Long) = audioEndMs(JsonField.of(audioEndMs)) /** @@ -176,7 +195,10 @@ private constructor( */ fun audioEndMs(audioEndMs: JsonField) = apply { this.audioEndMs = audioEndMs } - /** Millisecond offset where speech started within the buffered audio. */ + /** + * Millisecond offset of audio written to the input audio buffer that was after the playback + * time of the last model response. + */ fun audioStartMs(audioStartMs: Long) = audioStartMs(JsonField.of(audioStartMs)) /** diff --git a/openai-java-core/src/main/kotlin/com/openai/models/realtime/RealtimeAudioConfigInput.kt b/openai-java-core/src/main/kotlin/com/openai/models/realtime/RealtimeAudioConfigInput.kt index 3f72a49f..accf1636 100644 --- a/openai-java-core/src/main/kotlin/com/openai/models/realtime/RealtimeAudioConfigInput.kt +++ b/openai-java-core/src/main/kotlin/com/openai/models/realtime/RealtimeAudioConfigInput.kt @@ -76,14 +76,16 @@ private constructor( /** * Configuration for turn detection, ether Server VAD or Semantic VAD. This can be set to `null` - * to turn off, in which case the client must manually trigger model response. Server VAD means - * that the model will detect the start and end of speech based on audio volume and respond at - * the end of user speech. Semantic VAD is more advanced and uses a turn detection model (in - * conjunction with VAD) to semantically estimate whether the user has finished speaking, then - * dynamically sets a timeout based on this probability. For example, if user audio trails off - * with "uhhm", the model will score a low probability of turn end and wait longer for the user - * to continue speaking. This can be useful for more natural conversations, but may have a - * higher latency. + * to turn off, in which case the client must manually trigger model response. + * + * Server VAD means that the model will detect the start and end of speech based on audio volume + * and respond at the end of user speech. + * + * Semantic VAD is more advanced and uses a turn detection model (in conjunction with VAD) to + * semantically estimate whether the user has finished speaking, then dynamically sets a timeout + * based on this probability. For example, if user audio trails off with "uhhm", the model will + * score a low probability of turn end and wait longer for the user to continue speaking. This + * can be useful for more natural conversations, but may have a higher latency. * * @throws OpenAIInvalidDataException if the JSON field has an unexpected type (e.g. if the * server responded with an unexpected value). @@ -230,17 +232,24 @@ private constructor( /** * Configuration for turn detection, ether Server VAD or Semantic VAD. This can be set to - * `null` to turn off, in which case the client must manually trigger model response. Server - * VAD means that the model will detect the start and end of speech based on audio volume - * and respond at the end of user speech. Semantic VAD is more advanced and uses a turn - * detection model (in conjunction with VAD) to semantically estimate whether the user has - * finished speaking, then dynamically sets a timeout based on this probability. For - * example, if user audio trails off with "uhhm", the model will score a low probability of - * turn end and wait longer for the user to continue speaking. This can be useful for more - * natural conversations, but may have a higher latency. + * `null` to turn off, in which case the client must manually trigger model response. + * + * Server VAD means that the model will detect the start and end of speech based on audio + * volume and respond at the end of user speech. + * + * Semantic VAD is more advanced and uses a turn detection model (in conjunction with VAD) + * to semantically estimate whether the user has finished speaking, then dynamically sets a + * timeout based on this probability. For example, if user audio trails off with "uhhm", the + * model will score a low probability of turn end and wait longer for the user to continue + * speaking. This can be useful for more natural conversations, but may have a higher + * latency. */ - fun turnDetection(turnDetection: RealtimeAudioInputTurnDetection) = - turnDetection(JsonField.of(turnDetection)) + fun turnDetection(turnDetection: RealtimeAudioInputTurnDetection?) = + turnDetection(JsonField.ofNullable(turnDetection)) + + /** Alias for calling [Builder.turnDetection] with `turnDetection.orElse(null)`. */ + fun turnDetection(turnDetection: Optional) = + turnDetection(turnDetection.getOrNull()) /** * Sets [Builder.turnDetection] to an arbitrary JSON value. @@ -253,6 +262,20 @@ private constructor( this.turnDetection = turnDetection } + /** + * Alias for calling [turnDetection] with + * `RealtimeAudioInputTurnDetection.ofServerVad(serverVad)`. + */ + fun turnDetection(serverVad: RealtimeAudioInputTurnDetection.ServerVad) = + turnDetection(RealtimeAudioInputTurnDetection.ofServerVad(serverVad)) + + /** + * Alias for calling [turnDetection] with + * `RealtimeAudioInputTurnDetection.ofSemanticVad(semanticVad)`. + */ + fun turnDetection(semanticVad: RealtimeAudioInputTurnDetection.SemanticVad) = + turnDetection(RealtimeAudioInputTurnDetection.ofSemanticVad(semanticVad)) + fun additionalProperties(additionalProperties: Map) = apply { this.additionalProperties.clear() putAllAdditionalProperties(additionalProperties) diff --git a/openai-java-core/src/main/kotlin/com/openai/models/realtime/RealtimeAudioInputTurnDetection.kt b/openai-java-core/src/main/kotlin/com/openai/models/realtime/RealtimeAudioInputTurnDetection.kt index 63985f43..8c403670 100644 --- a/openai-java-core/src/main/kotlin/com/openai/models/realtime/RealtimeAudioInputTurnDetection.kt +++ b/openai-java-core/src/main/kotlin/com/openai/models/realtime/RealtimeAudioInputTurnDetection.kt @@ -6,11 +6,21 @@ import com.fasterxml.jackson.annotation.JsonAnyGetter import com.fasterxml.jackson.annotation.JsonAnySetter import com.fasterxml.jackson.annotation.JsonCreator import com.fasterxml.jackson.annotation.JsonProperty +import com.fasterxml.jackson.core.JsonGenerator +import com.fasterxml.jackson.core.ObjectCodec +import com.fasterxml.jackson.databind.JsonNode +import com.fasterxml.jackson.databind.SerializerProvider +import com.fasterxml.jackson.databind.annotation.JsonDeserialize +import com.fasterxml.jackson.databind.annotation.JsonSerialize +import com.fasterxml.jackson.module.kotlin.jacksonTypeRef +import com.openai.core.BaseDeserializer +import com.openai.core.BaseSerializer import com.openai.core.Enum import com.openai.core.ExcludeMissing import com.openai.core.JsonField import com.openai.core.JsonMissing import com.openai.core.JsonValue +import com.openai.core.getOrThrow import com.openai.errors.OpenAIInvalidDataException import java.util.Collections import java.util.Objects @@ -19,584 +29,641 @@ import kotlin.jvm.optionals.getOrNull /** * Configuration for turn detection, ether Server VAD or Semantic VAD. This can be set to `null` to - * turn off, in which case the client must manually trigger model response. Server VAD means that - * the model will detect the start and end of speech based on audio volume and respond at the end of - * user speech. Semantic VAD is more advanced and uses a turn detection model (in conjunction with - * VAD) to semantically estimate whether the user has finished speaking, then dynamically sets a - * timeout based on this probability. For example, if user audio trails off with "uhhm", the model - * will score a low probability of turn end and wait longer for the user to continue speaking. This - * can be useful for more natural conversations, but may have a higher latency. + * turn off, in which case the client must manually trigger model response. + * + * Server VAD means that the model will detect the start and end of speech based on audio volume and + * respond at the end of user speech. + * + * Semantic VAD is more advanced and uses a turn detection model (in conjunction with VAD) to + * semantically estimate whether the user has finished speaking, then dynamically sets a timeout + * based on this probability. For example, if user audio trails off with "uhhm", the model will + * score a low probability of turn end and wait longer for the user to continue speaking. This can + * be useful for more natural conversations, but may have a higher latency. */ +@JsonDeserialize(using = RealtimeAudioInputTurnDetection.Deserializer::class) +@JsonSerialize(using = RealtimeAudioInputTurnDetection.Serializer::class) class RealtimeAudioInputTurnDetection private constructor( - private val createResponse: JsonField, - private val eagerness: JsonField, - private val idleTimeoutMs: JsonField, - private val interruptResponse: JsonField, - private val prefixPaddingMs: JsonField, - private val silenceDurationMs: JsonField, - private val threshold: JsonField, - private val type: JsonField, - private val additionalProperties: MutableMap, + private val serverVad: ServerVad? = null, + private val semanticVad: SemanticVad? = null, + private val _json: JsonValue? = null, ) { - @JsonCreator - private constructor( - @JsonProperty("create_response") - @ExcludeMissing - createResponse: JsonField = JsonMissing.of(), - @JsonProperty("eagerness") - @ExcludeMissing - eagerness: JsonField = JsonMissing.of(), - @JsonProperty("idle_timeout_ms") - @ExcludeMissing - idleTimeoutMs: JsonField = JsonMissing.of(), - @JsonProperty("interrupt_response") - @ExcludeMissing - interruptResponse: JsonField = JsonMissing.of(), - @JsonProperty("prefix_padding_ms") - @ExcludeMissing - prefixPaddingMs: JsonField = JsonMissing.of(), - @JsonProperty("silence_duration_ms") - @ExcludeMissing - silenceDurationMs: JsonField = JsonMissing.of(), - @JsonProperty("threshold") @ExcludeMissing threshold: JsonField = JsonMissing.of(), - @JsonProperty("type") @ExcludeMissing type: JsonField = JsonMissing.of(), - ) : this( - createResponse, - eagerness, - idleTimeoutMs, - interruptResponse, - prefixPaddingMs, - silenceDurationMs, - threshold, - type, - mutableMapOf(), - ) - /** - * Whether or not to automatically generate a response when a VAD stop event occurs. - * - * @throws OpenAIInvalidDataException if the JSON field has an unexpected type (e.g. if the - * server responded with an unexpected value). + * Server-side voice activity detection (VAD) which flips on when user speech is detected and + * off after a period of silence. */ - fun createResponse(): Optional = createResponse.getOptional("create_response") + fun serverVad(): Optional = Optional.ofNullable(serverVad) /** - * Used only for `semantic_vad` mode. The eagerness of the model to respond. `low` will wait - * longer for the user to continue speaking, `high` will respond more quickly. `auto` is the - * default and is equivalent to `medium`. `low`, `medium`, and `high` have max timeouts of 8s, - * 4s, and 2s respectively. - * - * @throws OpenAIInvalidDataException if the JSON field has an unexpected type (e.g. if the - * server responded with an unexpected value). + * Server-side semantic turn detection which uses a model to determine when the user has + * finished speaking. */ - fun eagerness(): Optional = eagerness.getOptional("eagerness") + fun semanticVad(): Optional = Optional.ofNullable(semanticVad) - /** - * Optional idle timeout after which turn detection will auto-timeout when no additional audio - * is received and emits a `timeout_triggered` event. - * - * @throws OpenAIInvalidDataException if the JSON field has an unexpected type (e.g. if the - * server responded with an unexpected value). - */ - fun idleTimeoutMs(): Optional = idleTimeoutMs.getOptional("idle_timeout_ms") + fun isServerVad(): Boolean = serverVad != null - /** - * Whether or not to automatically interrupt any ongoing response with output to the default - * conversation (i.e. `conversation` of `auto`) when a VAD start event occurs. - * - * @throws OpenAIInvalidDataException if the JSON field has an unexpected type (e.g. if the - * server responded with an unexpected value). - */ - fun interruptResponse(): Optional = interruptResponse.getOptional("interrupt_response") + fun isSemanticVad(): Boolean = semanticVad != null /** - * Used only for `server_vad` mode. Amount of audio to include before the VAD detected speech - * (in milliseconds). Defaults to 300ms. - * - * @throws OpenAIInvalidDataException if the JSON field has an unexpected type (e.g. if the - * server responded with an unexpected value). + * Server-side voice activity detection (VAD) which flips on when user speech is detected and + * off after a period of silence. */ - fun prefixPaddingMs(): Optional = prefixPaddingMs.getOptional("prefix_padding_ms") + fun asServerVad(): ServerVad = serverVad.getOrThrow("serverVad") /** - * Used only for `server_vad` mode. Duration of silence to detect speech stop (in milliseconds). - * Defaults to 500ms. With shorter values the model will respond more quickly, but may jump in - * on short pauses from the user. - * - * @throws OpenAIInvalidDataException if the JSON field has an unexpected type (e.g. if the - * server responded with an unexpected value). + * Server-side semantic turn detection which uses a model to determine when the user has + * finished speaking. */ - fun silenceDurationMs(): Optional = silenceDurationMs.getOptional("silence_duration_ms") + fun asSemanticVad(): SemanticVad = semanticVad.getOrThrow("semanticVad") - /** - * Used only for `server_vad` mode. Activation threshold for VAD (0.0 to 1.0), this defaults to - * 0.5. A higher threshold will require louder audio to activate the model, and thus might - * perform better in noisy environments. - * - * @throws OpenAIInvalidDataException if the JSON field has an unexpected type (e.g. if the - * server responded with an unexpected value). - */ - fun threshold(): Optional = threshold.getOptional("threshold") + fun _json(): Optional = Optional.ofNullable(_json) - /** - * Type of turn detection. - * - * @throws OpenAIInvalidDataException if the JSON field has an unexpected type (e.g. if the - * server responded with an unexpected value). - */ - fun type(): Optional = type.getOptional("type") + fun accept(visitor: Visitor): T = + when { + serverVad != null -> visitor.visitServerVad(serverVad) + semanticVad != null -> visitor.visitSemanticVad(semanticVad) + else -> visitor.unknown(_json) + } - /** - * Returns the raw JSON value of [createResponse]. - * - * Unlike [createResponse], this method doesn't throw if the JSON field has an unexpected type. - */ - @JsonProperty("create_response") - @ExcludeMissing - fun _createResponse(): JsonField = createResponse + private var validated: Boolean = false - /** - * Returns the raw JSON value of [eagerness]. - * - * Unlike [eagerness], this method doesn't throw if the JSON field has an unexpected type. - */ - @JsonProperty("eagerness") @ExcludeMissing fun _eagerness(): JsonField = eagerness + fun validate(): RealtimeAudioInputTurnDetection = apply { + if (validated) { + return@apply + } - /** - * Returns the raw JSON value of [idleTimeoutMs]. - * - * Unlike [idleTimeoutMs], this method doesn't throw if the JSON field has an unexpected type. - */ - @JsonProperty("idle_timeout_ms") - @ExcludeMissing - fun _idleTimeoutMs(): JsonField = idleTimeoutMs + accept( + object : Visitor { + override fun visitServerVad(serverVad: ServerVad) { + serverVad.validate() + } - /** - * Returns the raw JSON value of [interruptResponse]. - * - * Unlike [interruptResponse], this method doesn't throw if the JSON field has an unexpected - * type. - */ - @JsonProperty("interrupt_response") - @ExcludeMissing - fun _interruptResponse(): JsonField = interruptResponse + override fun visitSemanticVad(semanticVad: SemanticVad) { + semanticVad.validate() + } + } + ) + validated = true + } - /** - * Returns the raw JSON value of [prefixPaddingMs]. - * - * Unlike [prefixPaddingMs], this method doesn't throw if the JSON field has an unexpected type. - */ - @JsonProperty("prefix_padding_ms") - @ExcludeMissing - fun _prefixPaddingMs(): JsonField = prefixPaddingMs + fun isValid(): Boolean = + try { + validate() + true + } catch (e: OpenAIInvalidDataException) { + false + } /** - * Returns the raw JSON value of [silenceDurationMs]. + * Returns a score indicating how many valid values are contained in this object recursively. * - * Unlike [silenceDurationMs], this method doesn't throw if the JSON field has an unexpected - * type. + * Used for best match union deserialization. */ - @JsonProperty("silence_duration_ms") - @ExcludeMissing - fun _silenceDurationMs(): JsonField = silenceDurationMs + @JvmSynthetic + internal fun validity(): Int = + accept( + object : Visitor { + override fun visitServerVad(serverVad: ServerVad) = serverVad.validity() - /** - * Returns the raw JSON value of [threshold]. - * - * Unlike [threshold], this method doesn't throw if the JSON field has an unexpected type. - */ - @JsonProperty("threshold") @ExcludeMissing fun _threshold(): JsonField = threshold + override fun visitSemanticVad(semanticVad: SemanticVad) = semanticVad.validity() - /** - * Returns the raw JSON value of [type]. - * - * Unlike [type], this method doesn't throw if the JSON field has an unexpected type. - */ - @JsonProperty("type") @ExcludeMissing fun _type(): JsonField = type + override fun unknown(json: JsonValue?) = 0 + } + ) + + override fun equals(other: Any?): Boolean { + if (this === other) { + return true + } - @JsonAnySetter - private fun putAdditionalProperty(key: String, value: JsonValue) { - additionalProperties.put(key, value) + return other is RealtimeAudioInputTurnDetection && + serverVad == other.serverVad && + semanticVad == other.semanticVad } - @JsonAnyGetter - @ExcludeMissing - fun _additionalProperties(): Map = - Collections.unmodifiableMap(additionalProperties) + override fun hashCode(): Int = Objects.hash(serverVad, semanticVad) - fun toBuilder() = Builder().from(this) + override fun toString(): String = + when { + serverVad != null -> "RealtimeAudioInputTurnDetection{serverVad=$serverVad}" + semanticVad != null -> "RealtimeAudioInputTurnDetection{semanticVad=$semanticVad}" + _json != null -> "RealtimeAudioInputTurnDetection{_unknown=$_json}" + else -> throw IllegalStateException("Invalid RealtimeAudioInputTurnDetection") + } companion object { /** - * Returns a mutable builder for constructing an instance of - * [RealtimeAudioInputTurnDetection]. + * Server-side voice activity detection (VAD) which flips on when user speech is detected + * and off after a period of silence. */ - @JvmStatic fun builder() = Builder() - } - - /** A builder for [RealtimeAudioInputTurnDetection]. */ - class Builder internal constructor() { + @JvmStatic + fun ofServerVad(serverVad: ServerVad) = + RealtimeAudioInputTurnDetection(serverVad = serverVad) - private var createResponse: JsonField = JsonMissing.of() - private var eagerness: JsonField = JsonMissing.of() - private var idleTimeoutMs: JsonField = JsonMissing.of() - private var interruptResponse: JsonField = JsonMissing.of() - private var prefixPaddingMs: JsonField = JsonMissing.of() - private var silenceDurationMs: JsonField = JsonMissing.of() - private var threshold: JsonField = JsonMissing.of() - private var type: JsonField = JsonMissing.of() - private var additionalProperties: MutableMap = mutableMapOf() + /** + * Server-side semantic turn detection which uses a model to determine when the user has + * finished speaking. + */ + @JvmStatic + fun ofSemanticVad(semanticVad: SemanticVad) = + RealtimeAudioInputTurnDetection(semanticVad = semanticVad) + } - @JvmSynthetic - internal fun from(realtimeAudioInputTurnDetection: RealtimeAudioInputTurnDetection) = - apply { - createResponse = realtimeAudioInputTurnDetection.createResponse - eagerness = realtimeAudioInputTurnDetection.eagerness - idleTimeoutMs = realtimeAudioInputTurnDetection.idleTimeoutMs - interruptResponse = realtimeAudioInputTurnDetection.interruptResponse - prefixPaddingMs = realtimeAudioInputTurnDetection.prefixPaddingMs - silenceDurationMs = realtimeAudioInputTurnDetection.silenceDurationMs - threshold = realtimeAudioInputTurnDetection.threshold - type = realtimeAudioInputTurnDetection.type - additionalProperties = - realtimeAudioInputTurnDetection.additionalProperties.toMutableMap() - } - - /** Whether or not to automatically generate a response when a VAD stop event occurs. */ - fun createResponse(createResponse: Boolean) = createResponse(JsonField.of(createResponse)) + /** + * An interface that defines how to map each variant of [RealtimeAudioInputTurnDetection] to a + * value of type [T]. + */ + interface Visitor { /** - * Sets [Builder.createResponse] to an arbitrary JSON value. - * - * You should usually call [Builder.createResponse] with a well-typed [Boolean] value - * instead. This method is primarily for setting the field to an undocumented or not yet - * supported value. + * Server-side voice activity detection (VAD) which flips on when user speech is detected + * and off after a period of silence. */ - fun createResponse(createResponse: JsonField) = apply { - this.createResponse = createResponse - } + fun visitServerVad(serverVad: ServerVad): T /** - * Used only for `semantic_vad` mode. The eagerness of the model to respond. `low` will wait - * longer for the user to continue speaking, `high` will respond more quickly. `auto` is the - * default and is equivalent to `medium`. `low`, `medium`, and `high` have max timeouts of - * 8s, 4s, and 2s respectively. + * Server-side semantic turn detection which uses a model to determine when the user has + * finished speaking. */ - fun eagerness(eagerness: Eagerness) = eagerness(JsonField.of(eagerness)) + fun visitSemanticVad(semanticVad: SemanticVad): T /** - * Sets [Builder.eagerness] to an arbitrary JSON value. + * Maps an unknown variant of [RealtimeAudioInputTurnDetection] to a value of type [T]. + * + * An instance of [RealtimeAudioInputTurnDetection] can contain an unknown variant if it was + * deserialized from data that doesn't match any known variant. For example, if the SDK is + * on an older version than the API, then the API may respond with new variants that the SDK + * is unaware of. * - * You should usually call [Builder.eagerness] with a well-typed [Eagerness] value instead. - * This method is primarily for setting the field to an undocumented or not yet supported - * value. + * @throws OpenAIInvalidDataException in the default implementation. */ - fun eagerness(eagerness: JsonField) = apply { this.eagerness = eagerness } + fun unknown(json: JsonValue?): T { + throw OpenAIInvalidDataException("Unknown RealtimeAudioInputTurnDetection: $json") + } + } + + internal class Deserializer : + BaseDeserializer(RealtimeAudioInputTurnDetection::class) { + + override fun ObjectCodec.deserialize(node: JsonNode): RealtimeAudioInputTurnDetection { + val json = JsonValue.fromJsonNode(node) + val type = json.asObject().getOrNull()?.get("type")?.asString()?.getOrNull() + + when (type) { + "server_vad" -> { + return tryDeserialize(node, jacksonTypeRef())?.let { + RealtimeAudioInputTurnDetection(serverVad = it, _json = json) + } ?: RealtimeAudioInputTurnDetection(_json = json) + } + "semantic_vad" -> { + return tryDeserialize(node, jacksonTypeRef())?.let { + RealtimeAudioInputTurnDetection(semanticVad = it, _json = json) + } ?: RealtimeAudioInputTurnDetection(_json = json) + } + } + + return RealtimeAudioInputTurnDetection(_json = json) + } + } + + internal class Serializer : + BaseSerializer(RealtimeAudioInputTurnDetection::class) { + + override fun serialize( + value: RealtimeAudioInputTurnDetection, + generator: JsonGenerator, + provider: SerializerProvider, + ) { + when { + value.serverVad != null -> generator.writeObject(value.serverVad) + value.semanticVad != null -> generator.writeObject(value.semanticVad) + value._json != null -> generator.writeObject(value._json) + else -> throw IllegalStateException("Invalid RealtimeAudioInputTurnDetection") + } + } + } + + /** + * Server-side voice activity detection (VAD) which flips on when user speech is detected and + * off after a period of silence. + */ + class ServerVad + private constructor( + private val type: JsonValue, + private val createResponse: JsonField, + private val idleTimeoutMs: JsonField, + private val interruptResponse: JsonField, + private val prefixPaddingMs: JsonField, + private val silenceDurationMs: JsonField, + private val threshold: JsonField, + private val additionalProperties: MutableMap, + ) { + + @JsonCreator + private constructor( + @JsonProperty("type") @ExcludeMissing type: JsonValue = JsonMissing.of(), + @JsonProperty("create_response") + @ExcludeMissing + createResponse: JsonField = JsonMissing.of(), + @JsonProperty("idle_timeout_ms") + @ExcludeMissing + idleTimeoutMs: JsonField = JsonMissing.of(), + @JsonProperty("interrupt_response") + @ExcludeMissing + interruptResponse: JsonField = JsonMissing.of(), + @JsonProperty("prefix_padding_ms") + @ExcludeMissing + prefixPaddingMs: JsonField = JsonMissing.of(), + @JsonProperty("silence_duration_ms") + @ExcludeMissing + silenceDurationMs: JsonField = JsonMissing.of(), + @JsonProperty("threshold") + @ExcludeMissing + threshold: JsonField = JsonMissing.of(), + ) : this( + type, + createResponse, + idleTimeoutMs, + interruptResponse, + prefixPaddingMs, + silenceDurationMs, + threshold, + mutableMapOf(), + ) /** - * Optional idle timeout after which turn detection will auto-timeout when no additional - * audio is received and emits a `timeout_triggered` event. + * Type of turn detection, `server_vad` to turn on simple Server VAD. + * + * Expected to always return the following: + * ```java + * JsonValue.from("server_vad") + * ``` + * + * However, this method can be useful for debugging and logging (e.g. if the server + * responded with an unexpected value). */ - fun idleTimeoutMs(idleTimeoutMs: Long?) = idleTimeoutMs(JsonField.ofNullable(idleTimeoutMs)) + @JsonProperty("type") @ExcludeMissing fun _type(): JsonValue = type /** - * Alias for [Builder.idleTimeoutMs]. + * Whether or not to automatically generate a response when a VAD stop event occurs. * - * This unboxed primitive overload exists for backwards compatibility. + * @throws OpenAIInvalidDataException if the JSON field has an unexpected type (e.g. if the + * server responded with an unexpected value). */ - fun idleTimeoutMs(idleTimeoutMs: Long) = idleTimeoutMs(idleTimeoutMs as Long?) - - /** Alias for calling [Builder.idleTimeoutMs] with `idleTimeoutMs.orElse(null)`. */ - fun idleTimeoutMs(idleTimeoutMs: Optional) = idleTimeoutMs(idleTimeoutMs.getOrNull()) + fun createResponse(): Optional = createResponse.getOptional("create_response") /** - * Sets [Builder.idleTimeoutMs] to an arbitrary JSON value. + * Optional timeout after which a model response will be triggered automatically. This is + * useful for situations in which a long pause from the user is unexpected, such as a phone + * call. The model will effectively prompt the user to continue the conversation based on + * the current context. * - * You should usually call [Builder.idleTimeoutMs] with a well-typed [Long] value instead. - * This method is primarily for setting the field to an undocumented or not yet supported - * value. + * The timeout value will be applied after the last model response's audio has finished + * playing, i.e. it's set to the `response.done` time plus audio playback duration. + * + * An `input_audio_buffer.timeout_triggered` event (plus events associated with the + * Response) will be emitted when the timeout is reached. Idle timeout is currently only + * supported for `server_vad` mode. + * + * @throws OpenAIInvalidDataException if the JSON field has an unexpected type (e.g. if the + * server responded with an unexpected value). */ - fun idleTimeoutMs(idleTimeoutMs: JsonField) = apply { - this.idleTimeoutMs = idleTimeoutMs - } + fun idleTimeoutMs(): Optional = idleTimeoutMs.getOptional("idle_timeout_ms") /** * Whether or not to automatically interrupt any ongoing response with output to the default * conversation (i.e. `conversation` of `auto`) when a VAD start event occurs. - */ - fun interruptResponse(interruptResponse: Boolean) = - interruptResponse(JsonField.of(interruptResponse)) - - /** - * Sets [Builder.interruptResponse] to an arbitrary JSON value. * - * You should usually call [Builder.interruptResponse] with a well-typed [Boolean] value - * instead. This method is primarily for setting the field to an undocumented or not yet - * supported value. + * @throws OpenAIInvalidDataException if the JSON field has an unexpected type (e.g. if the + * server responded with an unexpected value). */ - fun interruptResponse(interruptResponse: JsonField) = apply { - this.interruptResponse = interruptResponse - } + fun interruptResponse(): Optional = + interruptResponse.getOptional("interrupt_response") /** * Used only for `server_vad` mode. Amount of audio to include before the VAD detected * speech (in milliseconds). Defaults to 300ms. - */ - fun prefixPaddingMs(prefixPaddingMs: Long) = prefixPaddingMs(JsonField.of(prefixPaddingMs)) - - /** - * Sets [Builder.prefixPaddingMs] to an arbitrary JSON value. * - * You should usually call [Builder.prefixPaddingMs] with a well-typed [Long] value instead. - * This method is primarily for setting the field to an undocumented or not yet supported - * value. + * @throws OpenAIInvalidDataException if the JSON field has an unexpected type (e.g. if the + * server responded with an unexpected value). */ - fun prefixPaddingMs(prefixPaddingMs: JsonField) = apply { - this.prefixPaddingMs = prefixPaddingMs - } + fun prefixPaddingMs(): Optional = prefixPaddingMs.getOptional("prefix_padding_ms") /** * Used only for `server_vad` mode. Duration of silence to detect speech stop (in * milliseconds). Defaults to 500ms. With shorter values the model will respond more * quickly, but may jump in on short pauses from the user. - */ - fun silenceDurationMs(silenceDurationMs: Long) = - silenceDurationMs(JsonField.of(silenceDurationMs)) - - /** - * Sets [Builder.silenceDurationMs] to an arbitrary JSON value. * - * You should usually call [Builder.silenceDurationMs] with a well-typed [Long] value - * instead. This method is primarily for setting the field to an undocumented or not yet - * supported value. + * @throws OpenAIInvalidDataException if the JSON field has an unexpected type (e.g. if the + * server responded with an unexpected value). */ - fun silenceDurationMs(silenceDurationMs: JsonField) = apply { - this.silenceDurationMs = silenceDurationMs - } + fun silenceDurationMs(): Optional = + silenceDurationMs.getOptional("silence_duration_ms") /** * Used only for `server_vad` mode. Activation threshold for VAD (0.0 to 1.0), this defaults * to 0.5. A higher threshold will require louder audio to activate the model, and thus * might perform better in noisy environments. + * + * @throws OpenAIInvalidDataException if the JSON field has an unexpected type (e.g. if the + * server responded with an unexpected value). */ - fun threshold(threshold: Double) = threshold(JsonField.of(threshold)) + fun threshold(): Optional = threshold.getOptional("threshold") + + /** + * Returns the raw JSON value of [createResponse]. + * + * Unlike [createResponse], this method doesn't throw if the JSON field has an unexpected + * type. + */ + @JsonProperty("create_response") + @ExcludeMissing + fun _createResponse(): JsonField = createResponse /** - * Sets [Builder.threshold] to an arbitrary JSON value. + * Returns the raw JSON value of [idleTimeoutMs]. * - * You should usually call [Builder.threshold] with a well-typed [Double] value instead. - * This method is primarily for setting the field to an undocumented or not yet supported - * value. + * Unlike [idleTimeoutMs], this method doesn't throw if the JSON field has an unexpected + * type. */ - fun threshold(threshold: JsonField) = apply { this.threshold = threshold } + @JsonProperty("idle_timeout_ms") + @ExcludeMissing + fun _idleTimeoutMs(): JsonField = idleTimeoutMs - /** Type of turn detection. */ - fun type(type: Type) = type(JsonField.of(type)) + /** + * Returns the raw JSON value of [interruptResponse]. + * + * Unlike [interruptResponse], this method doesn't throw if the JSON field has an unexpected + * type. + */ + @JsonProperty("interrupt_response") + @ExcludeMissing + fun _interruptResponse(): JsonField = interruptResponse /** - * Sets [Builder.type] to an arbitrary JSON value. + * Returns the raw JSON value of [prefixPaddingMs]. * - * You should usually call [Builder.type] with a well-typed [Type] value instead. This - * method is primarily for setting the field to an undocumented or not yet supported value. + * Unlike [prefixPaddingMs], this method doesn't throw if the JSON field has an unexpected + * type. */ - fun type(type: JsonField) = apply { this.type = type } + @JsonProperty("prefix_padding_ms") + @ExcludeMissing + fun _prefixPaddingMs(): JsonField = prefixPaddingMs - fun additionalProperties(additionalProperties: Map) = apply { - this.additionalProperties.clear() - putAllAdditionalProperties(additionalProperties) - } + /** + * Returns the raw JSON value of [silenceDurationMs]. + * + * Unlike [silenceDurationMs], this method doesn't throw if the JSON field has an unexpected + * type. + */ + @JsonProperty("silence_duration_ms") + @ExcludeMissing + fun _silenceDurationMs(): JsonField = silenceDurationMs - fun putAdditionalProperty(key: String, value: JsonValue) = apply { + /** + * Returns the raw JSON value of [threshold]. + * + * Unlike [threshold], this method doesn't throw if the JSON field has an unexpected type. + */ + @JsonProperty("threshold") @ExcludeMissing fun _threshold(): JsonField = threshold + + @JsonAnySetter + private fun putAdditionalProperty(key: String, value: JsonValue) { additionalProperties.put(key, value) } - fun putAllAdditionalProperties(additionalProperties: Map) = apply { - this.additionalProperties.putAll(additionalProperties) - } + @JsonAnyGetter + @ExcludeMissing + fun _additionalProperties(): Map = + Collections.unmodifiableMap(additionalProperties) + + fun toBuilder() = Builder().from(this) - fun removeAdditionalProperty(key: String) = apply { additionalProperties.remove(key) } + companion object { - fun removeAllAdditionalProperties(keys: Set) = apply { - keys.forEach(::removeAdditionalProperty) + /** Returns a mutable builder for constructing an instance of [ServerVad]. */ + @JvmStatic fun builder() = Builder() } - /** - * Returns an immutable instance of [RealtimeAudioInputTurnDetection]. - * - * Further updates to this [Builder] will not mutate the returned instance. - */ - fun build(): RealtimeAudioInputTurnDetection = - RealtimeAudioInputTurnDetection( - createResponse, - eagerness, - idleTimeoutMs, - interruptResponse, - prefixPaddingMs, - silenceDurationMs, - threshold, - type, - additionalProperties.toMutableMap(), - ) - } + /** A builder for [ServerVad]. */ + class Builder internal constructor() { + + private var type: JsonValue = JsonValue.from("server_vad") + private var createResponse: JsonField = JsonMissing.of() + private var idleTimeoutMs: JsonField = JsonMissing.of() + private var interruptResponse: JsonField = JsonMissing.of() + private var prefixPaddingMs: JsonField = JsonMissing.of() + private var silenceDurationMs: JsonField = JsonMissing.of() + private var threshold: JsonField = JsonMissing.of() + private var additionalProperties: MutableMap = mutableMapOf() + + @JvmSynthetic + internal fun from(serverVad: ServerVad) = apply { + type = serverVad.type + createResponse = serverVad.createResponse + idleTimeoutMs = serverVad.idleTimeoutMs + interruptResponse = serverVad.interruptResponse + prefixPaddingMs = serverVad.prefixPaddingMs + silenceDurationMs = serverVad.silenceDurationMs + threshold = serverVad.threshold + additionalProperties = serverVad.additionalProperties.toMutableMap() + } - private var validated: Boolean = false + /** + * Sets the field to an arbitrary JSON value. + * + * It is usually unnecessary to call this method because the field defaults to the + * following: + * ```java + * JsonValue.from("server_vad") + * ``` + * + * This method is primarily for setting the field to an undocumented or not yet + * supported value. + */ + fun type(type: JsonValue) = apply { this.type = type } - fun validate(): RealtimeAudioInputTurnDetection = apply { - if (validated) { - return@apply - } + /** Whether or not to automatically generate a response when a VAD stop event occurs. */ + fun createResponse(createResponse: Boolean) = + createResponse(JsonField.of(createResponse)) - createResponse() - eagerness().ifPresent { it.validate() } - idleTimeoutMs() - interruptResponse() - prefixPaddingMs() - silenceDurationMs() - threshold() - type().ifPresent { it.validate() } - validated = true - } + /** + * Sets [Builder.createResponse] to an arbitrary JSON value. + * + * You should usually call [Builder.createResponse] with a well-typed [Boolean] value + * instead. This method is primarily for setting the field to an undocumented or not yet + * supported value. + */ + fun createResponse(createResponse: JsonField) = apply { + this.createResponse = createResponse + } - fun isValid(): Boolean = - try { - validate() - true - } catch (e: OpenAIInvalidDataException) { - false - } + /** + * Optional timeout after which a model response will be triggered automatically. This + * is useful for situations in which a long pause from the user is unexpected, such as a + * phone call. The model will effectively prompt the user to continue the conversation + * based on the current context. + * + * The timeout value will be applied after the last model response's audio has finished + * playing, i.e. it's set to the `response.done` time plus audio playback duration. + * + * An `input_audio_buffer.timeout_triggered` event (plus events associated with the + * Response) will be emitted when the timeout is reached. Idle timeout is currently only + * supported for `server_vad` mode. + */ + fun idleTimeoutMs(idleTimeoutMs: Long?) = + idleTimeoutMs(JsonField.ofNullable(idleTimeoutMs)) - /** - * Returns a score indicating how many valid values are contained in this object recursively. - * - * Used for best match union deserialization. - */ - @JvmSynthetic - internal fun validity(): Int = - (if (createResponse.asKnown().isPresent) 1 else 0) + - (eagerness.asKnown().getOrNull()?.validity() ?: 0) + - (if (idleTimeoutMs.asKnown().isPresent) 1 else 0) + - (if (interruptResponse.asKnown().isPresent) 1 else 0) + - (if (prefixPaddingMs.asKnown().isPresent) 1 else 0) + - (if (silenceDurationMs.asKnown().isPresent) 1 else 0) + - (if (threshold.asKnown().isPresent) 1 else 0) + - (type.asKnown().getOrNull()?.validity() ?: 0) + /** + * Alias for [Builder.idleTimeoutMs]. + * + * This unboxed primitive overload exists for backwards compatibility. + */ + fun idleTimeoutMs(idleTimeoutMs: Long) = idleTimeoutMs(idleTimeoutMs as Long?) - /** - * Used only for `semantic_vad` mode. The eagerness of the model to respond. `low` will wait - * longer for the user to continue speaking, `high` will respond more quickly. `auto` is the - * default and is equivalent to `medium`. `low`, `medium`, and `high` have max timeouts of 8s, - * 4s, and 2s respectively. - */ - class Eagerness @JsonCreator private constructor(private val value: JsonField) : Enum { + /** Alias for calling [Builder.idleTimeoutMs] with `idleTimeoutMs.orElse(null)`. */ + fun idleTimeoutMs(idleTimeoutMs: Optional) = + idleTimeoutMs(idleTimeoutMs.getOrNull()) - /** - * Returns this class instance's raw value. - * - * This is usually only useful if this instance was deserialized from data that doesn't - * match any known member, and you want to know that value. For example, if the SDK is on an - * older version than the API, then the API may respond with new members that the SDK is - * unaware of. - */ - @com.fasterxml.jackson.annotation.JsonValue fun _value(): JsonField = value + /** + * Sets [Builder.idleTimeoutMs] to an arbitrary JSON value. + * + * You should usually call [Builder.idleTimeoutMs] with a well-typed [Long] value + * instead. This method is primarily for setting the field to an undocumented or not yet + * supported value. + */ + fun idleTimeoutMs(idleTimeoutMs: JsonField) = apply { + this.idleTimeoutMs = idleTimeoutMs + } - companion object { + /** + * Whether or not to automatically interrupt any ongoing response with output to the + * default conversation (i.e. `conversation` of `auto`) when a VAD start event occurs. + */ + fun interruptResponse(interruptResponse: Boolean) = + interruptResponse(JsonField.of(interruptResponse)) - @JvmField val LOW = of("low") + /** + * Sets [Builder.interruptResponse] to an arbitrary JSON value. + * + * You should usually call [Builder.interruptResponse] with a well-typed [Boolean] value + * instead. This method is primarily for setting the field to an undocumented or not yet + * supported value. + */ + fun interruptResponse(interruptResponse: JsonField) = apply { + this.interruptResponse = interruptResponse + } - @JvmField val MEDIUM = of("medium") + /** + * Used only for `server_vad` mode. Amount of audio to include before the VAD detected + * speech (in milliseconds). Defaults to 300ms. + */ + fun prefixPaddingMs(prefixPaddingMs: Long) = + prefixPaddingMs(JsonField.of(prefixPaddingMs)) - @JvmField val HIGH = of("high") + /** + * Sets [Builder.prefixPaddingMs] to an arbitrary JSON value. + * + * You should usually call [Builder.prefixPaddingMs] with a well-typed [Long] value + * instead. This method is primarily for setting the field to an undocumented or not yet + * supported value. + */ + fun prefixPaddingMs(prefixPaddingMs: JsonField) = apply { + this.prefixPaddingMs = prefixPaddingMs + } - @JvmField val AUTO = of("auto") + /** + * Used only for `server_vad` mode. Duration of silence to detect speech stop (in + * milliseconds). Defaults to 500ms. With shorter values the model will respond more + * quickly, but may jump in on short pauses from the user. + */ + fun silenceDurationMs(silenceDurationMs: Long) = + silenceDurationMs(JsonField.of(silenceDurationMs)) - @JvmStatic fun of(value: String) = Eagerness(JsonField.of(value)) - } + /** + * Sets [Builder.silenceDurationMs] to an arbitrary JSON value. + * + * You should usually call [Builder.silenceDurationMs] with a well-typed [Long] value + * instead. This method is primarily for setting the field to an undocumented or not yet + * supported value. + */ + fun silenceDurationMs(silenceDurationMs: JsonField) = apply { + this.silenceDurationMs = silenceDurationMs + } - /** An enum containing [Eagerness]'s known values. */ - enum class Known { - LOW, - MEDIUM, - HIGH, - AUTO, - } + /** + * Used only for `server_vad` mode. Activation threshold for VAD (0.0 to 1.0), this + * defaults to 0.5. A higher threshold will require louder audio to activate the model, + * and thus might perform better in noisy environments. + */ + fun threshold(threshold: Double) = threshold(JsonField.of(threshold)) - /** - * An enum containing [Eagerness]'s known values, as well as an [_UNKNOWN] member. - * - * An instance of [Eagerness] can contain an unknown value in a couple of cases: - * - It was deserialized from data that doesn't match any known member. For example, if the - * SDK is on an older version than the API, then the API may respond with new members that - * the SDK is unaware of. - * - It was constructed with an arbitrary value using the [of] method. - */ - enum class Value { - LOW, - MEDIUM, - HIGH, - AUTO, /** - * An enum member indicating that [Eagerness] was instantiated with an unknown value. + * Sets [Builder.threshold] to an arbitrary JSON value. + * + * You should usually call [Builder.threshold] with a well-typed [Double] value instead. + * This method is primarily for setting the field to an undocumented or not yet + * supported value. */ - _UNKNOWN, - } + fun threshold(threshold: JsonField) = apply { this.threshold = threshold } - /** - * Returns an enum member corresponding to this class instance's value, or [Value._UNKNOWN] - * if the class was instantiated with an unknown value. - * - * Use the [known] method instead if you're certain the value is always known or if you want - * to throw for the unknown case. - */ - fun value(): Value = - when (this) { - LOW -> Value.LOW - MEDIUM -> Value.MEDIUM - HIGH -> Value.HIGH - AUTO -> Value.AUTO - else -> Value._UNKNOWN + fun additionalProperties(additionalProperties: Map) = apply { + this.additionalProperties.clear() + putAllAdditionalProperties(additionalProperties) } - /** - * Returns an enum member corresponding to this class instance's value. - * - * Use the [value] method instead if you're uncertain the value is always known and don't - * want to throw for the unknown case. - * - * @throws OpenAIInvalidDataException if this class instance's value is a not a known - * member. - */ - fun known(): Known = - when (this) { - LOW -> Known.LOW - MEDIUM -> Known.MEDIUM - HIGH -> Known.HIGH - AUTO -> Known.AUTO - else -> throw OpenAIInvalidDataException("Unknown Eagerness: $value") + fun putAdditionalProperty(key: String, value: JsonValue) = apply { + additionalProperties.put(key, value) } - /** - * Returns this class instance's primitive wire representation. - * - * This differs from the [toString] method because that method is primarily for debugging - * and generally doesn't throw. - * - * @throws OpenAIInvalidDataException if this class instance's value does not have the - * expected primitive type. - */ - fun asString(): String = - _value().asString().orElseThrow { OpenAIInvalidDataException("Value is not a String") } + fun putAllAdditionalProperties(additionalProperties: Map) = apply { + this.additionalProperties.putAll(additionalProperties) + } + + fun removeAdditionalProperty(key: String) = apply { additionalProperties.remove(key) } + + fun removeAllAdditionalProperties(keys: Set) = apply { + keys.forEach(::removeAdditionalProperty) + } + + /** + * Returns an immutable instance of [ServerVad]. + * + * Further updates to this [Builder] will not mutate the returned instance. + */ + fun build(): ServerVad = + ServerVad( + type, + createResponse, + idleTimeoutMs, + interruptResponse, + prefixPaddingMs, + silenceDurationMs, + threshold, + additionalProperties.toMutableMap(), + ) + } private var validated: Boolean = false - fun validate(): Eagerness = apply { + fun validate(): ServerVad = apply { if (validated) { return@apply } - known() + _type().let { + if (it != JsonValue.from("server_vad")) { + throw OpenAIInvalidDataException("'type' is invalid, received $it") + } + } + createResponse() + idleTimeoutMs() + interruptResponse() + prefixPaddingMs() + silenceDurationMs() + threshold() validated = true } @@ -614,115 +681,298 @@ private constructor( * * Used for best match union deserialization. */ - @JvmSynthetic internal fun validity(): Int = if (value() == Value._UNKNOWN) 0 else 1 + @JvmSynthetic + internal fun validity(): Int = + type.let { if (it == JsonValue.from("server_vad")) 1 else 0 } + + (if (createResponse.asKnown().isPresent) 1 else 0) + + (if (idleTimeoutMs.asKnown().isPresent) 1 else 0) + + (if (interruptResponse.asKnown().isPresent) 1 else 0) + + (if (prefixPaddingMs.asKnown().isPresent) 1 else 0) + + (if (silenceDurationMs.asKnown().isPresent) 1 else 0) + + (if (threshold.asKnown().isPresent) 1 else 0) override fun equals(other: Any?): Boolean { if (this === other) { return true } - return other is Eagerness && value == other.value + return other is ServerVad && + type == other.type && + createResponse == other.createResponse && + idleTimeoutMs == other.idleTimeoutMs && + interruptResponse == other.interruptResponse && + prefixPaddingMs == other.prefixPaddingMs && + silenceDurationMs == other.silenceDurationMs && + threshold == other.threshold && + additionalProperties == other.additionalProperties } - override fun hashCode() = value.hashCode() + private val hashCode: Int by lazy { + Objects.hash( + type, + createResponse, + idleTimeoutMs, + interruptResponse, + prefixPaddingMs, + silenceDurationMs, + threshold, + additionalProperties, + ) + } - override fun toString() = value.toString() + override fun hashCode(): Int = hashCode + + override fun toString() = + "ServerVad{type=$type, createResponse=$createResponse, idleTimeoutMs=$idleTimeoutMs, interruptResponse=$interruptResponse, prefixPaddingMs=$prefixPaddingMs, silenceDurationMs=$silenceDurationMs, threshold=$threshold, additionalProperties=$additionalProperties}" } - /** Type of turn detection. */ - class Type @JsonCreator private constructor(private val value: JsonField) : Enum { + /** + * Server-side semantic turn detection which uses a model to determine when the user has + * finished speaking. + */ + class SemanticVad + private constructor( + private val type: JsonValue, + private val createResponse: JsonField, + private val eagerness: JsonField, + private val interruptResponse: JsonField, + private val additionalProperties: MutableMap, + ) { + + @JsonCreator + private constructor( + @JsonProperty("type") @ExcludeMissing type: JsonValue = JsonMissing.of(), + @JsonProperty("create_response") + @ExcludeMissing + createResponse: JsonField = JsonMissing.of(), + @JsonProperty("eagerness") + @ExcludeMissing + eagerness: JsonField = JsonMissing.of(), + @JsonProperty("interrupt_response") + @ExcludeMissing + interruptResponse: JsonField = JsonMissing.of(), + ) : this(type, createResponse, eagerness, interruptResponse, mutableMapOf()) /** - * Returns this class instance's raw value. + * Type of turn detection, `semantic_vad` to turn on Semantic VAD. + * + * Expected to always return the following: + * ```java + * JsonValue.from("semantic_vad") + * ``` * - * This is usually only useful if this instance was deserialized from data that doesn't - * match any known member, and you want to know that value. For example, if the SDK is on an - * older version than the API, then the API may respond with new members that the SDK is - * unaware of. + * However, this method can be useful for debugging and logging (e.g. if the server + * responded with an unexpected value). */ - @com.fasterxml.jackson.annotation.JsonValue fun _value(): JsonField = value - - companion object { - - @JvmField val SERVER_VAD = of("server_vad") - - @JvmField val SEMANTIC_VAD = of("semantic_vad") - - @JvmStatic fun of(value: String) = Type(JsonField.of(value)) - } - - /** An enum containing [Type]'s known values. */ - enum class Known { - SERVER_VAD, - SEMANTIC_VAD, - } + @JsonProperty("type") @ExcludeMissing fun _type(): JsonValue = type /** - * An enum containing [Type]'s known values, as well as an [_UNKNOWN] member. + * Whether or not to automatically generate a response when a VAD stop event occurs. * - * An instance of [Type] can contain an unknown value in a couple of cases: - * - It was deserialized from data that doesn't match any known member. For example, if the - * SDK is on an older version than the API, then the API may respond with new members that - * the SDK is unaware of. - * - It was constructed with an arbitrary value using the [of] method. + * @throws OpenAIInvalidDataException if the JSON field has an unexpected type (e.g. if the + * server responded with an unexpected value). */ - enum class Value { - SERVER_VAD, - SEMANTIC_VAD, - /** An enum member indicating that [Type] was instantiated with an unknown value. */ - _UNKNOWN, - } + fun createResponse(): Optional = createResponse.getOptional("create_response") /** - * Returns an enum member corresponding to this class instance's value, or [Value._UNKNOWN] - * if the class was instantiated with an unknown value. + * Used only for `semantic_vad` mode. The eagerness of the model to respond. `low` will wait + * longer for the user to continue speaking, `high` will respond more quickly. `auto` is the + * default and is equivalent to `medium`. `low`, `medium`, and `high` have max timeouts of + * 8s, 4s, and 2s respectively. * - * Use the [known] method instead if you're certain the value is always known or if you want - * to throw for the unknown case. + * @throws OpenAIInvalidDataException if the JSON field has an unexpected type (e.g. if the + * server responded with an unexpected value). */ - fun value(): Value = - when (this) { - SERVER_VAD -> Value.SERVER_VAD - SEMANTIC_VAD -> Value.SEMANTIC_VAD - else -> Value._UNKNOWN - } + fun eagerness(): Optional = eagerness.getOptional("eagerness") /** - * Returns an enum member corresponding to this class instance's value. + * Whether or not to automatically interrupt any ongoing response with output to the default + * conversation (i.e. `conversation` of `auto`) when a VAD start event occurs. * - * Use the [value] method instead if you're uncertain the value is always known and don't - * want to throw for the unknown case. + * @throws OpenAIInvalidDataException if the JSON field has an unexpected type (e.g. if the + * server responded with an unexpected value). + */ + fun interruptResponse(): Optional = + interruptResponse.getOptional("interrupt_response") + + /** + * Returns the raw JSON value of [createResponse]. * - * @throws OpenAIInvalidDataException if this class instance's value is a not a known - * member. + * Unlike [createResponse], this method doesn't throw if the JSON field has an unexpected + * type. */ - fun known(): Known = - when (this) { - SERVER_VAD -> Known.SERVER_VAD - SEMANTIC_VAD -> Known.SEMANTIC_VAD - else -> throw OpenAIInvalidDataException("Unknown Type: $value") - } + @JsonProperty("create_response") + @ExcludeMissing + fun _createResponse(): JsonField = createResponse /** - * Returns this class instance's primitive wire representation. + * Returns the raw JSON value of [eagerness]. * - * This differs from the [toString] method because that method is primarily for debugging - * and generally doesn't throw. + * Unlike [eagerness], this method doesn't throw if the JSON field has an unexpected type. + */ + @JsonProperty("eagerness") + @ExcludeMissing + fun _eagerness(): JsonField = eagerness + + /** + * Returns the raw JSON value of [interruptResponse]. * - * @throws OpenAIInvalidDataException if this class instance's value does not have the - * expected primitive type. + * Unlike [interruptResponse], this method doesn't throw if the JSON field has an unexpected + * type. */ - fun asString(): String = - _value().asString().orElseThrow { OpenAIInvalidDataException("Value is not a String") } + @JsonProperty("interrupt_response") + @ExcludeMissing + fun _interruptResponse(): JsonField = interruptResponse + + @JsonAnySetter + private fun putAdditionalProperty(key: String, value: JsonValue) { + additionalProperties.put(key, value) + } + + @JsonAnyGetter + @ExcludeMissing + fun _additionalProperties(): Map = + Collections.unmodifiableMap(additionalProperties) + + fun toBuilder() = Builder().from(this) + + companion object { + + /** Returns a mutable builder for constructing an instance of [SemanticVad]. */ + @JvmStatic fun builder() = Builder() + } + + /** A builder for [SemanticVad]. */ + class Builder internal constructor() { + + private var type: JsonValue = JsonValue.from("semantic_vad") + private var createResponse: JsonField = JsonMissing.of() + private var eagerness: JsonField = JsonMissing.of() + private var interruptResponse: JsonField = JsonMissing.of() + private var additionalProperties: MutableMap = mutableMapOf() + + @JvmSynthetic + internal fun from(semanticVad: SemanticVad) = apply { + type = semanticVad.type + createResponse = semanticVad.createResponse + eagerness = semanticVad.eagerness + interruptResponse = semanticVad.interruptResponse + additionalProperties = semanticVad.additionalProperties.toMutableMap() + } + + /** + * Sets the field to an arbitrary JSON value. + * + * It is usually unnecessary to call this method because the field defaults to the + * following: + * ```java + * JsonValue.from("semantic_vad") + * ``` + * + * This method is primarily for setting the field to an undocumented or not yet + * supported value. + */ + fun type(type: JsonValue) = apply { this.type = type } + + /** Whether or not to automatically generate a response when a VAD stop event occurs. */ + fun createResponse(createResponse: Boolean) = + createResponse(JsonField.of(createResponse)) + + /** + * Sets [Builder.createResponse] to an arbitrary JSON value. + * + * You should usually call [Builder.createResponse] with a well-typed [Boolean] value + * instead. This method is primarily for setting the field to an undocumented or not yet + * supported value. + */ + fun createResponse(createResponse: JsonField) = apply { + this.createResponse = createResponse + } + + /** + * Used only for `semantic_vad` mode. The eagerness of the model to respond. `low` will + * wait longer for the user to continue speaking, `high` will respond more quickly. + * `auto` is the default and is equivalent to `medium`. `low`, `medium`, and `high` have + * max timeouts of 8s, 4s, and 2s respectively. + */ + fun eagerness(eagerness: Eagerness) = eagerness(JsonField.of(eagerness)) + + /** + * Sets [Builder.eagerness] to an arbitrary JSON value. + * + * You should usually call [Builder.eagerness] with a well-typed [Eagerness] value + * instead. This method is primarily for setting the field to an undocumented or not yet + * supported value. + */ + fun eagerness(eagerness: JsonField) = apply { this.eagerness = eagerness } + + /** + * Whether or not to automatically interrupt any ongoing response with output to the + * default conversation (i.e. `conversation` of `auto`) when a VAD start event occurs. + */ + fun interruptResponse(interruptResponse: Boolean) = + interruptResponse(JsonField.of(interruptResponse)) + + /** + * Sets [Builder.interruptResponse] to an arbitrary JSON value. + * + * You should usually call [Builder.interruptResponse] with a well-typed [Boolean] value + * instead. This method is primarily for setting the field to an undocumented or not yet + * supported value. + */ + fun interruptResponse(interruptResponse: JsonField) = apply { + this.interruptResponse = interruptResponse + } + + fun additionalProperties(additionalProperties: Map) = apply { + this.additionalProperties.clear() + putAllAdditionalProperties(additionalProperties) + } + + fun putAdditionalProperty(key: String, value: JsonValue) = apply { + additionalProperties.put(key, value) + } + + fun putAllAdditionalProperties(additionalProperties: Map) = apply { + this.additionalProperties.putAll(additionalProperties) + } + + fun removeAdditionalProperty(key: String) = apply { additionalProperties.remove(key) } + + fun removeAllAdditionalProperties(keys: Set) = apply { + keys.forEach(::removeAdditionalProperty) + } + + /** + * Returns an immutable instance of [SemanticVad]. + * + * Further updates to this [Builder] will not mutate the returned instance. + */ + fun build(): SemanticVad = + SemanticVad( + type, + createResponse, + eagerness, + interruptResponse, + additionalProperties.toMutableMap(), + ) + } private var validated: Boolean = false - fun validate(): Type = apply { + fun validate(): SemanticVad = apply { if (validated) { return@apply } - known() + _type().let { + if (it != JsonValue.from("semantic_vad")) { + throw OpenAIInvalidDataException("'type' is invalid, received $it") + } + } + createResponse() + eagerness().ifPresent { it.validate() } + interruptResponse() validated = true } @@ -740,54 +990,182 @@ private constructor( * * Used for best match union deserialization. */ - @JvmSynthetic internal fun validity(): Int = if (value() == Value._UNKNOWN) 0 else 1 + @JvmSynthetic + internal fun validity(): Int = + type.let { if (it == JsonValue.from("semantic_vad")) 1 else 0 } + + (if (createResponse.asKnown().isPresent) 1 else 0) + + (eagerness.asKnown().getOrNull()?.validity() ?: 0) + + (if (interruptResponse.asKnown().isPresent) 1 else 0) + + /** + * Used only for `semantic_vad` mode. The eagerness of the model to respond. `low` will wait + * longer for the user to continue speaking, `high` will respond more quickly. `auto` is the + * default and is equivalent to `medium`. `low`, `medium`, and `high` have max timeouts of + * 8s, 4s, and 2s respectively. + */ + class Eagerness @JsonCreator private constructor(private val value: JsonField) : + Enum { + + /** + * Returns this class instance's raw value. + * + * This is usually only useful if this instance was deserialized from data that doesn't + * match any known member, and you want to know that value. For example, if the SDK is + * on an older version than the API, then the API may respond with new members that the + * SDK is unaware of. + */ + @com.fasterxml.jackson.annotation.JsonValue fun _value(): JsonField = value + + companion object { + + @JvmField val LOW = of("low") + + @JvmField val MEDIUM = of("medium") + + @JvmField val HIGH = of("high") + + @JvmField val AUTO = of("auto") + + @JvmStatic fun of(value: String) = Eagerness(JsonField.of(value)) + } + + /** An enum containing [Eagerness]'s known values. */ + enum class Known { + LOW, + MEDIUM, + HIGH, + AUTO, + } + + /** + * An enum containing [Eagerness]'s known values, as well as an [_UNKNOWN] member. + * + * An instance of [Eagerness] can contain an unknown value in a couple of cases: + * - It was deserialized from data that doesn't match any known member. For example, if + * the SDK is on an older version than the API, then the API may respond with new + * members that the SDK is unaware of. + * - It was constructed with an arbitrary value using the [of] method. + */ + enum class Value { + LOW, + MEDIUM, + HIGH, + AUTO, + /** + * An enum member indicating that [Eagerness] was instantiated with an unknown + * value. + */ + _UNKNOWN, + } + + /** + * Returns an enum member corresponding to this class instance's value, or + * [Value._UNKNOWN] if the class was instantiated with an unknown value. + * + * Use the [known] method instead if you're certain the value is always known or if you + * want to throw for the unknown case. + */ + fun value(): Value = + when (this) { + LOW -> Value.LOW + MEDIUM -> Value.MEDIUM + HIGH -> Value.HIGH + AUTO -> Value.AUTO + else -> Value._UNKNOWN + } + + /** + * Returns an enum member corresponding to this class instance's value. + * + * Use the [value] method instead if you're uncertain the value is always known and + * don't want to throw for the unknown case. + * + * @throws OpenAIInvalidDataException if this class instance's value is a not a known + * member. + */ + fun known(): Known = + when (this) { + LOW -> Known.LOW + MEDIUM -> Known.MEDIUM + HIGH -> Known.HIGH + AUTO -> Known.AUTO + else -> throw OpenAIInvalidDataException("Unknown Eagerness: $value") + } + + /** + * Returns this class instance's primitive wire representation. + * + * This differs from the [toString] method because that method is primarily for + * debugging and generally doesn't throw. + * + * @throws OpenAIInvalidDataException if this class instance's value does not have the + * expected primitive type. + */ + fun asString(): String = + _value().asString().orElseThrow { + OpenAIInvalidDataException("Value is not a String") + } + + private var validated: Boolean = false + + fun validate(): Eagerness = apply { + if (validated) { + return@apply + } + + known() + validated = true + } + + fun isValid(): Boolean = + try { + validate() + true + } catch (e: OpenAIInvalidDataException) { + false + } + + /** + * Returns a score indicating how many valid values are contained in this object + * recursively. + * + * Used for best match union deserialization. + */ + @JvmSynthetic internal fun validity(): Int = if (value() == Value._UNKNOWN) 0 else 1 + + override fun equals(other: Any?): Boolean { + if (this === other) { + return true + } + + return other is Eagerness && value == other.value + } + + override fun hashCode() = value.hashCode() + + override fun toString() = value.toString() + } override fun equals(other: Any?): Boolean { if (this === other) { return true } - return other is Type && value == other.value + return other is SemanticVad && + type == other.type && + createResponse == other.createResponse && + eagerness == other.eagerness && + interruptResponse == other.interruptResponse && + additionalProperties == other.additionalProperties } - override fun hashCode() = value.hashCode() - - override fun toString() = value.toString() - } - - override fun equals(other: Any?): Boolean { - if (this === other) { - return true + private val hashCode: Int by lazy { + Objects.hash(type, createResponse, eagerness, interruptResponse, additionalProperties) } - return other is RealtimeAudioInputTurnDetection && - createResponse == other.createResponse && - eagerness == other.eagerness && - idleTimeoutMs == other.idleTimeoutMs && - interruptResponse == other.interruptResponse && - prefixPaddingMs == other.prefixPaddingMs && - silenceDurationMs == other.silenceDurationMs && - threshold == other.threshold && - type == other.type && - additionalProperties == other.additionalProperties - } + override fun hashCode(): Int = hashCode - private val hashCode: Int by lazy { - Objects.hash( - createResponse, - eagerness, - idleTimeoutMs, - interruptResponse, - prefixPaddingMs, - silenceDurationMs, - threshold, - type, - additionalProperties, - ) + override fun toString() = + "SemanticVad{type=$type, createResponse=$createResponse, eagerness=$eagerness, interruptResponse=$interruptResponse, additionalProperties=$additionalProperties}" } - - override fun hashCode(): Int = hashCode - - override fun toString() = - "RealtimeAudioInputTurnDetection{createResponse=$createResponse, eagerness=$eagerness, idleTimeoutMs=$idleTimeoutMs, interruptResponse=$interruptResponse, prefixPaddingMs=$prefixPaddingMs, silenceDurationMs=$silenceDurationMs, threshold=$threshold, type=$type, additionalProperties=$additionalProperties}" } diff --git a/openai-java-core/src/main/kotlin/com/openai/models/realtime/RealtimeServerEvent.kt b/openai-java-core/src/main/kotlin/com/openai/models/realtime/RealtimeServerEvent.kt index 0cad1e4d..a8a56aa9 100644 --- a/openai-java-core/src/main/kotlin/com/openai/models/realtime/RealtimeServerEvent.kt +++ b/openai-java-core/src/main/kotlin/com/openai/models/realtime/RealtimeServerEvent.kt @@ -371,7 +371,21 @@ private constructor( fun conversationItemDone(): Optional = Optional.ofNullable(conversationItemDone) - /** Returned when the server VAD timeout is triggered for the input audio buffer. */ + /** + * Returned when the Server VAD timeout is triggered for the input audio buffer. This is + * configured with `idle_timeout_ms` in the `turn_detection` settings of the session, and it + * indicates that there hasn't been any speech detected for the configured duration. + * + * The `audio_start_ms` and `audio_end_ms` fields indicate the segment of audio after the last + * model response up to the triggering time, as an offset from the beginning of audio written to + * the input audio buffer. This means it demarcates the segment of audio that was silent and the + * difference between the start and end values will roughly match the configured timeout. + * + * The empty audio will be committed to the conversation as an `input_audio` item (there will be + * a `input_audio_buffer.committed` event) and a model response will be generated. There may be + * speech that didn't trigger VAD but is still detected by the model, so the model may respond + * with something relevant to the conversation or a prompt to continue speaking. + */ fun inputAudioBufferTimeoutTriggered(): Optional = Optional.ofNullable(inputAudioBufferTimeoutTriggered) @@ -794,7 +808,21 @@ private constructor( fun asConversationItemDone(): ConversationItemDone = conversationItemDone.getOrThrow("conversationItemDone") - /** Returned when the server VAD timeout is triggered for the input audio buffer. */ + /** + * Returned when the Server VAD timeout is triggered for the input audio buffer. This is + * configured with `idle_timeout_ms` in the `turn_detection` settings of the session, and it + * indicates that there hasn't been any speech detected for the configured duration. + * + * The `audio_start_ms` and `audio_end_ms` fields indicate the segment of audio after the last + * model response up to the triggering time, as an offset from the beginning of audio written to + * the input audio buffer. This means it demarcates the segment of audio that was silent and the + * difference between the start and end values will roughly match the configured timeout. + * + * The empty audio will be committed to the conversation as an `input_audio` item (there will be + * a `input_audio_buffer.committed` event) and a model response will be generated. There may be + * speech that didn't trigger VAD but is still detected by the model, so the model may respond + * with something relevant to the conversation or a prompt to continue speaking. + */ fun asInputAudioBufferTimeoutTriggered(): InputAudioBufferTimeoutTriggered = inputAudioBufferTimeoutTriggered.getOrThrow("inputAudioBufferTimeoutTriggered") @@ -1955,7 +1983,23 @@ private constructor( fun ofConversationItemDone(conversationItemDone: ConversationItemDone) = RealtimeServerEvent(conversationItemDone = conversationItemDone) - /** Returned when the server VAD timeout is triggered for the input audio buffer. */ + /** + * Returned when the Server VAD timeout is triggered for the input audio buffer. This is + * configured with `idle_timeout_ms` in the `turn_detection` settings of the session, and it + * indicates that there hasn't been any speech detected for the configured duration. + * + * The `audio_start_ms` and `audio_end_ms` fields indicate the segment of audio after the + * last model response up to the triggering time, as an offset from the beginning of audio + * written to the input audio buffer. This means it demarcates the segment of audio that was + * silent and the difference between the start and end values will roughly match the + * configured timeout. + * + * The empty audio will be committed to the conversation as an `input_audio` item (there + * will be a `input_audio_buffer.committed` event) and a model response will be generated. + * There may be speech that didn't trigger VAD but is still detected by the model, so the + * model may respond with something relevant to the conversation or a prompt to continue + * speaking. + */ @JvmStatic fun ofInputAudioBufferTimeoutTriggered( inputAudioBufferTimeoutTriggered: InputAudioBufferTimeoutTriggered @@ -2301,7 +2345,23 @@ private constructor( */ fun visitConversationItemDone(conversationItemDone: ConversationItemDone): T - /** Returned when the server VAD timeout is triggered for the input audio buffer. */ + /** + * Returned when the Server VAD timeout is triggered for the input audio buffer. This is + * configured with `idle_timeout_ms` in the `turn_detection` settings of the session, and it + * indicates that there hasn't been any speech detected for the configured duration. + * + * The `audio_start_ms` and `audio_end_ms` fields indicate the segment of audio after the + * last model response up to the triggering time, as an offset from the beginning of audio + * written to the input audio buffer. This means it demarcates the segment of audio that was + * silent and the difference between the start and end values will roughly match the + * configured timeout. + * + * The empty audio will be committed to the conversation as an `input_audio` item (there + * will be a `input_audio_buffer.committed` event) and a model response will be generated. + * There may be speech that didn't trigger VAD but is still detected by the model, so the + * model may respond with something relevant to the conversation or a prompt to continue + * speaking. + */ fun visitInputAudioBufferTimeoutTriggered( inputAudioBufferTimeoutTriggered: InputAudioBufferTimeoutTriggered ): T diff --git a/openai-java-core/src/main/kotlin/com/openai/models/realtime/RealtimeSession.kt b/openai-java-core/src/main/kotlin/com/openai/models/realtime/RealtimeSession.kt index cf15084c..f0675950 100644 --- a/openai-java-core/src/main/kotlin/com/openai/models/realtime/RealtimeSession.kt +++ b/openai-java-core/src/main/kotlin/com/openai/models/realtime/RealtimeSession.kt @@ -31,7 +31,7 @@ import java.util.Objects import java.util.Optional import kotlin.jvm.optionals.getOrNull -/** Realtime session object. */ +/** Realtime session object for the beta interface. */ class RealtimeSession private constructor( private val id: JsonField, @@ -311,14 +311,16 @@ private constructor( /** * Configuration for turn detection, ether Server VAD or Semantic VAD. This can be set to `null` - * to turn off, in which case the client must manually trigger model response. Server VAD means - * that the model will detect the start and end of speech based on audio volume and respond at - * the end of user speech. Semantic VAD is more advanced and uses a turn detection model (in - * conjunction with VAD) to semantically estimate whether the user has finished speaking, then - * dynamically sets a timeout based on this probability. For example, if user audio trails off - * with "uhhm", the model will score a low probability of turn end and wait longer for the user - * to continue speaking. This can be useful for more natural conversations, but may have a - * higher latency. + * to turn off, in which case the client must manually trigger model response. + * + * Server VAD means that the model will detect the start and end of speech based on audio volume + * and respond at the end of user speech. + * + * Semantic VAD is more advanced and uses a turn detection model (in conjunction with VAD) to + * semantically estimate whether the user has finished speaking, then dynamically sets a timeout + * based on this probability. For example, if user audio trails off with "uhhm", the model will + * score a low probability of turn end and wait longer for the user to continue speaking. This + * can be useful for more natural conversations, but may have a higher latency. * * @throws OpenAIInvalidDataException if the JSON field has an unexpected type (e.g. if the * server responded with an unexpected value). @@ -930,14 +932,17 @@ private constructor( /** * Configuration for turn detection, ether Server VAD or Semantic VAD. This can be set to - * `null` to turn off, in which case the client must manually trigger model response. Server - * VAD means that the model will detect the start and end of speech based on audio volume - * and respond at the end of user speech. Semantic VAD is more advanced and uses a turn - * detection model (in conjunction with VAD) to semantically estimate whether the user has - * finished speaking, then dynamically sets a timeout based on this probability. For - * example, if user audio trails off with "uhhm", the model will score a low probability of - * turn end and wait longer for the user to continue speaking. This can be useful for more - * natural conversations, but may have a higher latency. + * `null` to turn off, in which case the client must manually trigger model response. + * + * Server VAD means that the model will detect the start and end of speech based on audio + * volume and respond at the end of user speech. + * + * Semantic VAD is more advanced and uses a turn detection model (in conjunction with VAD) + * to semantically estimate whether the user has finished speaking, then dynamically sets a + * timeout based on this probability. For example, if user audio trails off with "uhhm", the + * model will score a low probability of turn end and wait longer for the user to continue + * speaking. This can be useful for more natural conversations, but may have a higher + * latency. */ fun turnDetection(turnDetection: TurnDetection?) = turnDetection(JsonField.ofNullable(turnDetection)) @@ -957,6 +962,14 @@ private constructor( this.turnDetection = turnDetection } + /** Alias for calling [turnDetection] with `TurnDetection.ofServerVad(serverVad)`. */ + fun turnDetection(serverVad: TurnDetection.ServerVad) = + turnDetection(TurnDetection.ofServerVad(serverVad)) + + /** Alias for calling [turnDetection] with `TurnDetection.ofSemanticVad(semanticVad)`. */ + fun turnDetection(semanticVad: TurnDetection.SemanticVad) = + turnDetection(TurnDetection.ofSemanticVad(semanticVad)) + /** * The voice the model uses to respond. Voice cannot be changed during the session once the * model has responded with audio at least once. Current voice options are `alloy`, `ash`, @@ -2675,596 +2688,647 @@ private constructor( /** * Configuration for turn detection, ether Server VAD or Semantic VAD. This can be set to `null` - * to turn off, in which case the client must manually trigger model response. Server VAD means - * that the model will detect the start and end of speech based on audio volume and respond at - * the end of user speech. Semantic VAD is more advanced and uses a turn detection model (in - * conjunction with VAD) to semantically estimate whether the user has finished speaking, then - * dynamically sets a timeout based on this probability. For example, if user audio trails off - * with "uhhm", the model will score a low probability of turn end and wait longer for the user - * to continue speaking. This can be useful for more natural conversations, but may have a - * higher latency. + * to turn off, in which case the client must manually trigger model response. + * + * Server VAD means that the model will detect the start and end of speech based on audio volume + * and respond at the end of user speech. + * + * Semantic VAD is more advanced and uses a turn detection model (in conjunction with VAD) to + * semantically estimate whether the user has finished speaking, then dynamically sets a timeout + * based on this probability. For example, if user audio trails off with "uhhm", the model will + * score a low probability of turn end and wait longer for the user to continue speaking. This + * can be useful for more natural conversations, but may have a higher latency. */ + @JsonDeserialize(using = TurnDetection.Deserializer::class) + @JsonSerialize(using = TurnDetection.Serializer::class) class TurnDetection private constructor( - private val createResponse: JsonField, - private val eagerness: JsonField, - private val idleTimeoutMs: JsonField, - private val interruptResponse: JsonField, - private val prefixPaddingMs: JsonField, - private val silenceDurationMs: JsonField, - private val threshold: JsonField, - private val type: JsonField, - private val additionalProperties: MutableMap, + private val serverVad: ServerVad? = null, + private val semanticVad: SemanticVad? = null, + private val _json: JsonValue? = null, ) { - @JsonCreator - private constructor( - @JsonProperty("create_response") - @ExcludeMissing - createResponse: JsonField = JsonMissing.of(), - @JsonProperty("eagerness") - @ExcludeMissing - eagerness: JsonField = JsonMissing.of(), - @JsonProperty("idle_timeout_ms") - @ExcludeMissing - idleTimeoutMs: JsonField = JsonMissing.of(), - @JsonProperty("interrupt_response") - @ExcludeMissing - interruptResponse: JsonField = JsonMissing.of(), - @JsonProperty("prefix_padding_ms") - @ExcludeMissing - prefixPaddingMs: JsonField = JsonMissing.of(), - @JsonProperty("silence_duration_ms") - @ExcludeMissing - silenceDurationMs: JsonField = JsonMissing.of(), - @JsonProperty("threshold") - @ExcludeMissing - threshold: JsonField = JsonMissing.of(), - @JsonProperty("type") @ExcludeMissing type: JsonField = JsonMissing.of(), - ) : this( - createResponse, - eagerness, - idleTimeoutMs, - interruptResponse, - prefixPaddingMs, - silenceDurationMs, - threshold, - type, - mutableMapOf(), - ) - /** - * Whether or not to automatically generate a response when a VAD stop event occurs. - * - * @throws OpenAIInvalidDataException if the JSON field has an unexpected type (e.g. if the - * server responded with an unexpected value). + * Server-side voice activity detection (VAD) which flips on when user speech is detected + * and off after a period of silence. */ - fun createResponse(): Optional = createResponse.getOptional("create_response") + fun serverVad(): Optional = Optional.ofNullable(serverVad) /** - * Used only for `semantic_vad` mode. The eagerness of the model to respond. `low` will wait - * longer for the user to continue speaking, `high` will respond more quickly. `auto` is the - * default and is equivalent to `medium`. - * - * @throws OpenAIInvalidDataException if the JSON field has an unexpected type (e.g. if the - * server responded with an unexpected value). + * Server-side semantic turn detection which uses a model to determine when the user has + * finished speaking. */ - fun eagerness(): Optional = eagerness.getOptional("eagerness") + fun semanticVad(): Optional = Optional.ofNullable(semanticVad) - /** - * Optional idle timeout after which turn detection will auto-timeout when no additional - * audio is received. - * - * @throws OpenAIInvalidDataException if the JSON field has an unexpected type (e.g. if the - * server responded with an unexpected value). - */ - fun idleTimeoutMs(): Optional = idleTimeoutMs.getOptional("idle_timeout_ms") + fun isServerVad(): Boolean = serverVad != null - /** - * Whether or not to automatically interrupt any ongoing response with output to the default - * conversation (i.e. `conversation` of `auto`) when a VAD start event occurs. - * - * @throws OpenAIInvalidDataException if the JSON field has an unexpected type (e.g. if the - * server responded with an unexpected value). - */ - fun interruptResponse(): Optional = - interruptResponse.getOptional("interrupt_response") + fun isSemanticVad(): Boolean = semanticVad != null /** - * Used only for `server_vad` mode. Amount of audio to include before the VAD detected - * speech (in milliseconds). Defaults to 300ms. - * - * @throws OpenAIInvalidDataException if the JSON field has an unexpected type (e.g. if the - * server responded with an unexpected value). + * Server-side voice activity detection (VAD) which flips on when user speech is detected + * and off after a period of silence. */ - fun prefixPaddingMs(): Optional = prefixPaddingMs.getOptional("prefix_padding_ms") + fun asServerVad(): ServerVad = serverVad.getOrThrow("serverVad") /** - * Used only for `server_vad` mode. Duration of silence to detect speech stop (in - * milliseconds). Defaults to 500ms. With shorter values the model will respond more - * quickly, but may jump in on short pauses from the user. - * - * @throws OpenAIInvalidDataException if the JSON field has an unexpected type (e.g. if the - * server responded with an unexpected value). + * Server-side semantic turn detection which uses a model to determine when the user has + * finished speaking. */ - fun silenceDurationMs(): Optional = - silenceDurationMs.getOptional("silence_duration_ms") + fun asSemanticVad(): SemanticVad = semanticVad.getOrThrow("semanticVad") - /** - * Used only for `server_vad` mode. Activation threshold for VAD (0.0 to 1.0), this defaults - * to 0.5. A higher threshold will require louder audio to activate the model, and thus - * might perform better in noisy environments. - * - * @throws OpenAIInvalidDataException if the JSON field has an unexpected type (e.g. if the - * server responded with an unexpected value). - */ - fun threshold(): Optional = threshold.getOptional("threshold") + fun _json(): Optional = Optional.ofNullable(_json) - /** - * Type of turn detection. - * - * @throws OpenAIInvalidDataException if the JSON field has an unexpected type (e.g. if the - * server responded with an unexpected value). - */ - fun type(): Optional = type.getOptional("type") + fun accept(visitor: Visitor): T = + when { + serverVad != null -> visitor.visitServerVad(serverVad) + semanticVad != null -> visitor.visitSemanticVad(semanticVad) + else -> visitor.unknown(_json) + } - /** - * Returns the raw JSON value of [createResponse]. - * - * Unlike [createResponse], this method doesn't throw if the JSON field has an unexpected - * type. - */ - @JsonProperty("create_response") - @ExcludeMissing - fun _createResponse(): JsonField = createResponse + private var validated: Boolean = false - /** - * Returns the raw JSON value of [eagerness]. - * - * Unlike [eagerness], this method doesn't throw if the JSON field has an unexpected type. - */ - @JsonProperty("eagerness") - @ExcludeMissing - fun _eagerness(): JsonField = eagerness + fun validate(): TurnDetection = apply { + if (validated) { + return@apply + } - /** - * Returns the raw JSON value of [idleTimeoutMs]. - * - * Unlike [idleTimeoutMs], this method doesn't throw if the JSON field has an unexpected - * type. - */ - @JsonProperty("idle_timeout_ms") - @ExcludeMissing - fun _idleTimeoutMs(): JsonField = idleTimeoutMs + accept( + object : Visitor { + override fun visitServerVad(serverVad: ServerVad) { + serverVad.validate() + } - /** - * Returns the raw JSON value of [interruptResponse]. - * - * Unlike [interruptResponse], this method doesn't throw if the JSON field has an unexpected - * type. - */ - @JsonProperty("interrupt_response") - @ExcludeMissing - fun _interruptResponse(): JsonField = interruptResponse + override fun visitSemanticVad(semanticVad: SemanticVad) { + semanticVad.validate() + } + } + ) + validated = true + } - /** - * Returns the raw JSON value of [prefixPaddingMs]. - * - * Unlike [prefixPaddingMs], this method doesn't throw if the JSON field has an unexpected - * type. - */ - @JsonProperty("prefix_padding_ms") - @ExcludeMissing - fun _prefixPaddingMs(): JsonField = prefixPaddingMs + fun isValid(): Boolean = + try { + validate() + true + } catch (e: OpenAIInvalidDataException) { + false + } /** - * Returns the raw JSON value of [silenceDurationMs]. + * Returns a score indicating how many valid values are contained in this object + * recursively. * - * Unlike [silenceDurationMs], this method doesn't throw if the JSON field has an unexpected - * type. + * Used for best match union deserialization. */ - @JsonProperty("silence_duration_ms") - @ExcludeMissing - fun _silenceDurationMs(): JsonField = silenceDurationMs + @JvmSynthetic + internal fun validity(): Int = + accept( + object : Visitor { + override fun visitServerVad(serverVad: ServerVad) = serverVad.validity() - /** - * Returns the raw JSON value of [threshold]. - * - * Unlike [threshold], this method doesn't throw if the JSON field has an unexpected type. - */ - @JsonProperty("threshold") @ExcludeMissing fun _threshold(): JsonField = threshold + override fun visitSemanticVad(semanticVad: SemanticVad) = semanticVad.validity() - /** - * Returns the raw JSON value of [type]. - * - * Unlike [type], this method doesn't throw if the JSON field has an unexpected type. - */ - @JsonProperty("type") @ExcludeMissing fun _type(): JsonField = type + override fun unknown(json: JsonValue?) = 0 + } + ) - @JsonAnySetter - private fun putAdditionalProperty(key: String, value: JsonValue) { - additionalProperties.put(key, value) + override fun equals(other: Any?): Boolean { + if (this === other) { + return true + } + + return other is TurnDetection && + serverVad == other.serverVad && + semanticVad == other.semanticVad } - @JsonAnyGetter - @ExcludeMissing - fun _additionalProperties(): Map = - Collections.unmodifiableMap(additionalProperties) + override fun hashCode(): Int = Objects.hash(serverVad, semanticVad) - fun toBuilder() = Builder().from(this) + override fun toString(): String = + when { + serverVad != null -> "TurnDetection{serverVad=$serverVad}" + semanticVad != null -> "TurnDetection{semanticVad=$semanticVad}" + _json != null -> "TurnDetection{_unknown=$_json}" + else -> throw IllegalStateException("Invalid TurnDetection") + } companion object { - /** Returns a mutable builder for constructing an instance of [TurnDetection]. */ - @JvmStatic fun builder() = Builder() - } - - /** A builder for [TurnDetection]. */ - class Builder internal constructor() { + /** + * Server-side voice activity detection (VAD) which flips on when user speech is + * detected and off after a period of silence. + */ + @JvmStatic fun ofServerVad(serverVad: ServerVad) = TurnDetection(serverVad = serverVad) - private var createResponse: JsonField = JsonMissing.of() - private var eagerness: JsonField = JsonMissing.of() - private var idleTimeoutMs: JsonField = JsonMissing.of() - private var interruptResponse: JsonField = JsonMissing.of() - private var prefixPaddingMs: JsonField = JsonMissing.of() - private var silenceDurationMs: JsonField = JsonMissing.of() - private var threshold: JsonField = JsonMissing.of() - private var type: JsonField = JsonMissing.of() - private var additionalProperties: MutableMap = mutableMapOf() + /** + * Server-side semantic turn detection which uses a model to determine when the user has + * finished speaking. + */ + @JvmStatic + fun ofSemanticVad(semanticVad: SemanticVad) = TurnDetection(semanticVad = semanticVad) + } - @JvmSynthetic - internal fun from(turnDetection: TurnDetection) = apply { - createResponse = turnDetection.createResponse - eagerness = turnDetection.eagerness - idleTimeoutMs = turnDetection.idleTimeoutMs - interruptResponse = turnDetection.interruptResponse - prefixPaddingMs = turnDetection.prefixPaddingMs - silenceDurationMs = turnDetection.silenceDurationMs - threshold = turnDetection.threshold - type = turnDetection.type - additionalProperties = turnDetection.additionalProperties.toMutableMap() - } - - /** Whether or not to automatically generate a response when a VAD stop event occurs. */ - fun createResponse(createResponse: Boolean) = - createResponse(JsonField.of(createResponse)) + /** + * An interface that defines how to map each variant of [TurnDetection] to a value of type + * [T]. + */ + interface Visitor { /** - * Sets [Builder.createResponse] to an arbitrary JSON value. - * - * You should usually call [Builder.createResponse] with a well-typed [Boolean] value - * instead. This method is primarily for setting the field to an undocumented or not yet - * supported value. + * Server-side voice activity detection (VAD) which flips on when user speech is + * detected and off after a period of silence. */ - fun createResponse(createResponse: JsonField) = apply { - this.createResponse = createResponse - } + fun visitServerVad(serverVad: ServerVad): T /** - * Used only for `semantic_vad` mode. The eagerness of the model to respond. `low` will - * wait longer for the user to continue speaking, `high` will respond more quickly. - * `auto` is the default and is equivalent to `medium`. + * Server-side semantic turn detection which uses a model to determine when the user has + * finished speaking. */ - fun eagerness(eagerness: Eagerness) = eagerness(JsonField.of(eagerness)) + fun visitSemanticVad(semanticVad: SemanticVad): T /** - * Sets [Builder.eagerness] to an arbitrary JSON value. + * Maps an unknown variant of [TurnDetection] to a value of type [T]. * - * You should usually call [Builder.eagerness] with a well-typed [Eagerness] value - * instead. This method is primarily for setting the field to an undocumented or not yet - * supported value. + * An instance of [TurnDetection] can contain an unknown variant if it was deserialized + * from data that doesn't match any known variant. For example, if the SDK is on an + * older version than the API, then the API may respond with new variants that the SDK + * is unaware of. + * + * @throws OpenAIInvalidDataException in the default implementation. */ - fun eagerness(eagerness: JsonField) = apply { this.eagerness = eagerness } + fun unknown(json: JsonValue?): T { + throw OpenAIInvalidDataException("Unknown TurnDetection: $json") + } + } + + internal class Deserializer : BaseDeserializer(TurnDetection::class) { + + override fun ObjectCodec.deserialize(node: JsonNode): TurnDetection { + val json = JsonValue.fromJsonNode(node) + val type = json.asObject().getOrNull()?.get("type")?.asString()?.getOrNull() + + when (type) { + "server_vad" -> { + return tryDeserialize(node, jacksonTypeRef())?.let { + TurnDetection(serverVad = it, _json = json) + } ?: TurnDetection(_json = json) + } + "semantic_vad" -> { + return tryDeserialize(node, jacksonTypeRef())?.let { + TurnDetection(semanticVad = it, _json = json) + } ?: TurnDetection(_json = json) + } + } + + return TurnDetection(_json = json) + } + } + + internal class Serializer : BaseSerializer(TurnDetection::class) { + + override fun serialize( + value: TurnDetection, + generator: JsonGenerator, + provider: SerializerProvider, + ) { + when { + value.serverVad != null -> generator.writeObject(value.serverVad) + value.semanticVad != null -> generator.writeObject(value.semanticVad) + value._json != null -> generator.writeObject(value._json) + else -> throw IllegalStateException("Invalid TurnDetection") + } + } + } + + /** + * Server-side voice activity detection (VAD) which flips on when user speech is detected + * and off after a period of silence. + */ + class ServerVad + private constructor( + private val type: JsonValue, + private val createResponse: JsonField, + private val idleTimeoutMs: JsonField, + private val interruptResponse: JsonField, + private val prefixPaddingMs: JsonField, + private val silenceDurationMs: JsonField, + private val threshold: JsonField, + private val additionalProperties: MutableMap, + ) { + + @JsonCreator + private constructor( + @JsonProperty("type") @ExcludeMissing type: JsonValue = JsonMissing.of(), + @JsonProperty("create_response") + @ExcludeMissing + createResponse: JsonField = JsonMissing.of(), + @JsonProperty("idle_timeout_ms") + @ExcludeMissing + idleTimeoutMs: JsonField = JsonMissing.of(), + @JsonProperty("interrupt_response") + @ExcludeMissing + interruptResponse: JsonField = JsonMissing.of(), + @JsonProperty("prefix_padding_ms") + @ExcludeMissing + prefixPaddingMs: JsonField = JsonMissing.of(), + @JsonProperty("silence_duration_ms") + @ExcludeMissing + silenceDurationMs: JsonField = JsonMissing.of(), + @JsonProperty("threshold") + @ExcludeMissing + threshold: JsonField = JsonMissing.of(), + ) : this( + type, + createResponse, + idleTimeoutMs, + interruptResponse, + prefixPaddingMs, + silenceDurationMs, + threshold, + mutableMapOf(), + ) /** - * Optional idle timeout after which turn detection will auto-timeout when no additional - * audio is received. + * Type of turn detection, `server_vad` to turn on simple Server VAD. + * + * Expected to always return the following: + * ```java + * JsonValue.from("server_vad") + * ``` + * + * However, this method can be useful for debugging and logging (e.g. if the server + * responded with an unexpected value). */ - fun idleTimeoutMs(idleTimeoutMs: Long?) = - idleTimeoutMs(JsonField.ofNullable(idleTimeoutMs)) + @JsonProperty("type") @ExcludeMissing fun _type(): JsonValue = type /** - * Alias for [Builder.idleTimeoutMs]. + * Whether or not to automatically generate a response when a VAD stop event occurs. * - * This unboxed primitive overload exists for backwards compatibility. + * @throws OpenAIInvalidDataException if the JSON field has an unexpected type (e.g. if + * the server responded with an unexpected value). */ - fun idleTimeoutMs(idleTimeoutMs: Long) = idleTimeoutMs(idleTimeoutMs as Long?) - - /** Alias for calling [Builder.idleTimeoutMs] with `idleTimeoutMs.orElse(null)`. */ - fun idleTimeoutMs(idleTimeoutMs: Optional) = - idleTimeoutMs(idleTimeoutMs.getOrNull()) + fun createResponse(): Optional = createResponse.getOptional("create_response") /** - * Sets [Builder.idleTimeoutMs] to an arbitrary JSON value. + * Optional timeout after which a model response will be triggered automatically. This + * is useful for situations in which a long pause from the user is unexpected, such as a + * phone call. The model will effectively prompt the user to continue the conversation + * based on the current context. * - * You should usually call [Builder.idleTimeoutMs] with a well-typed [Long] value - * instead. This method is primarily for setting the field to an undocumented or not yet - * supported value. + * The timeout value will be applied after the last model response's audio has finished + * playing, i.e. it's set to the `response.done` time plus audio playback duration. + * + * An `input_audio_buffer.timeout_triggered` event (plus events associated with the + * Response) will be emitted when the timeout is reached. Idle timeout is currently only + * supported for `server_vad` mode. + * + * @throws OpenAIInvalidDataException if the JSON field has an unexpected type (e.g. if + * the server responded with an unexpected value). */ - fun idleTimeoutMs(idleTimeoutMs: JsonField) = apply { - this.idleTimeoutMs = idleTimeoutMs - } + fun idleTimeoutMs(): Optional = idleTimeoutMs.getOptional("idle_timeout_ms") /** * Whether or not to automatically interrupt any ongoing response with output to the * default conversation (i.e. `conversation` of `auto`) when a VAD start event occurs. - */ - fun interruptResponse(interruptResponse: Boolean) = - interruptResponse(JsonField.of(interruptResponse)) - - /** - * Sets [Builder.interruptResponse] to an arbitrary JSON value. * - * You should usually call [Builder.interruptResponse] with a well-typed [Boolean] value - * instead. This method is primarily for setting the field to an undocumented or not yet - * supported value. + * @throws OpenAIInvalidDataException if the JSON field has an unexpected type (e.g. if + * the server responded with an unexpected value). */ - fun interruptResponse(interruptResponse: JsonField) = apply { - this.interruptResponse = interruptResponse - } + fun interruptResponse(): Optional = + interruptResponse.getOptional("interrupt_response") /** * Used only for `server_vad` mode. Amount of audio to include before the VAD detected * speech (in milliseconds). Defaults to 300ms. - */ - fun prefixPaddingMs(prefixPaddingMs: Long) = - prefixPaddingMs(JsonField.of(prefixPaddingMs)) - - /** - * Sets [Builder.prefixPaddingMs] to an arbitrary JSON value. * - * You should usually call [Builder.prefixPaddingMs] with a well-typed [Long] value - * instead. This method is primarily for setting the field to an undocumented or not yet - * supported value. + * @throws OpenAIInvalidDataException if the JSON field has an unexpected type (e.g. if + * the server responded with an unexpected value). */ - fun prefixPaddingMs(prefixPaddingMs: JsonField) = apply { - this.prefixPaddingMs = prefixPaddingMs - } + fun prefixPaddingMs(): Optional = prefixPaddingMs.getOptional("prefix_padding_ms") /** * Used only for `server_vad` mode. Duration of silence to detect speech stop (in * milliseconds). Defaults to 500ms. With shorter values the model will respond more * quickly, but may jump in on short pauses from the user. - */ - fun silenceDurationMs(silenceDurationMs: Long) = - silenceDurationMs(JsonField.of(silenceDurationMs)) - - /** - * Sets [Builder.silenceDurationMs] to an arbitrary JSON value. * - * You should usually call [Builder.silenceDurationMs] with a well-typed [Long] value - * instead. This method is primarily for setting the field to an undocumented or not yet - * supported value. + * @throws OpenAIInvalidDataException if the JSON field has an unexpected type (e.g. if + * the server responded with an unexpected value). */ - fun silenceDurationMs(silenceDurationMs: JsonField) = apply { - this.silenceDurationMs = silenceDurationMs - } + fun silenceDurationMs(): Optional = + silenceDurationMs.getOptional("silence_duration_ms") /** * Used only for `server_vad` mode. Activation threshold for VAD (0.0 to 1.0), this * defaults to 0.5. A higher threshold will require louder audio to activate the model, * and thus might perform better in noisy environments. + * + * @throws OpenAIInvalidDataException if the JSON field has an unexpected type (e.g. if + * the server responded with an unexpected value). */ - fun threshold(threshold: Double) = threshold(JsonField.of(threshold)) + fun threshold(): Optional = threshold.getOptional("threshold") /** - * Sets [Builder.threshold] to an arbitrary JSON value. + * Returns the raw JSON value of [createResponse]. * - * You should usually call [Builder.threshold] with a well-typed [Double] value instead. - * This method is primarily for setting the field to an undocumented or not yet - * supported value. + * Unlike [createResponse], this method doesn't throw if the JSON field has an + * unexpected type. */ - fun threshold(threshold: JsonField) = apply { this.threshold = threshold } - - /** Type of turn detection. */ - fun type(type: Type) = type(JsonField.of(type)) + @JsonProperty("create_response") + @ExcludeMissing + fun _createResponse(): JsonField = createResponse /** - * Sets [Builder.type] to an arbitrary JSON value. + * Returns the raw JSON value of [idleTimeoutMs]. * - * You should usually call [Builder.type] with a well-typed [Type] value instead. This - * method is primarily for setting the field to an undocumented or not yet supported - * value. + * Unlike [idleTimeoutMs], this method doesn't throw if the JSON field has an unexpected + * type. */ - fun type(type: JsonField) = apply { this.type = type } + @JsonProperty("idle_timeout_ms") + @ExcludeMissing + fun _idleTimeoutMs(): JsonField = idleTimeoutMs - fun additionalProperties(additionalProperties: Map) = apply { - this.additionalProperties.clear() - putAllAdditionalProperties(additionalProperties) - } + /** + * Returns the raw JSON value of [interruptResponse]. + * + * Unlike [interruptResponse], this method doesn't throw if the JSON field has an + * unexpected type. + */ + @JsonProperty("interrupt_response") + @ExcludeMissing + fun _interruptResponse(): JsonField = interruptResponse - fun putAdditionalProperty(key: String, value: JsonValue) = apply { + /** + * Returns the raw JSON value of [prefixPaddingMs]. + * + * Unlike [prefixPaddingMs], this method doesn't throw if the JSON field has an + * unexpected type. + */ + @JsonProperty("prefix_padding_ms") + @ExcludeMissing + fun _prefixPaddingMs(): JsonField = prefixPaddingMs + + /** + * Returns the raw JSON value of [silenceDurationMs]. + * + * Unlike [silenceDurationMs], this method doesn't throw if the JSON field has an + * unexpected type. + */ + @JsonProperty("silence_duration_ms") + @ExcludeMissing + fun _silenceDurationMs(): JsonField = silenceDurationMs + + /** + * Returns the raw JSON value of [threshold]. + * + * Unlike [threshold], this method doesn't throw if the JSON field has an unexpected + * type. + */ + @JsonProperty("threshold") + @ExcludeMissing + fun _threshold(): JsonField = threshold + + @JsonAnySetter + private fun putAdditionalProperty(key: String, value: JsonValue) { additionalProperties.put(key, value) } - fun putAllAdditionalProperties(additionalProperties: Map) = apply { - this.additionalProperties.putAll(additionalProperties) - } + @JsonAnyGetter + @ExcludeMissing + fun _additionalProperties(): Map = + Collections.unmodifiableMap(additionalProperties) - fun removeAdditionalProperty(key: String) = apply { additionalProperties.remove(key) } + fun toBuilder() = Builder().from(this) - fun removeAllAdditionalProperties(keys: Set) = apply { - keys.forEach(::removeAdditionalProperty) + companion object { + + /** Returns a mutable builder for constructing an instance of [ServerVad]. */ + @JvmStatic fun builder() = Builder() } - /** - * Returns an immutable instance of [TurnDetection]. - * - * Further updates to this [Builder] will not mutate the returned instance. - */ - fun build(): TurnDetection = - TurnDetection( - createResponse, - eagerness, - idleTimeoutMs, - interruptResponse, - prefixPaddingMs, - silenceDurationMs, - threshold, - type, - additionalProperties.toMutableMap(), - ) - } + /** A builder for [ServerVad]. */ + class Builder internal constructor() { - private var validated: Boolean = false + private var type: JsonValue = JsonValue.from("server_vad") + private var createResponse: JsonField = JsonMissing.of() + private var idleTimeoutMs: JsonField = JsonMissing.of() + private var interruptResponse: JsonField = JsonMissing.of() + private var prefixPaddingMs: JsonField = JsonMissing.of() + private var silenceDurationMs: JsonField = JsonMissing.of() + private var threshold: JsonField = JsonMissing.of() + private var additionalProperties: MutableMap = mutableMapOf() - fun validate(): TurnDetection = apply { - if (validated) { - return@apply - } + @JvmSynthetic + internal fun from(serverVad: ServerVad) = apply { + type = serverVad.type + createResponse = serverVad.createResponse + idleTimeoutMs = serverVad.idleTimeoutMs + interruptResponse = serverVad.interruptResponse + prefixPaddingMs = serverVad.prefixPaddingMs + silenceDurationMs = serverVad.silenceDurationMs + threshold = serverVad.threshold + additionalProperties = serverVad.additionalProperties.toMutableMap() + } - createResponse() - eagerness().ifPresent { it.validate() } - idleTimeoutMs() - interruptResponse() - prefixPaddingMs() - silenceDurationMs() - threshold() - type().ifPresent { it.validate() } - validated = true - } + /** + * Sets the field to an arbitrary JSON value. + * + * It is usually unnecessary to call this method because the field defaults to the + * following: + * ```java + * JsonValue.from("server_vad") + * ``` + * + * This method is primarily for setting the field to an undocumented or not yet + * supported value. + */ + fun type(type: JsonValue) = apply { this.type = type } - fun isValid(): Boolean = - try { - validate() - true - } catch (e: OpenAIInvalidDataException) { - false - } + /** + * Whether or not to automatically generate a response when a VAD stop event occurs. + */ + fun createResponse(createResponse: Boolean) = + createResponse(JsonField.of(createResponse)) - /** - * Returns a score indicating how many valid values are contained in this object - * recursively. - * - * Used for best match union deserialization. - */ - @JvmSynthetic - internal fun validity(): Int = - (if (createResponse.asKnown().isPresent) 1 else 0) + - (eagerness.asKnown().getOrNull()?.validity() ?: 0) + - (if (idleTimeoutMs.asKnown().isPresent) 1 else 0) + - (if (interruptResponse.asKnown().isPresent) 1 else 0) + - (if (prefixPaddingMs.asKnown().isPresent) 1 else 0) + - (if (silenceDurationMs.asKnown().isPresent) 1 else 0) + - (if (threshold.asKnown().isPresent) 1 else 0) + - (type.asKnown().getOrNull()?.validity() ?: 0) + /** + * Sets [Builder.createResponse] to an arbitrary JSON value. + * + * You should usually call [Builder.createResponse] with a well-typed [Boolean] + * value instead. This method is primarily for setting the field to an undocumented + * or not yet supported value. + */ + fun createResponse(createResponse: JsonField) = apply { + this.createResponse = createResponse + } - /** - * Used only for `semantic_vad` mode. The eagerness of the model to respond. `low` will wait - * longer for the user to continue speaking, `high` will respond more quickly. `auto` is the - * default and is equivalent to `medium`. - */ - class Eagerness @JsonCreator private constructor(private val value: JsonField) : - Enum { + /** + * Optional timeout after which a model response will be triggered automatically. + * This is useful for situations in which a long pause from the user is unexpected, + * such as a phone call. The model will effectively prompt the user to continue the + * conversation based on the current context. + * + * The timeout value will be applied after the last model response's audio has + * finished playing, i.e. it's set to the `response.done` time plus audio playback + * duration. + * + * An `input_audio_buffer.timeout_triggered` event (plus events associated with the + * Response) will be emitted when the timeout is reached. Idle timeout is currently + * only supported for `server_vad` mode. + */ + fun idleTimeoutMs(idleTimeoutMs: Long?) = + idleTimeoutMs(JsonField.ofNullable(idleTimeoutMs)) - /** - * Returns this class instance's raw value. - * - * This is usually only useful if this instance was deserialized from data that doesn't - * match any known member, and you want to know that value. For example, if the SDK is - * on an older version than the API, then the API may respond with new members that the - * SDK is unaware of. - */ - @com.fasterxml.jackson.annotation.JsonValue fun _value(): JsonField = value + /** + * Alias for [Builder.idleTimeoutMs]. + * + * This unboxed primitive overload exists for backwards compatibility. + */ + fun idleTimeoutMs(idleTimeoutMs: Long) = idleTimeoutMs(idleTimeoutMs as Long?) - companion object { + /** Alias for calling [Builder.idleTimeoutMs] with `idleTimeoutMs.orElse(null)`. */ + fun idleTimeoutMs(idleTimeoutMs: Optional) = + idleTimeoutMs(idleTimeoutMs.getOrNull()) - @JvmField val LOW = of("low") + /** + * Sets [Builder.idleTimeoutMs] to an arbitrary JSON value. + * + * You should usually call [Builder.idleTimeoutMs] with a well-typed [Long] value + * instead. This method is primarily for setting the field to an undocumented or not + * yet supported value. + */ + fun idleTimeoutMs(idleTimeoutMs: JsonField) = apply { + this.idleTimeoutMs = idleTimeoutMs + } - @JvmField val MEDIUM = of("medium") + /** + * Whether or not to automatically interrupt any ongoing response with output to the + * default conversation (i.e. `conversation` of `auto`) when a VAD start event + * occurs. + */ + fun interruptResponse(interruptResponse: Boolean) = + interruptResponse(JsonField.of(interruptResponse)) - @JvmField val HIGH = of("high") + /** + * Sets [Builder.interruptResponse] to an arbitrary JSON value. + * + * You should usually call [Builder.interruptResponse] with a well-typed [Boolean] + * value instead. This method is primarily for setting the field to an undocumented + * or not yet supported value. + */ + fun interruptResponse(interruptResponse: JsonField) = apply { + this.interruptResponse = interruptResponse + } - @JvmField val AUTO = of("auto") + /** + * Used only for `server_vad` mode. Amount of audio to include before the VAD + * detected speech (in milliseconds). Defaults to 300ms. + */ + fun prefixPaddingMs(prefixPaddingMs: Long) = + prefixPaddingMs(JsonField.of(prefixPaddingMs)) - @JvmStatic fun of(value: String) = Eagerness(JsonField.of(value)) - } + /** + * Sets [Builder.prefixPaddingMs] to an arbitrary JSON value. + * + * You should usually call [Builder.prefixPaddingMs] with a well-typed [Long] value + * instead. This method is primarily for setting the field to an undocumented or not + * yet supported value. + */ + fun prefixPaddingMs(prefixPaddingMs: JsonField) = apply { + this.prefixPaddingMs = prefixPaddingMs + } - /** An enum containing [Eagerness]'s known values. */ - enum class Known { - LOW, - MEDIUM, - HIGH, - AUTO, - } + /** + * Used only for `server_vad` mode. Duration of silence to detect speech stop (in + * milliseconds). Defaults to 500ms. With shorter values the model will respond more + * quickly, but may jump in on short pauses from the user. + */ + fun silenceDurationMs(silenceDurationMs: Long) = + silenceDurationMs(JsonField.of(silenceDurationMs)) - /** - * An enum containing [Eagerness]'s known values, as well as an [_UNKNOWN] member. - * - * An instance of [Eagerness] can contain an unknown value in a couple of cases: - * - It was deserialized from data that doesn't match any known member. For example, if - * the SDK is on an older version than the API, then the API may respond with new - * members that the SDK is unaware of. - * - It was constructed with an arbitrary value using the [of] method. - */ - enum class Value { - LOW, - MEDIUM, - HIGH, - AUTO, /** - * An enum member indicating that [Eagerness] was instantiated with an unknown - * value. + * Sets [Builder.silenceDurationMs] to an arbitrary JSON value. + * + * You should usually call [Builder.silenceDurationMs] with a well-typed [Long] + * value instead. This method is primarily for setting the field to an undocumented + * or not yet supported value. */ - _UNKNOWN, - } + fun silenceDurationMs(silenceDurationMs: JsonField) = apply { + this.silenceDurationMs = silenceDurationMs + } - /** - * Returns an enum member corresponding to this class instance's value, or - * [Value._UNKNOWN] if the class was instantiated with an unknown value. - * - * Use the [known] method instead if you're certain the value is always known or if you - * want to throw for the unknown case. - */ - fun value(): Value = - when (this) { - LOW -> Value.LOW - MEDIUM -> Value.MEDIUM - HIGH -> Value.HIGH - AUTO -> Value.AUTO - else -> Value._UNKNOWN + /** + * Used only for `server_vad` mode. Activation threshold for VAD (0.0 to 1.0), this + * defaults to 0.5. A higher threshold will require louder audio to activate the + * model, and thus might perform better in noisy environments. + */ + fun threshold(threshold: Double) = threshold(JsonField.of(threshold)) + + /** + * Sets [Builder.threshold] to an arbitrary JSON value. + * + * You should usually call [Builder.threshold] with a well-typed [Double] value + * instead. This method is primarily for setting the field to an undocumented or not + * yet supported value. + */ + fun threshold(threshold: JsonField) = apply { this.threshold = threshold } + + fun additionalProperties(additionalProperties: Map) = apply { + this.additionalProperties.clear() + putAllAdditionalProperties(additionalProperties) } - /** - * Returns an enum member corresponding to this class instance's value. - * - * Use the [value] method instead if you're uncertain the value is always known and - * don't want to throw for the unknown case. - * - * @throws OpenAIInvalidDataException if this class instance's value is a not a known - * member. - */ - fun known(): Known = - when (this) { - LOW -> Known.LOW - MEDIUM -> Known.MEDIUM - HIGH -> Known.HIGH - AUTO -> Known.AUTO - else -> throw OpenAIInvalidDataException("Unknown Eagerness: $value") + fun putAdditionalProperty(key: String, value: JsonValue) = apply { + additionalProperties.put(key, value) } - /** - * Returns this class instance's primitive wire representation. - * - * This differs from the [toString] method because that method is primarily for - * debugging and generally doesn't throw. - * - * @throws OpenAIInvalidDataException if this class instance's value does not have the - * expected primitive type. - */ - fun asString(): String = - _value().asString().orElseThrow { - OpenAIInvalidDataException("Value is not a String") + fun putAllAdditionalProperties(additionalProperties: Map) = + apply { + this.additionalProperties.putAll(additionalProperties) + } + + fun removeAdditionalProperty(key: String) = apply { + additionalProperties.remove(key) } + fun removeAllAdditionalProperties(keys: Set) = apply { + keys.forEach(::removeAdditionalProperty) + } + + /** + * Returns an immutable instance of [ServerVad]. + * + * Further updates to this [Builder] will not mutate the returned instance. + */ + fun build(): ServerVad = + ServerVad( + type, + createResponse, + idleTimeoutMs, + interruptResponse, + prefixPaddingMs, + silenceDurationMs, + threshold, + additionalProperties.toMutableMap(), + ) + } + private var validated: Boolean = false - fun validate(): Eagerness = apply { + fun validate(): ServerVad = apply { if (validated) { return@apply } - known() + _type().let { + if (it != JsonValue.from("server_vad")) { + throw OpenAIInvalidDataException("'type' is invalid, received $it") + } + } + createResponse() + idleTimeoutMs() + interruptResponse() + prefixPaddingMs() + silenceDurationMs() + threshold() validated = true } @@ -3282,117 +3346,307 @@ private constructor( * * Used for best match union deserialization. */ - @JvmSynthetic internal fun validity(): Int = if (value() == Value._UNKNOWN) 0 else 1 + @JvmSynthetic + internal fun validity(): Int = + type.let { if (it == JsonValue.from("server_vad")) 1 else 0 } + + (if (createResponse.asKnown().isPresent) 1 else 0) + + (if (idleTimeoutMs.asKnown().isPresent) 1 else 0) + + (if (interruptResponse.asKnown().isPresent) 1 else 0) + + (if (prefixPaddingMs.asKnown().isPresent) 1 else 0) + + (if (silenceDurationMs.asKnown().isPresent) 1 else 0) + + (if (threshold.asKnown().isPresent) 1 else 0) override fun equals(other: Any?): Boolean { if (this === other) { return true } - return other is Eagerness && value == other.value + return other is ServerVad && + type == other.type && + createResponse == other.createResponse && + idleTimeoutMs == other.idleTimeoutMs && + interruptResponse == other.interruptResponse && + prefixPaddingMs == other.prefixPaddingMs && + silenceDurationMs == other.silenceDurationMs && + threshold == other.threshold && + additionalProperties == other.additionalProperties + } + + private val hashCode: Int by lazy { + Objects.hash( + type, + createResponse, + idleTimeoutMs, + interruptResponse, + prefixPaddingMs, + silenceDurationMs, + threshold, + additionalProperties, + ) } - override fun hashCode() = value.hashCode() + override fun hashCode(): Int = hashCode - override fun toString() = value.toString() + override fun toString() = + "ServerVad{type=$type, createResponse=$createResponse, idleTimeoutMs=$idleTimeoutMs, interruptResponse=$interruptResponse, prefixPaddingMs=$prefixPaddingMs, silenceDurationMs=$silenceDurationMs, threshold=$threshold, additionalProperties=$additionalProperties}" } - /** Type of turn detection. */ - class Type @JsonCreator private constructor(private val value: JsonField) : Enum { + /** + * Server-side semantic turn detection which uses a model to determine when the user has + * finished speaking. + */ + class SemanticVad + private constructor( + private val type: JsonValue, + private val createResponse: JsonField, + private val eagerness: JsonField, + private val interruptResponse: JsonField, + private val additionalProperties: MutableMap, + ) { + + @JsonCreator + private constructor( + @JsonProperty("type") @ExcludeMissing type: JsonValue = JsonMissing.of(), + @JsonProperty("create_response") + @ExcludeMissing + createResponse: JsonField = JsonMissing.of(), + @JsonProperty("eagerness") + @ExcludeMissing + eagerness: JsonField = JsonMissing.of(), + @JsonProperty("interrupt_response") + @ExcludeMissing + interruptResponse: JsonField = JsonMissing.of(), + ) : this(type, createResponse, eagerness, interruptResponse, mutableMapOf()) /** - * Returns this class instance's raw value. + * Type of turn detection, `semantic_vad` to turn on Semantic VAD. * - * This is usually only useful if this instance was deserialized from data that doesn't - * match any known member, and you want to know that value. For example, if the SDK is - * on an older version than the API, then the API may respond with new members that the - * SDK is unaware of. + * Expected to always return the following: + * ```java + * JsonValue.from("semantic_vad") + * ``` + * + * However, this method can be useful for debugging and logging (e.g. if the server + * responded with an unexpected value). */ - @com.fasterxml.jackson.annotation.JsonValue fun _value(): JsonField = value - - companion object { - - @JvmField val SERVER_VAD = of("server_vad") - - @JvmField val SEMANTIC_VAD = of("semantic_vad") - - @JvmStatic fun of(value: String) = Type(JsonField.of(value)) - } - - /** An enum containing [Type]'s known values. */ - enum class Known { - SERVER_VAD, - SEMANTIC_VAD, - } + @JsonProperty("type") @ExcludeMissing fun _type(): JsonValue = type /** - * An enum containing [Type]'s known values, as well as an [_UNKNOWN] member. + * Whether or not to automatically generate a response when a VAD stop event occurs. * - * An instance of [Type] can contain an unknown value in a couple of cases: - * - It was deserialized from data that doesn't match any known member. For example, if - * the SDK is on an older version than the API, then the API may respond with new - * members that the SDK is unaware of. - * - It was constructed with an arbitrary value using the [of] method. + * @throws OpenAIInvalidDataException if the JSON field has an unexpected type (e.g. if + * the server responded with an unexpected value). */ - enum class Value { - SERVER_VAD, - SEMANTIC_VAD, - /** An enum member indicating that [Type] was instantiated with an unknown value. */ - _UNKNOWN, - } + fun createResponse(): Optional = createResponse.getOptional("create_response") /** - * Returns an enum member corresponding to this class instance's value, or - * [Value._UNKNOWN] if the class was instantiated with an unknown value. + * Used only for `semantic_vad` mode. The eagerness of the model to respond. `low` will + * wait longer for the user to continue speaking, `high` will respond more quickly. + * `auto` is the default and is equivalent to `medium`. `low`, `medium`, and `high` have + * max timeouts of 8s, 4s, and 2s respectively. * - * Use the [known] method instead if you're certain the value is always known or if you - * want to throw for the unknown case. + * @throws OpenAIInvalidDataException if the JSON field has an unexpected type (e.g. if + * the server responded with an unexpected value). */ - fun value(): Value = - when (this) { - SERVER_VAD -> Value.SERVER_VAD - SEMANTIC_VAD -> Value.SEMANTIC_VAD - else -> Value._UNKNOWN - } + fun eagerness(): Optional = eagerness.getOptional("eagerness") /** - * Returns an enum member corresponding to this class instance's value. + * Whether or not to automatically interrupt any ongoing response with output to the + * default conversation (i.e. `conversation` of `auto`) when a VAD start event occurs. * - * Use the [value] method instead if you're uncertain the value is always known and - * don't want to throw for the unknown case. + * @throws OpenAIInvalidDataException if the JSON field has an unexpected type (e.g. if + * the server responded with an unexpected value). + */ + fun interruptResponse(): Optional = + interruptResponse.getOptional("interrupt_response") + + /** + * Returns the raw JSON value of [createResponse]. * - * @throws OpenAIInvalidDataException if this class instance's value is a not a known - * member. + * Unlike [createResponse], this method doesn't throw if the JSON field has an + * unexpected type. */ - fun known(): Known = - when (this) { - SERVER_VAD -> Known.SERVER_VAD - SEMANTIC_VAD -> Known.SEMANTIC_VAD - else -> throw OpenAIInvalidDataException("Unknown Type: $value") - } + @JsonProperty("create_response") + @ExcludeMissing + fun _createResponse(): JsonField = createResponse /** - * Returns this class instance's primitive wire representation. + * Returns the raw JSON value of [eagerness]. * - * This differs from the [toString] method because that method is primarily for - * debugging and generally doesn't throw. + * Unlike [eagerness], this method doesn't throw if the JSON field has an unexpected + * type. + */ + @JsonProperty("eagerness") + @ExcludeMissing + fun _eagerness(): JsonField = eagerness + + /** + * Returns the raw JSON value of [interruptResponse]. * - * @throws OpenAIInvalidDataException if this class instance's value does not have the - * expected primitive type. + * Unlike [interruptResponse], this method doesn't throw if the JSON field has an + * unexpected type. */ - fun asString(): String = - _value().asString().orElseThrow { - OpenAIInvalidDataException("Value is not a String") + @JsonProperty("interrupt_response") + @ExcludeMissing + fun _interruptResponse(): JsonField = interruptResponse + + @JsonAnySetter + private fun putAdditionalProperty(key: String, value: JsonValue) { + additionalProperties.put(key, value) + } + + @JsonAnyGetter + @ExcludeMissing + fun _additionalProperties(): Map = + Collections.unmodifiableMap(additionalProperties) + + fun toBuilder() = Builder().from(this) + + companion object { + + /** Returns a mutable builder for constructing an instance of [SemanticVad]. */ + @JvmStatic fun builder() = Builder() + } + + /** A builder for [SemanticVad]. */ + class Builder internal constructor() { + + private var type: JsonValue = JsonValue.from("semantic_vad") + private var createResponse: JsonField = JsonMissing.of() + private var eagerness: JsonField = JsonMissing.of() + private var interruptResponse: JsonField = JsonMissing.of() + private var additionalProperties: MutableMap = mutableMapOf() + + @JvmSynthetic + internal fun from(semanticVad: SemanticVad) = apply { + type = semanticVad.type + createResponse = semanticVad.createResponse + eagerness = semanticVad.eagerness + interruptResponse = semanticVad.interruptResponse + additionalProperties = semanticVad.additionalProperties.toMutableMap() + } + + /** + * Sets the field to an arbitrary JSON value. + * + * It is usually unnecessary to call this method because the field defaults to the + * following: + * ```java + * JsonValue.from("semantic_vad") + * ``` + * + * This method is primarily for setting the field to an undocumented or not yet + * supported value. + */ + fun type(type: JsonValue) = apply { this.type = type } + + /** + * Whether or not to automatically generate a response when a VAD stop event occurs. + */ + fun createResponse(createResponse: Boolean) = + createResponse(JsonField.of(createResponse)) + + /** + * Sets [Builder.createResponse] to an arbitrary JSON value. + * + * You should usually call [Builder.createResponse] with a well-typed [Boolean] + * value instead. This method is primarily for setting the field to an undocumented + * or not yet supported value. + */ + fun createResponse(createResponse: JsonField) = apply { + this.createResponse = createResponse + } + + /** + * Used only for `semantic_vad` mode. The eagerness of the model to respond. `low` + * will wait longer for the user to continue speaking, `high` will respond more + * quickly. `auto` is the default and is equivalent to `medium`. `low`, `medium`, + * and `high` have max timeouts of 8s, 4s, and 2s respectively. + */ + fun eagerness(eagerness: Eagerness) = eagerness(JsonField.of(eagerness)) + + /** + * Sets [Builder.eagerness] to an arbitrary JSON value. + * + * You should usually call [Builder.eagerness] with a well-typed [Eagerness] value + * instead. This method is primarily for setting the field to an undocumented or not + * yet supported value. + */ + fun eagerness(eagerness: JsonField) = apply { + this.eagerness = eagerness + } + + /** + * Whether or not to automatically interrupt any ongoing response with output to the + * default conversation (i.e. `conversation` of `auto`) when a VAD start event + * occurs. + */ + fun interruptResponse(interruptResponse: Boolean) = + interruptResponse(JsonField.of(interruptResponse)) + + /** + * Sets [Builder.interruptResponse] to an arbitrary JSON value. + * + * You should usually call [Builder.interruptResponse] with a well-typed [Boolean] + * value instead. This method is primarily for setting the field to an undocumented + * or not yet supported value. + */ + fun interruptResponse(interruptResponse: JsonField) = apply { + this.interruptResponse = interruptResponse + } + + fun additionalProperties(additionalProperties: Map) = apply { + this.additionalProperties.clear() + putAllAdditionalProperties(additionalProperties) + } + + fun putAdditionalProperty(key: String, value: JsonValue) = apply { + additionalProperties.put(key, value) + } + + fun putAllAdditionalProperties(additionalProperties: Map) = + apply { + this.additionalProperties.putAll(additionalProperties) + } + + fun removeAdditionalProperty(key: String) = apply { + additionalProperties.remove(key) } + fun removeAllAdditionalProperties(keys: Set) = apply { + keys.forEach(::removeAdditionalProperty) + } + + /** + * Returns an immutable instance of [SemanticVad]. + * + * Further updates to this [Builder] will not mutate the returned instance. + */ + fun build(): SemanticVad = + SemanticVad( + type, + createResponse, + eagerness, + interruptResponse, + additionalProperties.toMutableMap(), + ) + } + private var validated: Boolean = false - fun validate(): Type = apply { + fun validate(): SemanticVad = apply { if (validated) { return@apply } - known() + _type().let { + if (it != JsonValue.from("semantic_vad")) { + throw OpenAIInvalidDataException("'type' is invalid, received $it") + } + } + createResponse() + eagerness().ifPresent { it.validate() } + interruptResponse() validated = true } @@ -3410,56 +3664,190 @@ private constructor( * * Used for best match union deserialization. */ - @JvmSynthetic internal fun validity(): Int = if (value() == Value._UNKNOWN) 0 else 1 + @JvmSynthetic + internal fun validity(): Int = + type.let { if (it == JsonValue.from("semantic_vad")) 1 else 0 } + + (if (createResponse.asKnown().isPresent) 1 else 0) + + (eagerness.asKnown().getOrNull()?.validity() ?: 0) + + (if (interruptResponse.asKnown().isPresent) 1 else 0) + + /** + * Used only for `semantic_vad` mode. The eagerness of the model to respond. `low` will + * wait longer for the user to continue speaking, `high` will respond more quickly. + * `auto` is the default and is equivalent to `medium`. `low`, `medium`, and `high` have + * max timeouts of 8s, 4s, and 2s respectively. + */ + class Eagerness @JsonCreator private constructor(private val value: JsonField) : + Enum { + + /** + * Returns this class instance's raw value. + * + * This is usually only useful if this instance was deserialized from data that + * doesn't match any known member, and you want to know that value. For example, if + * the SDK is on an older version than the API, then the API may respond with new + * members that the SDK is unaware of. + */ + @com.fasterxml.jackson.annotation.JsonValue fun _value(): JsonField = value + + companion object { + + @JvmField val LOW = of("low") + + @JvmField val MEDIUM = of("medium") + + @JvmField val HIGH = of("high") + + @JvmField val AUTO = of("auto") + + @JvmStatic fun of(value: String) = Eagerness(JsonField.of(value)) + } + + /** An enum containing [Eagerness]'s known values. */ + enum class Known { + LOW, + MEDIUM, + HIGH, + AUTO, + } + + /** + * An enum containing [Eagerness]'s known values, as well as an [_UNKNOWN] member. + * + * An instance of [Eagerness] can contain an unknown value in a couple of cases: + * - It was deserialized from data that doesn't match any known member. For example, + * if the SDK is on an older version than the API, then the API may respond with + * new members that the SDK is unaware of. + * - It was constructed with an arbitrary value using the [of] method. + */ + enum class Value { + LOW, + MEDIUM, + HIGH, + AUTO, + /** + * An enum member indicating that [Eagerness] was instantiated with an unknown + * value. + */ + _UNKNOWN, + } + + /** + * Returns an enum member corresponding to this class instance's value, or + * [Value._UNKNOWN] if the class was instantiated with an unknown value. + * + * Use the [known] method instead if you're certain the value is always known or if + * you want to throw for the unknown case. + */ + fun value(): Value = + when (this) { + LOW -> Value.LOW + MEDIUM -> Value.MEDIUM + HIGH -> Value.HIGH + AUTO -> Value.AUTO + else -> Value._UNKNOWN + } + + /** + * Returns an enum member corresponding to this class instance's value. + * + * Use the [value] method instead if you're uncertain the value is always known and + * don't want to throw for the unknown case. + * + * @throws OpenAIInvalidDataException if this class instance's value is a not a + * known member. + */ + fun known(): Known = + when (this) { + LOW -> Known.LOW + MEDIUM -> Known.MEDIUM + HIGH -> Known.HIGH + AUTO -> Known.AUTO + else -> throw OpenAIInvalidDataException("Unknown Eagerness: $value") + } + + /** + * Returns this class instance's primitive wire representation. + * + * This differs from the [toString] method because that method is primarily for + * debugging and generally doesn't throw. + * + * @throws OpenAIInvalidDataException if this class instance's value does not have + * the expected primitive type. + */ + fun asString(): String = + _value().asString().orElseThrow { + OpenAIInvalidDataException("Value is not a String") + } + + private var validated: Boolean = false + + fun validate(): Eagerness = apply { + if (validated) { + return@apply + } + + known() + validated = true + } + + fun isValid(): Boolean = + try { + validate() + true + } catch (e: OpenAIInvalidDataException) { + false + } + + /** + * Returns a score indicating how many valid values are contained in this object + * recursively. + * + * Used for best match union deserialization. + */ + @JvmSynthetic internal fun validity(): Int = if (value() == Value._UNKNOWN) 0 else 1 + + override fun equals(other: Any?): Boolean { + if (this === other) { + return true + } + + return other is Eagerness && value == other.value + } + + override fun hashCode() = value.hashCode() + + override fun toString() = value.toString() + } override fun equals(other: Any?): Boolean { if (this === other) { return true } - return other is Type && value == other.value + return other is SemanticVad && + type == other.type && + createResponse == other.createResponse && + eagerness == other.eagerness && + interruptResponse == other.interruptResponse && + additionalProperties == other.additionalProperties } - override fun hashCode() = value.hashCode() - - override fun toString() = value.toString() - } - - override fun equals(other: Any?): Boolean { - if (this === other) { - return true + private val hashCode: Int by lazy { + Objects.hash( + type, + createResponse, + eagerness, + interruptResponse, + additionalProperties, + ) } - return other is TurnDetection && - createResponse == other.createResponse && - eagerness == other.eagerness && - idleTimeoutMs == other.idleTimeoutMs && - interruptResponse == other.interruptResponse && - prefixPaddingMs == other.prefixPaddingMs && - silenceDurationMs == other.silenceDurationMs && - threshold == other.threshold && - type == other.type && - additionalProperties == other.additionalProperties - } + override fun hashCode(): Int = hashCode - private val hashCode: Int by lazy { - Objects.hash( - createResponse, - eagerness, - idleTimeoutMs, - interruptResponse, - prefixPaddingMs, - silenceDurationMs, - threshold, - type, - additionalProperties, - ) + override fun toString() = + "SemanticVad{type=$type, createResponse=$createResponse, eagerness=$eagerness, interruptResponse=$interruptResponse, additionalProperties=$additionalProperties}" } - - override fun hashCode(): Int = hashCode - - override fun toString() = - "TurnDetection{createResponse=$createResponse, eagerness=$eagerness, idleTimeoutMs=$idleTimeoutMs, interruptResponse=$interruptResponse, prefixPaddingMs=$prefixPaddingMs, silenceDurationMs=$silenceDurationMs, threshold=$threshold, type=$type, additionalProperties=$additionalProperties}" } /** diff --git a/openai-java-core/src/main/kotlin/com/openai/models/realtime/RealtimeTranscriptionSessionAudioInput.kt b/openai-java-core/src/main/kotlin/com/openai/models/realtime/RealtimeTranscriptionSessionAudioInput.kt index a9a03dcd..40e03222 100644 --- a/openai-java-core/src/main/kotlin/com/openai/models/realtime/RealtimeTranscriptionSessionAudioInput.kt +++ b/openai-java-core/src/main/kotlin/com/openai/models/realtime/RealtimeTranscriptionSessionAudioInput.kt @@ -77,14 +77,16 @@ private constructor( /** * Configuration for turn detection, ether Server VAD or Semantic VAD. This can be set to `null` - * to turn off, in which case the client must manually trigger model response. Server VAD means - * that the model will detect the start and end of speech based on audio volume and respond at - * the end of user speech. Semantic VAD is more advanced and uses a turn detection model (in - * conjunction with VAD) to semantically estimate whether the user has finished speaking, then - * dynamically sets a timeout based on this probability. For example, if user audio trails off - * with "uhhm", the model will score a low probability of turn end and wait longer for the user - * to continue speaking. This can be useful for more natural conversations, but may have a - * higher latency. + * to turn off, in which case the client must manually trigger model response. + * + * Server VAD means that the model will detect the start and end of speech based on audio volume + * and respond at the end of user speech. + * + * Semantic VAD is more advanced and uses a turn detection model (in conjunction with VAD) to + * semantically estimate whether the user has finished speaking, then dynamically sets a timeout + * based on this probability. For example, if user audio trails off with "uhhm", the model will + * score a low probability of turn end and wait longer for the user to continue speaking. This + * can be useful for more natural conversations, but may have a higher latency. * * @throws OpenAIInvalidDataException if the JSON field has an unexpected type (e.g. if the * server responded with an unexpected value). @@ -239,17 +241,25 @@ private constructor( /** * Configuration for turn detection, ether Server VAD or Semantic VAD. This can be set to - * `null` to turn off, in which case the client must manually trigger model response. Server - * VAD means that the model will detect the start and end of speech based on audio volume - * and respond at the end of user speech. Semantic VAD is more advanced and uses a turn - * detection model (in conjunction with VAD) to semantically estimate whether the user has - * finished speaking, then dynamically sets a timeout based on this probability. For - * example, if user audio trails off with "uhhm", the model will score a low probability of - * turn end and wait longer for the user to continue speaking. This can be useful for more - * natural conversations, but may have a higher latency. + * `null` to turn off, in which case the client must manually trigger model response. + * + * Server VAD means that the model will detect the start and end of speech based on audio + * volume and respond at the end of user speech. + * + * Semantic VAD is more advanced and uses a turn detection model (in conjunction with VAD) + * to semantically estimate whether the user has finished speaking, then dynamically sets a + * timeout based on this probability. For example, if user audio trails off with "uhhm", the + * model will score a low probability of turn end and wait longer for the user to continue + * speaking. This can be useful for more natural conversations, but may have a higher + * latency. */ - fun turnDetection(turnDetection: RealtimeTranscriptionSessionAudioInputTurnDetection) = - turnDetection(JsonField.of(turnDetection)) + fun turnDetection(turnDetection: RealtimeTranscriptionSessionAudioInputTurnDetection?) = + turnDetection(JsonField.ofNullable(turnDetection)) + + /** Alias for calling [Builder.turnDetection] with `turnDetection.orElse(null)`. */ + fun turnDetection( + turnDetection: Optional + ) = turnDetection(turnDetection.getOrNull()) /** * Sets [Builder.turnDetection] to an arbitrary JSON value. @@ -262,6 +272,28 @@ private constructor( turnDetection: JsonField ) = apply { this.turnDetection = turnDetection } + /** + * Alias for calling [turnDetection] with + * `RealtimeTranscriptionSessionAudioInputTurnDetection.ofServerVad(serverVad)`. + */ + fun turnDetection( + serverVad: RealtimeTranscriptionSessionAudioInputTurnDetection.ServerVad + ) = + turnDetection( + RealtimeTranscriptionSessionAudioInputTurnDetection.ofServerVad(serverVad) + ) + + /** + * Alias for calling [turnDetection] with + * `RealtimeTranscriptionSessionAudioInputTurnDetection.ofSemanticVad(semanticVad)`. + */ + fun turnDetection( + semanticVad: RealtimeTranscriptionSessionAudioInputTurnDetection.SemanticVad + ) = + turnDetection( + RealtimeTranscriptionSessionAudioInputTurnDetection.ofSemanticVad(semanticVad) + ) + fun additionalProperties(additionalProperties: Map) = apply { this.additionalProperties.clear() putAllAdditionalProperties(additionalProperties) diff --git a/openai-java-core/src/main/kotlin/com/openai/models/realtime/RealtimeTranscriptionSessionAudioInputTurnDetection.kt b/openai-java-core/src/main/kotlin/com/openai/models/realtime/RealtimeTranscriptionSessionAudioInputTurnDetection.kt index 5bd44966..46035471 100644 --- a/openai-java-core/src/main/kotlin/com/openai/models/realtime/RealtimeTranscriptionSessionAudioInputTurnDetection.kt +++ b/openai-java-core/src/main/kotlin/com/openai/models/realtime/RealtimeTranscriptionSessionAudioInputTurnDetection.kt @@ -6,11 +6,21 @@ import com.fasterxml.jackson.annotation.JsonAnyGetter import com.fasterxml.jackson.annotation.JsonAnySetter import com.fasterxml.jackson.annotation.JsonCreator import com.fasterxml.jackson.annotation.JsonProperty +import com.fasterxml.jackson.core.JsonGenerator +import com.fasterxml.jackson.core.ObjectCodec +import com.fasterxml.jackson.databind.JsonNode +import com.fasterxml.jackson.databind.SerializerProvider +import com.fasterxml.jackson.databind.annotation.JsonDeserialize +import com.fasterxml.jackson.databind.annotation.JsonSerialize +import com.fasterxml.jackson.module.kotlin.jacksonTypeRef +import com.openai.core.BaseDeserializer +import com.openai.core.BaseSerializer import com.openai.core.Enum import com.openai.core.ExcludeMissing import com.openai.core.JsonField import com.openai.core.JsonMissing import com.openai.core.JsonValue +import com.openai.core.getOrThrow import com.openai.errors.OpenAIInvalidDataException import java.util.Collections import java.util.Objects @@ -19,586 +29,664 @@ import kotlin.jvm.optionals.getOrNull /** * Configuration for turn detection, ether Server VAD or Semantic VAD. This can be set to `null` to - * turn off, in which case the client must manually trigger model response. Server VAD means that - * the model will detect the start and end of speech based on audio volume and respond at the end of - * user speech. Semantic VAD is more advanced and uses a turn detection model (in conjunction with - * VAD) to semantically estimate whether the user has finished speaking, then dynamically sets a - * timeout based on this probability. For example, if user audio trails off with "uhhm", the model - * will score a low probability of turn end and wait longer for the user to continue speaking. This - * can be useful for more natural conversations, but may have a higher latency. + * turn off, in which case the client must manually trigger model response. + * + * Server VAD means that the model will detect the start and end of speech based on audio volume and + * respond at the end of user speech. + * + * Semantic VAD is more advanced and uses a turn detection model (in conjunction with VAD) to + * semantically estimate whether the user has finished speaking, then dynamically sets a timeout + * based on this probability. For example, if user audio trails off with "uhhm", the model will + * score a low probability of turn end and wait longer for the user to continue speaking. This can + * be useful for more natural conversations, but may have a higher latency. */ +@JsonDeserialize(using = RealtimeTranscriptionSessionAudioInputTurnDetection.Deserializer::class) +@JsonSerialize(using = RealtimeTranscriptionSessionAudioInputTurnDetection.Serializer::class) class RealtimeTranscriptionSessionAudioInputTurnDetection private constructor( - private val createResponse: JsonField, - private val eagerness: JsonField, - private val idleTimeoutMs: JsonField, - private val interruptResponse: JsonField, - private val prefixPaddingMs: JsonField, - private val silenceDurationMs: JsonField, - private val threshold: JsonField, - private val type: JsonField, - private val additionalProperties: MutableMap, + private val serverVad: ServerVad? = null, + private val semanticVad: SemanticVad? = null, + private val _json: JsonValue? = null, ) { - @JsonCreator - private constructor( - @JsonProperty("create_response") - @ExcludeMissing - createResponse: JsonField = JsonMissing.of(), - @JsonProperty("eagerness") - @ExcludeMissing - eagerness: JsonField = JsonMissing.of(), - @JsonProperty("idle_timeout_ms") - @ExcludeMissing - idleTimeoutMs: JsonField = JsonMissing.of(), - @JsonProperty("interrupt_response") - @ExcludeMissing - interruptResponse: JsonField = JsonMissing.of(), - @JsonProperty("prefix_padding_ms") - @ExcludeMissing - prefixPaddingMs: JsonField = JsonMissing.of(), - @JsonProperty("silence_duration_ms") - @ExcludeMissing - silenceDurationMs: JsonField = JsonMissing.of(), - @JsonProperty("threshold") @ExcludeMissing threshold: JsonField = JsonMissing.of(), - @JsonProperty("type") @ExcludeMissing type: JsonField = JsonMissing.of(), - ) : this( - createResponse, - eagerness, - idleTimeoutMs, - interruptResponse, - prefixPaddingMs, - silenceDurationMs, - threshold, - type, - mutableMapOf(), - ) - /** - * Whether or not to automatically generate a response when a VAD stop event occurs. - * - * @throws OpenAIInvalidDataException if the JSON field has an unexpected type (e.g. if the - * server responded with an unexpected value). + * Server-side voice activity detection (VAD) which flips on when user speech is detected and + * off after a period of silence. */ - fun createResponse(): Optional = createResponse.getOptional("create_response") + fun serverVad(): Optional = Optional.ofNullable(serverVad) /** - * Used only for `semantic_vad` mode. The eagerness of the model to respond. `low` will wait - * longer for the user to continue speaking, `high` will respond more quickly. `auto` is the - * default and is equivalent to `medium`. - * - * @throws OpenAIInvalidDataException if the JSON field has an unexpected type (e.g. if the - * server responded with an unexpected value). + * Server-side semantic turn detection which uses a model to determine when the user has + * finished speaking. */ - fun eagerness(): Optional = eagerness.getOptional("eagerness") + fun semanticVad(): Optional = Optional.ofNullable(semanticVad) - /** - * Optional idle timeout after which turn detection will auto-timeout when no additional audio - * is received. - * - * @throws OpenAIInvalidDataException if the JSON field has an unexpected type (e.g. if the - * server responded with an unexpected value). - */ - fun idleTimeoutMs(): Optional = idleTimeoutMs.getOptional("idle_timeout_ms") + fun isServerVad(): Boolean = serverVad != null - /** - * Whether or not to automatically interrupt any ongoing response with output to the default - * conversation (i.e. `conversation` of `auto`) when a VAD start event occurs. - * - * @throws OpenAIInvalidDataException if the JSON field has an unexpected type (e.g. if the - * server responded with an unexpected value). - */ - fun interruptResponse(): Optional = interruptResponse.getOptional("interrupt_response") + fun isSemanticVad(): Boolean = semanticVad != null /** - * Used only for `server_vad` mode. Amount of audio to include before the VAD detected speech - * (in milliseconds). Defaults to 300ms. - * - * @throws OpenAIInvalidDataException if the JSON field has an unexpected type (e.g. if the - * server responded with an unexpected value). + * Server-side voice activity detection (VAD) which flips on when user speech is detected and + * off after a period of silence. */ - fun prefixPaddingMs(): Optional = prefixPaddingMs.getOptional("prefix_padding_ms") + fun asServerVad(): ServerVad = serverVad.getOrThrow("serverVad") /** - * Used only for `server_vad` mode. Duration of silence to detect speech stop (in milliseconds). - * Defaults to 500ms. With shorter values the model will respond more quickly, but may jump in - * on short pauses from the user. - * - * @throws OpenAIInvalidDataException if the JSON field has an unexpected type (e.g. if the - * server responded with an unexpected value). + * Server-side semantic turn detection which uses a model to determine when the user has + * finished speaking. */ - fun silenceDurationMs(): Optional = silenceDurationMs.getOptional("silence_duration_ms") + fun asSemanticVad(): SemanticVad = semanticVad.getOrThrow("semanticVad") - /** - * Used only for `server_vad` mode. Activation threshold for VAD (0.0 to 1.0), this defaults to - * 0.5. A higher threshold will require louder audio to activate the model, and thus might - * perform better in noisy environments. - * - * @throws OpenAIInvalidDataException if the JSON field has an unexpected type (e.g. if the - * server responded with an unexpected value). - */ - fun threshold(): Optional = threshold.getOptional("threshold") + fun _json(): Optional = Optional.ofNullable(_json) - /** - * Type of turn detection. - * - * @throws OpenAIInvalidDataException if the JSON field has an unexpected type (e.g. if the - * server responded with an unexpected value). - */ - fun type(): Optional = type.getOptional("type") + fun accept(visitor: Visitor): T = + when { + serverVad != null -> visitor.visitServerVad(serverVad) + semanticVad != null -> visitor.visitSemanticVad(semanticVad) + else -> visitor.unknown(_json) + } - /** - * Returns the raw JSON value of [createResponse]. - * - * Unlike [createResponse], this method doesn't throw if the JSON field has an unexpected type. - */ - @JsonProperty("create_response") - @ExcludeMissing - fun _createResponse(): JsonField = createResponse + private var validated: Boolean = false - /** - * Returns the raw JSON value of [eagerness]. - * - * Unlike [eagerness], this method doesn't throw if the JSON field has an unexpected type. - */ - @JsonProperty("eagerness") @ExcludeMissing fun _eagerness(): JsonField = eagerness + fun validate(): RealtimeTranscriptionSessionAudioInputTurnDetection = apply { + if (validated) { + return@apply + } - /** - * Returns the raw JSON value of [idleTimeoutMs]. - * - * Unlike [idleTimeoutMs], this method doesn't throw if the JSON field has an unexpected type. - */ - @JsonProperty("idle_timeout_ms") - @ExcludeMissing - fun _idleTimeoutMs(): JsonField = idleTimeoutMs + accept( + object : Visitor { + override fun visitServerVad(serverVad: ServerVad) { + serverVad.validate() + } - /** - * Returns the raw JSON value of [interruptResponse]. - * - * Unlike [interruptResponse], this method doesn't throw if the JSON field has an unexpected - * type. - */ - @JsonProperty("interrupt_response") - @ExcludeMissing - fun _interruptResponse(): JsonField = interruptResponse + override fun visitSemanticVad(semanticVad: SemanticVad) { + semanticVad.validate() + } + } + ) + validated = true + } - /** - * Returns the raw JSON value of [prefixPaddingMs]. - * - * Unlike [prefixPaddingMs], this method doesn't throw if the JSON field has an unexpected type. - */ - @JsonProperty("prefix_padding_ms") - @ExcludeMissing - fun _prefixPaddingMs(): JsonField = prefixPaddingMs + fun isValid(): Boolean = + try { + validate() + true + } catch (e: OpenAIInvalidDataException) { + false + } /** - * Returns the raw JSON value of [silenceDurationMs]. + * Returns a score indicating how many valid values are contained in this object recursively. * - * Unlike [silenceDurationMs], this method doesn't throw if the JSON field has an unexpected - * type. + * Used for best match union deserialization. */ - @JsonProperty("silence_duration_ms") - @ExcludeMissing - fun _silenceDurationMs(): JsonField = silenceDurationMs + @JvmSynthetic + internal fun validity(): Int = + accept( + object : Visitor { + override fun visitServerVad(serverVad: ServerVad) = serverVad.validity() - /** - * Returns the raw JSON value of [threshold]. - * - * Unlike [threshold], this method doesn't throw if the JSON field has an unexpected type. - */ - @JsonProperty("threshold") @ExcludeMissing fun _threshold(): JsonField = threshold + override fun visitSemanticVad(semanticVad: SemanticVad) = semanticVad.validity() - /** - * Returns the raw JSON value of [type]. - * - * Unlike [type], this method doesn't throw if the JSON field has an unexpected type. - */ - @JsonProperty("type") @ExcludeMissing fun _type(): JsonField = type + override fun unknown(json: JsonValue?) = 0 + } + ) - @JsonAnySetter - private fun putAdditionalProperty(key: String, value: JsonValue) { - additionalProperties.put(key, value) - } + override fun equals(other: Any?): Boolean { + if (this === other) { + return true + } - @JsonAnyGetter - @ExcludeMissing - fun _additionalProperties(): Map = - Collections.unmodifiableMap(additionalProperties) + return other is RealtimeTranscriptionSessionAudioInputTurnDetection && + serverVad == other.serverVad && + semanticVad == other.semanticVad + } - fun toBuilder() = Builder().from(this) + override fun hashCode(): Int = Objects.hash(serverVad, semanticVad) + + override fun toString(): String = + when { + serverVad != null -> + "RealtimeTranscriptionSessionAudioInputTurnDetection{serverVad=$serverVad}" + semanticVad != null -> + "RealtimeTranscriptionSessionAudioInputTurnDetection{semanticVad=$semanticVad}" + _json != null -> "RealtimeTranscriptionSessionAudioInputTurnDetection{_unknown=$_json}" + else -> + throw IllegalStateException( + "Invalid RealtimeTranscriptionSessionAudioInputTurnDetection" + ) + } companion object { /** - * Returns a mutable builder for constructing an instance of - * [RealtimeTranscriptionSessionAudioInputTurnDetection]. + * Server-side voice activity detection (VAD) which flips on when user speech is detected + * and off after a period of silence. */ - @JvmStatic fun builder() = Builder() - } - - /** A builder for [RealtimeTranscriptionSessionAudioInputTurnDetection]. */ - class Builder internal constructor() { - - private var createResponse: JsonField = JsonMissing.of() - private var eagerness: JsonField = JsonMissing.of() - private var idleTimeoutMs: JsonField = JsonMissing.of() - private var interruptResponse: JsonField = JsonMissing.of() - private var prefixPaddingMs: JsonField = JsonMissing.of() - private var silenceDurationMs: JsonField = JsonMissing.of() - private var threshold: JsonField = JsonMissing.of() - private var type: JsonField = JsonMissing.of() - private var additionalProperties: MutableMap = mutableMapOf() + @JvmStatic + fun ofServerVad(serverVad: ServerVad) = + RealtimeTranscriptionSessionAudioInputTurnDetection(serverVad = serverVad) - @JvmSynthetic - internal fun from( - realtimeTranscriptionSessionAudioInputTurnDetection: - RealtimeTranscriptionSessionAudioInputTurnDetection - ) = apply { - createResponse = realtimeTranscriptionSessionAudioInputTurnDetection.createResponse - eagerness = realtimeTranscriptionSessionAudioInputTurnDetection.eagerness - idleTimeoutMs = realtimeTranscriptionSessionAudioInputTurnDetection.idleTimeoutMs - interruptResponse = - realtimeTranscriptionSessionAudioInputTurnDetection.interruptResponse - prefixPaddingMs = realtimeTranscriptionSessionAudioInputTurnDetection.prefixPaddingMs - silenceDurationMs = - realtimeTranscriptionSessionAudioInputTurnDetection.silenceDurationMs - threshold = realtimeTranscriptionSessionAudioInputTurnDetection.threshold - type = realtimeTranscriptionSessionAudioInputTurnDetection.type - additionalProperties = - realtimeTranscriptionSessionAudioInputTurnDetection.additionalProperties - .toMutableMap() - } + /** + * Server-side semantic turn detection which uses a model to determine when the user has + * finished speaking. + */ + @JvmStatic + fun ofSemanticVad(semanticVad: SemanticVad) = + RealtimeTranscriptionSessionAudioInputTurnDetection(semanticVad = semanticVad) + } - /** Whether or not to automatically generate a response when a VAD stop event occurs. */ - fun createResponse(createResponse: Boolean) = createResponse(JsonField.of(createResponse)) + /** + * An interface that defines how to map each variant of + * [RealtimeTranscriptionSessionAudioInputTurnDetection] to a value of type [T]. + */ + interface Visitor { /** - * Sets [Builder.createResponse] to an arbitrary JSON value. - * - * You should usually call [Builder.createResponse] with a well-typed [Boolean] value - * instead. This method is primarily for setting the field to an undocumented or not yet - * supported value. + * Server-side voice activity detection (VAD) which flips on when user speech is detected + * and off after a period of silence. */ - fun createResponse(createResponse: JsonField) = apply { - this.createResponse = createResponse - } + fun visitServerVad(serverVad: ServerVad): T /** - * Used only for `semantic_vad` mode. The eagerness of the model to respond. `low` will wait - * longer for the user to continue speaking, `high` will respond more quickly. `auto` is the - * default and is equivalent to `medium`. + * Server-side semantic turn detection which uses a model to determine when the user has + * finished speaking. */ - fun eagerness(eagerness: Eagerness) = eagerness(JsonField.of(eagerness)) + fun visitSemanticVad(semanticVad: SemanticVad): T /** - * Sets [Builder.eagerness] to an arbitrary JSON value. + * Maps an unknown variant of [RealtimeTranscriptionSessionAudioInputTurnDetection] to a + * value of type [T]. + * + * An instance of [RealtimeTranscriptionSessionAudioInputTurnDetection] can contain an + * unknown variant if it was deserialized from data that doesn't match any known variant. + * For example, if the SDK is on an older version than the API, then the API may respond + * with new variants that the SDK is unaware of. * - * You should usually call [Builder.eagerness] with a well-typed [Eagerness] value instead. - * This method is primarily for setting the field to an undocumented or not yet supported - * value. + * @throws OpenAIInvalidDataException in the default implementation. */ - fun eagerness(eagerness: JsonField) = apply { this.eagerness = eagerness } + fun unknown(json: JsonValue?): T { + throw OpenAIInvalidDataException( + "Unknown RealtimeTranscriptionSessionAudioInputTurnDetection: $json" + ) + } + } + + internal class Deserializer : + BaseDeserializer( + RealtimeTranscriptionSessionAudioInputTurnDetection::class + ) { + + override fun ObjectCodec.deserialize( + node: JsonNode + ): RealtimeTranscriptionSessionAudioInputTurnDetection { + val json = JsonValue.fromJsonNode(node) + val type = json.asObject().getOrNull()?.get("type")?.asString()?.getOrNull() + + when (type) { + "server_vad" -> { + return tryDeserialize(node, jacksonTypeRef())?.let { + RealtimeTranscriptionSessionAudioInputTurnDetection( + serverVad = it, + _json = json, + ) + } ?: RealtimeTranscriptionSessionAudioInputTurnDetection(_json = json) + } + "semantic_vad" -> { + return tryDeserialize(node, jacksonTypeRef())?.let { + RealtimeTranscriptionSessionAudioInputTurnDetection( + semanticVad = it, + _json = json, + ) + } ?: RealtimeTranscriptionSessionAudioInputTurnDetection(_json = json) + } + } + + return RealtimeTranscriptionSessionAudioInputTurnDetection(_json = json) + } + } + + internal class Serializer : + BaseSerializer( + RealtimeTranscriptionSessionAudioInputTurnDetection::class + ) { + + override fun serialize( + value: RealtimeTranscriptionSessionAudioInputTurnDetection, + generator: JsonGenerator, + provider: SerializerProvider, + ) { + when { + value.serverVad != null -> generator.writeObject(value.serverVad) + value.semanticVad != null -> generator.writeObject(value.semanticVad) + value._json != null -> generator.writeObject(value._json) + else -> + throw IllegalStateException( + "Invalid RealtimeTranscriptionSessionAudioInputTurnDetection" + ) + } + } + } + + /** + * Server-side voice activity detection (VAD) which flips on when user speech is detected and + * off after a period of silence. + */ + class ServerVad + private constructor( + private val type: JsonValue, + private val createResponse: JsonField, + private val idleTimeoutMs: JsonField, + private val interruptResponse: JsonField, + private val prefixPaddingMs: JsonField, + private val silenceDurationMs: JsonField, + private val threshold: JsonField, + private val additionalProperties: MutableMap, + ) { + + @JsonCreator + private constructor( + @JsonProperty("type") @ExcludeMissing type: JsonValue = JsonMissing.of(), + @JsonProperty("create_response") + @ExcludeMissing + createResponse: JsonField = JsonMissing.of(), + @JsonProperty("idle_timeout_ms") + @ExcludeMissing + idleTimeoutMs: JsonField = JsonMissing.of(), + @JsonProperty("interrupt_response") + @ExcludeMissing + interruptResponse: JsonField = JsonMissing.of(), + @JsonProperty("prefix_padding_ms") + @ExcludeMissing + prefixPaddingMs: JsonField = JsonMissing.of(), + @JsonProperty("silence_duration_ms") + @ExcludeMissing + silenceDurationMs: JsonField = JsonMissing.of(), + @JsonProperty("threshold") + @ExcludeMissing + threshold: JsonField = JsonMissing.of(), + ) : this( + type, + createResponse, + idleTimeoutMs, + interruptResponse, + prefixPaddingMs, + silenceDurationMs, + threshold, + mutableMapOf(), + ) /** - * Optional idle timeout after which turn detection will auto-timeout when no additional - * audio is received. + * Type of turn detection, `server_vad` to turn on simple Server VAD. + * + * Expected to always return the following: + * ```java + * JsonValue.from("server_vad") + * ``` + * + * However, this method can be useful for debugging and logging (e.g. if the server + * responded with an unexpected value). */ - fun idleTimeoutMs(idleTimeoutMs: Long?) = idleTimeoutMs(JsonField.ofNullable(idleTimeoutMs)) + @JsonProperty("type") @ExcludeMissing fun _type(): JsonValue = type /** - * Alias for [Builder.idleTimeoutMs]. + * Whether or not to automatically generate a response when a VAD stop event occurs. * - * This unboxed primitive overload exists for backwards compatibility. + * @throws OpenAIInvalidDataException if the JSON field has an unexpected type (e.g. if the + * server responded with an unexpected value). */ - fun idleTimeoutMs(idleTimeoutMs: Long) = idleTimeoutMs(idleTimeoutMs as Long?) - - /** Alias for calling [Builder.idleTimeoutMs] with `idleTimeoutMs.orElse(null)`. */ - fun idleTimeoutMs(idleTimeoutMs: Optional) = idleTimeoutMs(idleTimeoutMs.getOrNull()) + fun createResponse(): Optional = createResponse.getOptional("create_response") /** - * Sets [Builder.idleTimeoutMs] to an arbitrary JSON value. + * Optional timeout after which a model response will be triggered automatically. This is + * useful for situations in which a long pause from the user is unexpected, such as a phone + * call. The model will effectively prompt the user to continue the conversation based on + * the current context. * - * You should usually call [Builder.idleTimeoutMs] with a well-typed [Long] value instead. - * This method is primarily for setting the field to an undocumented or not yet supported - * value. + * The timeout value will be applied after the last model response's audio has finished + * playing, i.e. it's set to the `response.done` time plus audio playback duration. + * + * An `input_audio_buffer.timeout_triggered` event (plus events associated with the + * Response) will be emitted when the timeout is reached. Idle timeout is currently only + * supported for `server_vad` mode. + * + * @throws OpenAIInvalidDataException if the JSON field has an unexpected type (e.g. if the + * server responded with an unexpected value). */ - fun idleTimeoutMs(idleTimeoutMs: JsonField) = apply { - this.idleTimeoutMs = idleTimeoutMs - } + fun idleTimeoutMs(): Optional = idleTimeoutMs.getOptional("idle_timeout_ms") /** * Whether or not to automatically interrupt any ongoing response with output to the default * conversation (i.e. `conversation` of `auto`) when a VAD start event occurs. - */ - fun interruptResponse(interruptResponse: Boolean) = - interruptResponse(JsonField.of(interruptResponse)) - - /** - * Sets [Builder.interruptResponse] to an arbitrary JSON value. * - * You should usually call [Builder.interruptResponse] with a well-typed [Boolean] value - * instead. This method is primarily for setting the field to an undocumented or not yet - * supported value. + * @throws OpenAIInvalidDataException if the JSON field has an unexpected type (e.g. if the + * server responded with an unexpected value). */ - fun interruptResponse(interruptResponse: JsonField) = apply { - this.interruptResponse = interruptResponse - } + fun interruptResponse(): Optional = + interruptResponse.getOptional("interrupt_response") /** * Used only for `server_vad` mode. Amount of audio to include before the VAD detected * speech (in milliseconds). Defaults to 300ms. - */ - fun prefixPaddingMs(prefixPaddingMs: Long) = prefixPaddingMs(JsonField.of(prefixPaddingMs)) - - /** - * Sets [Builder.prefixPaddingMs] to an arbitrary JSON value. * - * You should usually call [Builder.prefixPaddingMs] with a well-typed [Long] value instead. - * This method is primarily for setting the field to an undocumented or not yet supported - * value. + * @throws OpenAIInvalidDataException if the JSON field has an unexpected type (e.g. if the + * server responded with an unexpected value). */ - fun prefixPaddingMs(prefixPaddingMs: JsonField) = apply { - this.prefixPaddingMs = prefixPaddingMs - } + fun prefixPaddingMs(): Optional = prefixPaddingMs.getOptional("prefix_padding_ms") /** * Used only for `server_vad` mode. Duration of silence to detect speech stop (in * milliseconds). Defaults to 500ms. With shorter values the model will respond more * quickly, but may jump in on short pauses from the user. - */ - fun silenceDurationMs(silenceDurationMs: Long) = - silenceDurationMs(JsonField.of(silenceDurationMs)) - - /** - * Sets [Builder.silenceDurationMs] to an arbitrary JSON value. * - * You should usually call [Builder.silenceDurationMs] with a well-typed [Long] value - * instead. This method is primarily for setting the field to an undocumented or not yet - * supported value. + * @throws OpenAIInvalidDataException if the JSON field has an unexpected type (e.g. if the + * server responded with an unexpected value). */ - fun silenceDurationMs(silenceDurationMs: JsonField) = apply { - this.silenceDurationMs = silenceDurationMs - } + fun silenceDurationMs(): Optional = + silenceDurationMs.getOptional("silence_duration_ms") /** * Used only for `server_vad` mode. Activation threshold for VAD (0.0 to 1.0), this defaults * to 0.5. A higher threshold will require louder audio to activate the model, and thus * might perform better in noisy environments. + * + * @throws OpenAIInvalidDataException if the JSON field has an unexpected type (e.g. if the + * server responded with an unexpected value). + */ + fun threshold(): Optional = threshold.getOptional("threshold") + + /** + * Returns the raw JSON value of [createResponse]. + * + * Unlike [createResponse], this method doesn't throw if the JSON field has an unexpected + * type. */ - fun threshold(threshold: Double) = threshold(JsonField.of(threshold)) + @JsonProperty("create_response") + @ExcludeMissing + fun _createResponse(): JsonField = createResponse /** - * Sets [Builder.threshold] to an arbitrary JSON value. + * Returns the raw JSON value of [idleTimeoutMs]. * - * You should usually call [Builder.threshold] with a well-typed [Double] value instead. - * This method is primarily for setting the field to an undocumented or not yet supported - * value. + * Unlike [idleTimeoutMs], this method doesn't throw if the JSON field has an unexpected + * type. */ - fun threshold(threshold: JsonField) = apply { this.threshold = threshold } + @JsonProperty("idle_timeout_ms") + @ExcludeMissing + fun _idleTimeoutMs(): JsonField = idleTimeoutMs - /** Type of turn detection. */ - fun type(type: Type) = type(JsonField.of(type)) + /** + * Returns the raw JSON value of [interruptResponse]. + * + * Unlike [interruptResponse], this method doesn't throw if the JSON field has an unexpected + * type. + */ + @JsonProperty("interrupt_response") + @ExcludeMissing + fun _interruptResponse(): JsonField = interruptResponse /** - * Sets [Builder.type] to an arbitrary JSON value. + * Returns the raw JSON value of [prefixPaddingMs]. * - * You should usually call [Builder.type] with a well-typed [Type] value instead. This - * method is primarily for setting the field to an undocumented or not yet supported value. + * Unlike [prefixPaddingMs], this method doesn't throw if the JSON field has an unexpected + * type. */ - fun type(type: JsonField) = apply { this.type = type } + @JsonProperty("prefix_padding_ms") + @ExcludeMissing + fun _prefixPaddingMs(): JsonField = prefixPaddingMs - fun additionalProperties(additionalProperties: Map) = apply { - this.additionalProperties.clear() - putAllAdditionalProperties(additionalProperties) - } + /** + * Returns the raw JSON value of [silenceDurationMs]. + * + * Unlike [silenceDurationMs], this method doesn't throw if the JSON field has an unexpected + * type. + */ + @JsonProperty("silence_duration_ms") + @ExcludeMissing + fun _silenceDurationMs(): JsonField = silenceDurationMs - fun putAdditionalProperty(key: String, value: JsonValue) = apply { + /** + * Returns the raw JSON value of [threshold]. + * + * Unlike [threshold], this method doesn't throw if the JSON field has an unexpected type. + */ + @JsonProperty("threshold") @ExcludeMissing fun _threshold(): JsonField = threshold + + @JsonAnySetter + private fun putAdditionalProperty(key: String, value: JsonValue) { additionalProperties.put(key, value) } - fun putAllAdditionalProperties(additionalProperties: Map) = apply { - this.additionalProperties.putAll(additionalProperties) - } + @JsonAnyGetter + @ExcludeMissing + fun _additionalProperties(): Map = + Collections.unmodifiableMap(additionalProperties) + + fun toBuilder() = Builder().from(this) - fun removeAdditionalProperty(key: String) = apply { additionalProperties.remove(key) } + companion object { - fun removeAllAdditionalProperties(keys: Set) = apply { - keys.forEach(::removeAdditionalProperty) + /** Returns a mutable builder for constructing an instance of [ServerVad]. */ + @JvmStatic fun builder() = Builder() } - /** - * Returns an immutable instance of [RealtimeTranscriptionSessionAudioInputTurnDetection]. - * - * Further updates to this [Builder] will not mutate the returned instance. - */ - fun build(): RealtimeTranscriptionSessionAudioInputTurnDetection = - RealtimeTranscriptionSessionAudioInputTurnDetection( - createResponse, - eagerness, - idleTimeoutMs, - interruptResponse, - prefixPaddingMs, - silenceDurationMs, - threshold, - type, - additionalProperties.toMutableMap(), - ) - } + /** A builder for [ServerVad]. */ + class Builder internal constructor() { + + private var type: JsonValue = JsonValue.from("server_vad") + private var createResponse: JsonField = JsonMissing.of() + private var idleTimeoutMs: JsonField = JsonMissing.of() + private var interruptResponse: JsonField = JsonMissing.of() + private var prefixPaddingMs: JsonField = JsonMissing.of() + private var silenceDurationMs: JsonField = JsonMissing.of() + private var threshold: JsonField = JsonMissing.of() + private var additionalProperties: MutableMap = mutableMapOf() + + @JvmSynthetic + internal fun from(serverVad: ServerVad) = apply { + type = serverVad.type + createResponse = serverVad.createResponse + idleTimeoutMs = serverVad.idleTimeoutMs + interruptResponse = serverVad.interruptResponse + prefixPaddingMs = serverVad.prefixPaddingMs + silenceDurationMs = serverVad.silenceDurationMs + threshold = serverVad.threshold + additionalProperties = serverVad.additionalProperties.toMutableMap() + } - private var validated: Boolean = false + /** + * Sets the field to an arbitrary JSON value. + * + * It is usually unnecessary to call this method because the field defaults to the + * following: + * ```java + * JsonValue.from("server_vad") + * ``` + * + * This method is primarily for setting the field to an undocumented or not yet + * supported value. + */ + fun type(type: JsonValue) = apply { this.type = type } - fun validate(): RealtimeTranscriptionSessionAudioInputTurnDetection = apply { - if (validated) { - return@apply - } + /** Whether or not to automatically generate a response when a VAD stop event occurs. */ + fun createResponse(createResponse: Boolean) = + createResponse(JsonField.of(createResponse)) - createResponse() - eagerness().ifPresent { it.validate() } - idleTimeoutMs() - interruptResponse() - prefixPaddingMs() - silenceDurationMs() - threshold() - type().ifPresent { it.validate() } - validated = true - } + /** + * Sets [Builder.createResponse] to an arbitrary JSON value. + * + * You should usually call [Builder.createResponse] with a well-typed [Boolean] value + * instead. This method is primarily for setting the field to an undocumented or not yet + * supported value. + */ + fun createResponse(createResponse: JsonField) = apply { + this.createResponse = createResponse + } - fun isValid(): Boolean = - try { - validate() - true - } catch (e: OpenAIInvalidDataException) { - false - } + /** + * Optional timeout after which a model response will be triggered automatically. This + * is useful for situations in which a long pause from the user is unexpected, such as a + * phone call. The model will effectively prompt the user to continue the conversation + * based on the current context. + * + * The timeout value will be applied after the last model response's audio has finished + * playing, i.e. it's set to the `response.done` time plus audio playback duration. + * + * An `input_audio_buffer.timeout_triggered` event (plus events associated with the + * Response) will be emitted when the timeout is reached. Idle timeout is currently only + * supported for `server_vad` mode. + */ + fun idleTimeoutMs(idleTimeoutMs: Long?) = + idleTimeoutMs(JsonField.ofNullable(idleTimeoutMs)) - /** - * Returns a score indicating how many valid values are contained in this object recursively. - * - * Used for best match union deserialization. - */ - @JvmSynthetic - internal fun validity(): Int = - (if (createResponse.asKnown().isPresent) 1 else 0) + - (eagerness.asKnown().getOrNull()?.validity() ?: 0) + - (if (idleTimeoutMs.asKnown().isPresent) 1 else 0) + - (if (interruptResponse.asKnown().isPresent) 1 else 0) + - (if (prefixPaddingMs.asKnown().isPresent) 1 else 0) + - (if (silenceDurationMs.asKnown().isPresent) 1 else 0) + - (if (threshold.asKnown().isPresent) 1 else 0) + - (type.asKnown().getOrNull()?.validity() ?: 0) + /** + * Alias for [Builder.idleTimeoutMs]. + * + * This unboxed primitive overload exists for backwards compatibility. + */ + fun idleTimeoutMs(idleTimeoutMs: Long) = idleTimeoutMs(idleTimeoutMs as Long?) - /** - * Used only for `semantic_vad` mode. The eagerness of the model to respond. `low` will wait - * longer for the user to continue speaking, `high` will respond more quickly. `auto` is the - * default and is equivalent to `medium`. - */ - class Eagerness @JsonCreator private constructor(private val value: JsonField) : Enum { + /** Alias for calling [Builder.idleTimeoutMs] with `idleTimeoutMs.orElse(null)`. */ + fun idleTimeoutMs(idleTimeoutMs: Optional) = + idleTimeoutMs(idleTimeoutMs.getOrNull()) - /** - * Returns this class instance's raw value. - * - * This is usually only useful if this instance was deserialized from data that doesn't - * match any known member, and you want to know that value. For example, if the SDK is on an - * older version than the API, then the API may respond with new members that the SDK is - * unaware of. - */ - @com.fasterxml.jackson.annotation.JsonValue fun _value(): JsonField = value + /** + * Sets [Builder.idleTimeoutMs] to an arbitrary JSON value. + * + * You should usually call [Builder.idleTimeoutMs] with a well-typed [Long] value + * instead. This method is primarily for setting the field to an undocumented or not yet + * supported value. + */ + fun idleTimeoutMs(idleTimeoutMs: JsonField) = apply { + this.idleTimeoutMs = idleTimeoutMs + } - companion object { + /** + * Whether or not to automatically interrupt any ongoing response with output to the + * default conversation (i.e. `conversation` of `auto`) when a VAD start event occurs. + */ + fun interruptResponse(interruptResponse: Boolean) = + interruptResponse(JsonField.of(interruptResponse)) - @JvmField val LOW = of("low") + /** + * Sets [Builder.interruptResponse] to an arbitrary JSON value. + * + * You should usually call [Builder.interruptResponse] with a well-typed [Boolean] value + * instead. This method is primarily for setting the field to an undocumented or not yet + * supported value. + */ + fun interruptResponse(interruptResponse: JsonField) = apply { + this.interruptResponse = interruptResponse + } - @JvmField val MEDIUM = of("medium") + /** + * Used only for `server_vad` mode. Amount of audio to include before the VAD detected + * speech (in milliseconds). Defaults to 300ms. + */ + fun prefixPaddingMs(prefixPaddingMs: Long) = + prefixPaddingMs(JsonField.of(prefixPaddingMs)) - @JvmField val HIGH = of("high") + /** + * Sets [Builder.prefixPaddingMs] to an arbitrary JSON value. + * + * You should usually call [Builder.prefixPaddingMs] with a well-typed [Long] value + * instead. This method is primarily for setting the field to an undocumented or not yet + * supported value. + */ + fun prefixPaddingMs(prefixPaddingMs: JsonField) = apply { + this.prefixPaddingMs = prefixPaddingMs + } - @JvmField val AUTO = of("auto") + /** + * Used only for `server_vad` mode. Duration of silence to detect speech stop (in + * milliseconds). Defaults to 500ms. With shorter values the model will respond more + * quickly, but may jump in on short pauses from the user. + */ + fun silenceDurationMs(silenceDurationMs: Long) = + silenceDurationMs(JsonField.of(silenceDurationMs)) - @JvmStatic fun of(value: String) = Eagerness(JsonField.of(value)) - } + /** + * Sets [Builder.silenceDurationMs] to an arbitrary JSON value. + * + * You should usually call [Builder.silenceDurationMs] with a well-typed [Long] value + * instead. This method is primarily for setting the field to an undocumented or not yet + * supported value. + */ + fun silenceDurationMs(silenceDurationMs: JsonField) = apply { + this.silenceDurationMs = silenceDurationMs + } - /** An enum containing [Eagerness]'s known values. */ - enum class Known { - LOW, - MEDIUM, - HIGH, - AUTO, - } + /** + * Used only for `server_vad` mode. Activation threshold for VAD (0.0 to 1.0), this + * defaults to 0.5. A higher threshold will require louder audio to activate the model, + * and thus might perform better in noisy environments. + */ + fun threshold(threshold: Double) = threshold(JsonField.of(threshold)) - /** - * An enum containing [Eagerness]'s known values, as well as an [_UNKNOWN] member. - * - * An instance of [Eagerness] can contain an unknown value in a couple of cases: - * - It was deserialized from data that doesn't match any known member. For example, if the - * SDK is on an older version than the API, then the API may respond with new members that - * the SDK is unaware of. - * - It was constructed with an arbitrary value using the [of] method. - */ - enum class Value { - LOW, - MEDIUM, - HIGH, - AUTO, /** - * An enum member indicating that [Eagerness] was instantiated with an unknown value. + * Sets [Builder.threshold] to an arbitrary JSON value. + * + * You should usually call [Builder.threshold] with a well-typed [Double] value instead. + * This method is primarily for setting the field to an undocumented or not yet + * supported value. */ - _UNKNOWN, - } + fun threshold(threshold: JsonField) = apply { this.threshold = threshold } - /** - * Returns an enum member corresponding to this class instance's value, or [Value._UNKNOWN] - * if the class was instantiated with an unknown value. - * - * Use the [known] method instead if you're certain the value is always known or if you want - * to throw for the unknown case. - */ - fun value(): Value = - when (this) { - LOW -> Value.LOW - MEDIUM -> Value.MEDIUM - HIGH -> Value.HIGH - AUTO -> Value.AUTO - else -> Value._UNKNOWN + fun additionalProperties(additionalProperties: Map) = apply { + this.additionalProperties.clear() + putAllAdditionalProperties(additionalProperties) } - /** - * Returns an enum member corresponding to this class instance's value. - * - * Use the [value] method instead if you're uncertain the value is always known and don't - * want to throw for the unknown case. - * - * @throws OpenAIInvalidDataException if this class instance's value is a not a known - * member. - */ - fun known(): Known = - when (this) { - LOW -> Known.LOW - MEDIUM -> Known.MEDIUM - HIGH -> Known.HIGH - AUTO -> Known.AUTO - else -> throw OpenAIInvalidDataException("Unknown Eagerness: $value") + fun putAdditionalProperty(key: String, value: JsonValue) = apply { + additionalProperties.put(key, value) } - /** - * Returns this class instance's primitive wire representation. - * - * This differs from the [toString] method because that method is primarily for debugging - * and generally doesn't throw. - * - * @throws OpenAIInvalidDataException if this class instance's value does not have the - * expected primitive type. - */ - fun asString(): String = - _value().asString().orElseThrow { OpenAIInvalidDataException("Value is not a String") } + fun putAllAdditionalProperties(additionalProperties: Map) = apply { + this.additionalProperties.putAll(additionalProperties) + } + + fun removeAdditionalProperty(key: String) = apply { additionalProperties.remove(key) } + + fun removeAllAdditionalProperties(keys: Set) = apply { + keys.forEach(::removeAdditionalProperty) + } + + /** + * Returns an immutable instance of [ServerVad]. + * + * Further updates to this [Builder] will not mutate the returned instance. + */ + fun build(): ServerVad = + ServerVad( + type, + createResponse, + idleTimeoutMs, + interruptResponse, + prefixPaddingMs, + silenceDurationMs, + threshold, + additionalProperties.toMutableMap(), + ) + } private var validated: Boolean = false - fun validate(): Eagerness = apply { + fun validate(): ServerVad = apply { if (validated) { return@apply } - known() + _type().let { + if (it != JsonValue.from("server_vad")) { + throw OpenAIInvalidDataException("'type' is invalid, received $it") + } + } + createResponse() + idleTimeoutMs() + interruptResponse() + prefixPaddingMs() + silenceDurationMs() + threshold() validated = true } @@ -616,115 +704,298 @@ private constructor( * * Used for best match union deserialization. */ - @JvmSynthetic internal fun validity(): Int = if (value() == Value._UNKNOWN) 0 else 1 + @JvmSynthetic + internal fun validity(): Int = + type.let { if (it == JsonValue.from("server_vad")) 1 else 0 } + + (if (createResponse.asKnown().isPresent) 1 else 0) + + (if (idleTimeoutMs.asKnown().isPresent) 1 else 0) + + (if (interruptResponse.asKnown().isPresent) 1 else 0) + + (if (prefixPaddingMs.asKnown().isPresent) 1 else 0) + + (if (silenceDurationMs.asKnown().isPresent) 1 else 0) + + (if (threshold.asKnown().isPresent) 1 else 0) override fun equals(other: Any?): Boolean { if (this === other) { return true } - return other is Eagerness && value == other.value + return other is ServerVad && + type == other.type && + createResponse == other.createResponse && + idleTimeoutMs == other.idleTimeoutMs && + interruptResponse == other.interruptResponse && + prefixPaddingMs == other.prefixPaddingMs && + silenceDurationMs == other.silenceDurationMs && + threshold == other.threshold && + additionalProperties == other.additionalProperties + } + + private val hashCode: Int by lazy { + Objects.hash( + type, + createResponse, + idleTimeoutMs, + interruptResponse, + prefixPaddingMs, + silenceDurationMs, + threshold, + additionalProperties, + ) } - override fun hashCode() = value.hashCode() + override fun hashCode(): Int = hashCode - override fun toString() = value.toString() + override fun toString() = + "ServerVad{type=$type, createResponse=$createResponse, idleTimeoutMs=$idleTimeoutMs, interruptResponse=$interruptResponse, prefixPaddingMs=$prefixPaddingMs, silenceDurationMs=$silenceDurationMs, threshold=$threshold, additionalProperties=$additionalProperties}" } - /** Type of turn detection. */ - class Type @JsonCreator private constructor(private val value: JsonField) : Enum { + /** + * Server-side semantic turn detection which uses a model to determine when the user has + * finished speaking. + */ + class SemanticVad + private constructor( + private val type: JsonValue, + private val createResponse: JsonField, + private val eagerness: JsonField, + private val interruptResponse: JsonField, + private val additionalProperties: MutableMap, + ) { + + @JsonCreator + private constructor( + @JsonProperty("type") @ExcludeMissing type: JsonValue = JsonMissing.of(), + @JsonProperty("create_response") + @ExcludeMissing + createResponse: JsonField = JsonMissing.of(), + @JsonProperty("eagerness") + @ExcludeMissing + eagerness: JsonField = JsonMissing.of(), + @JsonProperty("interrupt_response") + @ExcludeMissing + interruptResponse: JsonField = JsonMissing.of(), + ) : this(type, createResponse, eagerness, interruptResponse, mutableMapOf()) /** - * Returns this class instance's raw value. + * Type of turn detection, `semantic_vad` to turn on Semantic VAD. + * + * Expected to always return the following: + * ```java + * JsonValue.from("semantic_vad") + * ``` * - * This is usually only useful if this instance was deserialized from data that doesn't - * match any known member, and you want to know that value. For example, if the SDK is on an - * older version than the API, then the API may respond with new members that the SDK is - * unaware of. + * However, this method can be useful for debugging and logging (e.g. if the server + * responded with an unexpected value). */ - @com.fasterxml.jackson.annotation.JsonValue fun _value(): JsonField = value - - companion object { - - @JvmField val SERVER_VAD = of("server_vad") - - @JvmField val SEMANTIC_VAD = of("semantic_vad") - - @JvmStatic fun of(value: String) = Type(JsonField.of(value)) - } - - /** An enum containing [Type]'s known values. */ - enum class Known { - SERVER_VAD, - SEMANTIC_VAD, - } + @JsonProperty("type") @ExcludeMissing fun _type(): JsonValue = type /** - * An enum containing [Type]'s known values, as well as an [_UNKNOWN] member. + * Whether or not to automatically generate a response when a VAD stop event occurs. * - * An instance of [Type] can contain an unknown value in a couple of cases: - * - It was deserialized from data that doesn't match any known member. For example, if the - * SDK is on an older version than the API, then the API may respond with new members that - * the SDK is unaware of. - * - It was constructed with an arbitrary value using the [of] method. + * @throws OpenAIInvalidDataException if the JSON field has an unexpected type (e.g. if the + * server responded with an unexpected value). */ - enum class Value { - SERVER_VAD, - SEMANTIC_VAD, - /** An enum member indicating that [Type] was instantiated with an unknown value. */ - _UNKNOWN, - } + fun createResponse(): Optional = createResponse.getOptional("create_response") /** - * Returns an enum member corresponding to this class instance's value, or [Value._UNKNOWN] - * if the class was instantiated with an unknown value. + * Used only for `semantic_vad` mode. The eagerness of the model to respond. `low` will wait + * longer for the user to continue speaking, `high` will respond more quickly. `auto` is the + * default and is equivalent to `medium`. `low`, `medium`, and `high` have max timeouts of + * 8s, 4s, and 2s respectively. * - * Use the [known] method instead if you're certain the value is always known or if you want - * to throw for the unknown case. + * @throws OpenAIInvalidDataException if the JSON field has an unexpected type (e.g. if the + * server responded with an unexpected value). */ - fun value(): Value = - when (this) { - SERVER_VAD -> Value.SERVER_VAD - SEMANTIC_VAD -> Value.SEMANTIC_VAD - else -> Value._UNKNOWN - } + fun eagerness(): Optional = eagerness.getOptional("eagerness") /** - * Returns an enum member corresponding to this class instance's value. + * Whether or not to automatically interrupt any ongoing response with output to the default + * conversation (i.e. `conversation` of `auto`) when a VAD start event occurs. * - * Use the [value] method instead if you're uncertain the value is always known and don't - * want to throw for the unknown case. + * @throws OpenAIInvalidDataException if the JSON field has an unexpected type (e.g. if the + * server responded with an unexpected value). + */ + fun interruptResponse(): Optional = + interruptResponse.getOptional("interrupt_response") + + /** + * Returns the raw JSON value of [createResponse]. * - * @throws OpenAIInvalidDataException if this class instance's value is a not a known - * member. + * Unlike [createResponse], this method doesn't throw if the JSON field has an unexpected + * type. */ - fun known(): Known = - when (this) { - SERVER_VAD -> Known.SERVER_VAD - SEMANTIC_VAD -> Known.SEMANTIC_VAD - else -> throw OpenAIInvalidDataException("Unknown Type: $value") - } + @JsonProperty("create_response") + @ExcludeMissing + fun _createResponse(): JsonField = createResponse /** - * Returns this class instance's primitive wire representation. + * Returns the raw JSON value of [eagerness]. * - * This differs from the [toString] method because that method is primarily for debugging - * and generally doesn't throw. + * Unlike [eagerness], this method doesn't throw if the JSON field has an unexpected type. + */ + @JsonProperty("eagerness") + @ExcludeMissing + fun _eagerness(): JsonField = eagerness + + /** + * Returns the raw JSON value of [interruptResponse]. * - * @throws OpenAIInvalidDataException if this class instance's value does not have the - * expected primitive type. + * Unlike [interruptResponse], this method doesn't throw if the JSON field has an unexpected + * type. */ - fun asString(): String = - _value().asString().orElseThrow { OpenAIInvalidDataException("Value is not a String") } + @JsonProperty("interrupt_response") + @ExcludeMissing + fun _interruptResponse(): JsonField = interruptResponse + + @JsonAnySetter + private fun putAdditionalProperty(key: String, value: JsonValue) { + additionalProperties.put(key, value) + } + + @JsonAnyGetter + @ExcludeMissing + fun _additionalProperties(): Map = + Collections.unmodifiableMap(additionalProperties) + + fun toBuilder() = Builder().from(this) + + companion object { + + /** Returns a mutable builder for constructing an instance of [SemanticVad]. */ + @JvmStatic fun builder() = Builder() + } + + /** A builder for [SemanticVad]. */ + class Builder internal constructor() { + + private var type: JsonValue = JsonValue.from("semantic_vad") + private var createResponse: JsonField = JsonMissing.of() + private var eagerness: JsonField = JsonMissing.of() + private var interruptResponse: JsonField = JsonMissing.of() + private var additionalProperties: MutableMap = mutableMapOf() + + @JvmSynthetic + internal fun from(semanticVad: SemanticVad) = apply { + type = semanticVad.type + createResponse = semanticVad.createResponse + eagerness = semanticVad.eagerness + interruptResponse = semanticVad.interruptResponse + additionalProperties = semanticVad.additionalProperties.toMutableMap() + } + + /** + * Sets the field to an arbitrary JSON value. + * + * It is usually unnecessary to call this method because the field defaults to the + * following: + * ```java + * JsonValue.from("semantic_vad") + * ``` + * + * This method is primarily for setting the field to an undocumented or not yet + * supported value. + */ + fun type(type: JsonValue) = apply { this.type = type } + + /** Whether or not to automatically generate a response when a VAD stop event occurs. */ + fun createResponse(createResponse: Boolean) = + createResponse(JsonField.of(createResponse)) + + /** + * Sets [Builder.createResponse] to an arbitrary JSON value. + * + * You should usually call [Builder.createResponse] with a well-typed [Boolean] value + * instead. This method is primarily for setting the field to an undocumented or not yet + * supported value. + */ + fun createResponse(createResponse: JsonField) = apply { + this.createResponse = createResponse + } + + /** + * Used only for `semantic_vad` mode. The eagerness of the model to respond. `low` will + * wait longer for the user to continue speaking, `high` will respond more quickly. + * `auto` is the default and is equivalent to `medium`. `low`, `medium`, and `high` have + * max timeouts of 8s, 4s, and 2s respectively. + */ + fun eagerness(eagerness: Eagerness) = eagerness(JsonField.of(eagerness)) + + /** + * Sets [Builder.eagerness] to an arbitrary JSON value. + * + * You should usually call [Builder.eagerness] with a well-typed [Eagerness] value + * instead. This method is primarily for setting the field to an undocumented or not yet + * supported value. + */ + fun eagerness(eagerness: JsonField) = apply { this.eagerness = eagerness } + + /** + * Whether or not to automatically interrupt any ongoing response with output to the + * default conversation (i.e. `conversation` of `auto`) when a VAD start event occurs. + */ + fun interruptResponse(interruptResponse: Boolean) = + interruptResponse(JsonField.of(interruptResponse)) + + /** + * Sets [Builder.interruptResponse] to an arbitrary JSON value. + * + * You should usually call [Builder.interruptResponse] with a well-typed [Boolean] value + * instead. This method is primarily for setting the field to an undocumented or not yet + * supported value. + */ + fun interruptResponse(interruptResponse: JsonField) = apply { + this.interruptResponse = interruptResponse + } + + fun additionalProperties(additionalProperties: Map) = apply { + this.additionalProperties.clear() + putAllAdditionalProperties(additionalProperties) + } + + fun putAdditionalProperty(key: String, value: JsonValue) = apply { + additionalProperties.put(key, value) + } + + fun putAllAdditionalProperties(additionalProperties: Map) = apply { + this.additionalProperties.putAll(additionalProperties) + } + + fun removeAdditionalProperty(key: String) = apply { additionalProperties.remove(key) } + + fun removeAllAdditionalProperties(keys: Set) = apply { + keys.forEach(::removeAdditionalProperty) + } + + /** + * Returns an immutable instance of [SemanticVad]. + * + * Further updates to this [Builder] will not mutate the returned instance. + */ + fun build(): SemanticVad = + SemanticVad( + type, + createResponse, + eagerness, + interruptResponse, + additionalProperties.toMutableMap(), + ) + } private var validated: Boolean = false - fun validate(): Type = apply { + fun validate(): SemanticVad = apply { if (validated) { return@apply } - known() + _type().let { + if (it != JsonValue.from("semantic_vad")) { + throw OpenAIInvalidDataException("'type' is invalid, received $it") + } + } + createResponse() + eagerness().ifPresent { it.validate() } + interruptResponse() validated = true } @@ -742,54 +1013,182 @@ private constructor( * * Used for best match union deserialization. */ - @JvmSynthetic internal fun validity(): Int = if (value() == Value._UNKNOWN) 0 else 1 + @JvmSynthetic + internal fun validity(): Int = + type.let { if (it == JsonValue.from("semantic_vad")) 1 else 0 } + + (if (createResponse.asKnown().isPresent) 1 else 0) + + (eagerness.asKnown().getOrNull()?.validity() ?: 0) + + (if (interruptResponse.asKnown().isPresent) 1 else 0) + + /** + * Used only for `semantic_vad` mode. The eagerness of the model to respond. `low` will wait + * longer for the user to continue speaking, `high` will respond more quickly. `auto` is the + * default and is equivalent to `medium`. `low`, `medium`, and `high` have max timeouts of + * 8s, 4s, and 2s respectively. + */ + class Eagerness @JsonCreator private constructor(private val value: JsonField) : + Enum { + + /** + * Returns this class instance's raw value. + * + * This is usually only useful if this instance was deserialized from data that doesn't + * match any known member, and you want to know that value. For example, if the SDK is + * on an older version than the API, then the API may respond with new members that the + * SDK is unaware of. + */ + @com.fasterxml.jackson.annotation.JsonValue fun _value(): JsonField = value + + companion object { + + @JvmField val LOW = of("low") + + @JvmField val MEDIUM = of("medium") + + @JvmField val HIGH = of("high") + + @JvmField val AUTO = of("auto") + + @JvmStatic fun of(value: String) = Eagerness(JsonField.of(value)) + } + + /** An enum containing [Eagerness]'s known values. */ + enum class Known { + LOW, + MEDIUM, + HIGH, + AUTO, + } + + /** + * An enum containing [Eagerness]'s known values, as well as an [_UNKNOWN] member. + * + * An instance of [Eagerness] can contain an unknown value in a couple of cases: + * - It was deserialized from data that doesn't match any known member. For example, if + * the SDK is on an older version than the API, then the API may respond with new + * members that the SDK is unaware of. + * - It was constructed with an arbitrary value using the [of] method. + */ + enum class Value { + LOW, + MEDIUM, + HIGH, + AUTO, + /** + * An enum member indicating that [Eagerness] was instantiated with an unknown + * value. + */ + _UNKNOWN, + } + + /** + * Returns an enum member corresponding to this class instance's value, or + * [Value._UNKNOWN] if the class was instantiated with an unknown value. + * + * Use the [known] method instead if you're certain the value is always known or if you + * want to throw for the unknown case. + */ + fun value(): Value = + when (this) { + LOW -> Value.LOW + MEDIUM -> Value.MEDIUM + HIGH -> Value.HIGH + AUTO -> Value.AUTO + else -> Value._UNKNOWN + } + + /** + * Returns an enum member corresponding to this class instance's value. + * + * Use the [value] method instead if you're uncertain the value is always known and + * don't want to throw for the unknown case. + * + * @throws OpenAIInvalidDataException if this class instance's value is a not a known + * member. + */ + fun known(): Known = + when (this) { + LOW -> Known.LOW + MEDIUM -> Known.MEDIUM + HIGH -> Known.HIGH + AUTO -> Known.AUTO + else -> throw OpenAIInvalidDataException("Unknown Eagerness: $value") + } + + /** + * Returns this class instance's primitive wire representation. + * + * This differs from the [toString] method because that method is primarily for + * debugging and generally doesn't throw. + * + * @throws OpenAIInvalidDataException if this class instance's value does not have the + * expected primitive type. + */ + fun asString(): String = + _value().asString().orElseThrow { + OpenAIInvalidDataException("Value is not a String") + } + + private var validated: Boolean = false + + fun validate(): Eagerness = apply { + if (validated) { + return@apply + } + + known() + validated = true + } + + fun isValid(): Boolean = + try { + validate() + true + } catch (e: OpenAIInvalidDataException) { + false + } + + /** + * Returns a score indicating how many valid values are contained in this object + * recursively. + * + * Used for best match union deserialization. + */ + @JvmSynthetic internal fun validity(): Int = if (value() == Value._UNKNOWN) 0 else 1 + + override fun equals(other: Any?): Boolean { + if (this === other) { + return true + } + + return other is Eagerness && value == other.value + } + + override fun hashCode() = value.hashCode() + + override fun toString() = value.toString() + } override fun equals(other: Any?): Boolean { if (this === other) { return true } - return other is Type && value == other.value + return other is SemanticVad && + type == other.type && + createResponse == other.createResponse && + eagerness == other.eagerness && + interruptResponse == other.interruptResponse && + additionalProperties == other.additionalProperties } - override fun hashCode() = value.hashCode() - - override fun toString() = value.toString() - } - - override fun equals(other: Any?): Boolean { - if (this === other) { - return true + private val hashCode: Int by lazy { + Objects.hash(type, createResponse, eagerness, interruptResponse, additionalProperties) } - return other is RealtimeTranscriptionSessionAudioInputTurnDetection && - createResponse == other.createResponse && - eagerness == other.eagerness && - idleTimeoutMs == other.idleTimeoutMs && - interruptResponse == other.interruptResponse && - prefixPaddingMs == other.prefixPaddingMs && - silenceDurationMs == other.silenceDurationMs && - threshold == other.threshold && - type == other.type && - additionalProperties == other.additionalProperties - } + override fun hashCode(): Int = hashCode - private val hashCode: Int by lazy { - Objects.hash( - createResponse, - eagerness, - idleTimeoutMs, - interruptResponse, - prefixPaddingMs, - silenceDurationMs, - threshold, - type, - additionalProperties, - ) + override fun toString() = + "SemanticVad{type=$type, createResponse=$createResponse, eagerness=$eagerness, interruptResponse=$interruptResponse, additionalProperties=$additionalProperties}" } - - override fun hashCode(): Int = hashCode - - override fun toString() = - "RealtimeTranscriptionSessionAudioInputTurnDetection{createResponse=$createResponse, eagerness=$eagerness, idleTimeoutMs=$idleTimeoutMs, interruptResponse=$interruptResponse, prefixPaddingMs=$prefixPaddingMs, silenceDurationMs=$silenceDurationMs, threshold=$threshold, type=$type, additionalProperties=$additionalProperties}" } diff --git a/openai-java-core/src/main/kotlin/com/openai/models/realtime/clientsecrets/RealtimeSessionCreateResponse.kt b/openai-java-core/src/main/kotlin/com/openai/models/realtime/clientsecrets/RealtimeSessionCreateResponse.kt index 721d6e94..4eed680c 100644 --- a/openai-java-core/src/main/kotlin/com/openai/models/realtime/clientsecrets/RealtimeSessionCreateResponse.kt +++ b/openai-java-core/src/main/kotlin/com/openai/models/realtime/clientsecrets/RealtimeSessionCreateResponse.kt @@ -1027,13 +1027,16 @@ private constructor( /** * Configuration for turn detection, ether Server VAD or Semantic VAD. This can be set * to `null` to turn off, in which case the client must manually trigger model response. + * * Server VAD means that the model will detect the start and end of speech based on - * audio volume and respond at the end of user speech. Semantic VAD is more advanced and - * uses a turn detection model (in conjunction with VAD) to semantically estimate - * whether the user has finished speaking, then dynamically sets a timeout based on this - * probability. For example, if user audio trails off with "uhhm", the model will score - * a low probability of turn end and wait longer for the user to continue speaking. This - * can be useful for more natural conversations, but may have a higher latency. + * audio volume and respond at the end of user speech. + * + * Semantic VAD is more advanced and uses a turn detection model (in conjunction with + * VAD) to semantically estimate whether the user has finished speaking, then + * dynamically sets a timeout based on this probability. For example, if user audio + * trails off with "uhhm", the model will score a low probability of turn end and wait + * longer for the user to continue speaking. This can be useful for more natural + * conversations, but may have a higher latency. * * @throws OpenAIInvalidDataException if the JSON field has an unexpected type (e.g. if * the server responded with an unexpected value). @@ -1192,17 +1195,24 @@ private constructor( /** * Configuration for turn detection, ether Server VAD or Semantic VAD. This can be * set to `null` to turn off, in which case the client must manually trigger model - * response. Server VAD means that the model will detect the start and end of speech - * based on audio volume and respond at the end of user speech. Semantic VAD is more - * advanced and uses a turn detection model (in conjunction with VAD) to - * semantically estimate whether the user has finished speaking, then dynamically - * sets a timeout based on this probability. For example, if user audio trails off - * with "uhhm", the model will score a low probability of turn end and wait longer - * for the user to continue speaking. This can be useful for more natural - * conversations, but may have a higher latency. + * response. + * + * Server VAD means that the model will detect the start and end of speech based on + * audio volume and respond at the end of user speech. + * + * Semantic VAD is more advanced and uses a turn detection model (in conjunction + * with VAD) to semantically estimate whether the user has finished speaking, then + * dynamically sets a timeout based on this probability. For example, if user audio + * trails off with "uhhm", the model will score a low probability of turn end and + * wait longer for the user to continue speaking. This can be useful for more + * natural conversations, but may have a higher latency. */ - fun turnDetection(turnDetection: TurnDetection) = - turnDetection(JsonField.of(turnDetection)) + fun turnDetection(turnDetection: TurnDetection?) = + turnDetection(JsonField.ofNullable(turnDetection)) + + /** Alias for calling [Builder.turnDetection] with `turnDetection.orElse(null)`. */ + fun turnDetection(turnDetection: Optional) = + turnDetection(turnDetection.getOrNull()) /** * Sets [Builder.turnDetection] to an arbitrary JSON value. @@ -1215,6 +1225,19 @@ private constructor( this.turnDetection = turnDetection } + /** + * Alias for calling [turnDetection] with `TurnDetection.ofServerVad(serverVad)`. + */ + fun turnDetection(serverVad: TurnDetection.ServerVad) = + turnDetection(TurnDetection.ofServerVad(serverVad)) + + /** + * Alias for calling [turnDetection] with + * `TurnDetection.ofSemanticVad(semanticVad)`. + */ + fun turnDetection(semanticVad: TurnDetection.SemanticVad) = + turnDetection(TurnDetection.ofSemanticVad(semanticVad)) + fun additionalProperties(additionalProperties: Map) = apply { this.additionalProperties.clear() putAllAdditionalProperties(additionalProperties) @@ -1455,624 +1478,669 @@ private constructor( /** * Configuration for turn detection, ether Server VAD or Semantic VAD. This can be set * to `null` to turn off, in which case the client must manually trigger model response. + * * Server VAD means that the model will detect the start and end of speech based on - * audio volume and respond at the end of user speech. Semantic VAD is more advanced and - * uses a turn detection model (in conjunction with VAD) to semantically estimate - * whether the user has finished speaking, then dynamically sets a timeout based on this - * probability. For example, if user audio trails off with "uhhm", the model will score - * a low probability of turn end and wait longer for the user to continue speaking. This - * can be useful for more natural conversations, but may have a higher latency. + * audio volume and respond at the end of user speech. + * + * Semantic VAD is more advanced and uses a turn detection model (in conjunction with + * VAD) to semantically estimate whether the user has finished speaking, then + * dynamically sets a timeout based on this probability. For example, if user audio + * trails off with "uhhm", the model will score a low probability of turn end and wait + * longer for the user to continue speaking. This can be useful for more natural + * conversations, but may have a higher latency. */ + @JsonDeserialize(using = TurnDetection.Deserializer::class) + @JsonSerialize(using = TurnDetection.Serializer::class) class TurnDetection private constructor( - private val createResponse: JsonField, - private val eagerness: JsonField, - private val idleTimeoutMs: JsonField, - private val interruptResponse: JsonField, - private val prefixPaddingMs: JsonField, - private val silenceDurationMs: JsonField, - private val threshold: JsonField, - private val type: JsonField, - private val additionalProperties: MutableMap, + private val serverVad: ServerVad? = null, + private val semanticVad: SemanticVad? = null, + private val _json: JsonValue? = null, ) { - @JsonCreator - private constructor( - @JsonProperty("create_response") - @ExcludeMissing - createResponse: JsonField = JsonMissing.of(), - @JsonProperty("eagerness") - @ExcludeMissing - eagerness: JsonField = JsonMissing.of(), - @JsonProperty("idle_timeout_ms") - @ExcludeMissing - idleTimeoutMs: JsonField = JsonMissing.of(), - @JsonProperty("interrupt_response") - @ExcludeMissing - interruptResponse: JsonField = JsonMissing.of(), - @JsonProperty("prefix_padding_ms") - @ExcludeMissing - prefixPaddingMs: JsonField = JsonMissing.of(), - @JsonProperty("silence_duration_ms") - @ExcludeMissing - silenceDurationMs: JsonField = JsonMissing.of(), - @JsonProperty("threshold") - @ExcludeMissing - threshold: JsonField = JsonMissing.of(), - @JsonProperty("type") @ExcludeMissing type: JsonField = JsonMissing.of(), - ) : this( - createResponse, - eagerness, - idleTimeoutMs, - interruptResponse, - prefixPaddingMs, - silenceDurationMs, - threshold, - type, - mutableMapOf(), - ) - /** - * Whether or not to automatically generate a response when a VAD stop event occurs. - * - * @throws OpenAIInvalidDataException if the JSON field has an unexpected type (e.g. - * if the server responded with an unexpected value). + * Server-side voice activity detection (VAD) which flips on when user speech is + * detected and off after a period of silence. */ - fun createResponse(): Optional = - createResponse.getOptional("create_response") + fun serverVad(): Optional = Optional.ofNullable(serverVad) /** - * Used only for `semantic_vad` mode. The eagerness of the model to respond. `low` - * will wait longer for the user to continue speaking, `high` will respond more - * quickly. `auto` is the default and is equivalent to `medium`. `low`, `medium`, - * and `high` have max timeouts of 8s, 4s, and 2s respectively. - * - * @throws OpenAIInvalidDataException if the JSON field has an unexpected type (e.g. - * if the server responded with an unexpected value). + * Server-side semantic turn detection which uses a model to determine when the user + * has finished speaking. */ - fun eagerness(): Optional = eagerness.getOptional("eagerness") + fun semanticVad(): Optional = Optional.ofNullable(semanticVad) - /** - * Optional idle timeout after which turn detection will auto-timeout when no - * additional audio is received and emits a `timeout_triggered` event. - * - * @throws OpenAIInvalidDataException if the JSON field has an unexpected type (e.g. - * if the server responded with an unexpected value). - */ - fun idleTimeoutMs(): Optional = idleTimeoutMs.getOptional("idle_timeout_ms") + fun isServerVad(): Boolean = serverVad != null - /** - * Whether or not to automatically interrupt any ongoing response with output to the - * default conversation (i.e. `conversation` of `auto`) when a VAD start event - * occurs. - * - * @throws OpenAIInvalidDataException if the JSON field has an unexpected type (e.g. - * if the server responded with an unexpected value). - */ - fun interruptResponse(): Optional = - interruptResponse.getOptional("interrupt_response") + fun isSemanticVad(): Boolean = semanticVad != null /** - * Used only for `server_vad` mode. Amount of audio to include before the VAD - * detected speech (in milliseconds). Defaults to 300ms. - * - * @throws OpenAIInvalidDataException if the JSON field has an unexpected type (e.g. - * if the server responded with an unexpected value). + * Server-side voice activity detection (VAD) which flips on when user speech is + * detected and off after a period of silence. */ - fun prefixPaddingMs(): Optional = - prefixPaddingMs.getOptional("prefix_padding_ms") + fun asServerVad(): ServerVad = serverVad.getOrThrow("serverVad") /** - * Used only for `server_vad` mode. Duration of silence to detect speech stop (in - * milliseconds). Defaults to 500ms. With shorter values the model will respond more - * quickly, but may jump in on short pauses from the user. - * - * @throws OpenAIInvalidDataException if the JSON field has an unexpected type (e.g. - * if the server responded with an unexpected value). + * Server-side semantic turn detection which uses a model to determine when the user + * has finished speaking. */ - fun silenceDurationMs(): Optional = - silenceDurationMs.getOptional("silence_duration_ms") + fun asSemanticVad(): SemanticVad = semanticVad.getOrThrow("semanticVad") - /** - * Used only for `server_vad` mode. Activation threshold for VAD (0.0 to 1.0), this - * defaults to 0.5. A higher threshold will require louder audio to activate the - * model, and thus might perform better in noisy environments. - * - * @throws OpenAIInvalidDataException if the JSON field has an unexpected type (e.g. - * if the server responded with an unexpected value). - */ - fun threshold(): Optional = threshold.getOptional("threshold") + fun _json(): Optional = Optional.ofNullable(_json) - /** - * Type of turn detection. - * - * @throws OpenAIInvalidDataException if the JSON field has an unexpected type (e.g. - * if the server responded with an unexpected value). - */ - fun type(): Optional = type.getOptional("type") + fun accept(visitor: Visitor): T = + when { + serverVad != null -> visitor.visitServerVad(serverVad) + semanticVad != null -> visitor.visitSemanticVad(semanticVad) + else -> visitor.unknown(_json) + } - /** - * Returns the raw JSON value of [createResponse]. - * - * Unlike [createResponse], this method doesn't throw if the JSON field has an - * unexpected type. - */ - @JsonProperty("create_response") - @ExcludeMissing - fun _createResponse(): JsonField = createResponse + private var validated: Boolean = false - /** - * Returns the raw JSON value of [eagerness]. - * - * Unlike [eagerness], this method doesn't throw if the JSON field has an unexpected - * type. - */ - @JsonProperty("eagerness") - @ExcludeMissing - fun _eagerness(): JsonField = eagerness + fun validate(): TurnDetection = apply { + if (validated) { + return@apply + } - /** - * Returns the raw JSON value of [idleTimeoutMs]. - * - * Unlike [idleTimeoutMs], this method doesn't throw if the JSON field has an - * unexpected type. - */ - @JsonProperty("idle_timeout_ms") - @ExcludeMissing - fun _idleTimeoutMs(): JsonField = idleTimeoutMs + accept( + object : Visitor { + override fun visitServerVad(serverVad: ServerVad) { + serverVad.validate() + } - /** - * Returns the raw JSON value of [interruptResponse]. - * - * Unlike [interruptResponse], this method doesn't throw if the JSON field has an - * unexpected type. - */ - @JsonProperty("interrupt_response") - @ExcludeMissing - fun _interruptResponse(): JsonField = interruptResponse + override fun visitSemanticVad(semanticVad: SemanticVad) { + semanticVad.validate() + } + } + ) + validated = true + } - /** - * Returns the raw JSON value of [prefixPaddingMs]. - * - * Unlike [prefixPaddingMs], this method doesn't throw if the JSON field has an - * unexpected type. - */ - @JsonProperty("prefix_padding_ms") - @ExcludeMissing - fun _prefixPaddingMs(): JsonField = prefixPaddingMs + fun isValid(): Boolean = + try { + validate() + true + } catch (e: OpenAIInvalidDataException) { + false + } /** - * Returns the raw JSON value of [silenceDurationMs]. + * Returns a score indicating how many valid values are contained in this object + * recursively. * - * Unlike [silenceDurationMs], this method doesn't throw if the JSON field has an - * unexpected type. + * Used for best match union deserialization. */ - @JsonProperty("silence_duration_ms") - @ExcludeMissing - fun _silenceDurationMs(): JsonField = silenceDurationMs + @JvmSynthetic + internal fun validity(): Int = + accept( + object : Visitor { + override fun visitServerVad(serverVad: ServerVad) = serverVad.validity() - /** - * Returns the raw JSON value of [threshold]. - * - * Unlike [threshold], this method doesn't throw if the JSON field has an unexpected - * type. - */ - @JsonProperty("threshold") - @ExcludeMissing - fun _threshold(): JsonField = threshold + override fun visitSemanticVad(semanticVad: SemanticVad) = + semanticVad.validity() - /** - * Returns the raw JSON value of [type]. - * - * Unlike [type], this method doesn't throw if the JSON field has an unexpected - * type. - */ - @JsonProperty("type") @ExcludeMissing fun _type(): JsonField = type + override fun unknown(json: JsonValue?) = 0 + } + ) - @JsonAnySetter - private fun putAdditionalProperty(key: String, value: JsonValue) { - additionalProperties.put(key, value) + override fun equals(other: Any?): Boolean { + if (this === other) { + return true + } + + return other is TurnDetection && + serverVad == other.serverVad && + semanticVad == other.semanticVad } - @JsonAnyGetter - @ExcludeMissing - fun _additionalProperties(): Map = - Collections.unmodifiableMap(additionalProperties) + override fun hashCode(): Int = Objects.hash(serverVad, semanticVad) - fun toBuilder() = Builder().from(this) + override fun toString(): String = + when { + serverVad != null -> "TurnDetection{serverVad=$serverVad}" + semanticVad != null -> "TurnDetection{semanticVad=$semanticVad}" + _json != null -> "TurnDetection{_unknown=$_json}" + else -> throw IllegalStateException("Invalid TurnDetection") + } companion object { /** - * Returns a mutable builder for constructing an instance of [TurnDetection]. + * Server-side voice activity detection (VAD) which flips on when user speech is + * detected and off after a period of silence. */ - @JvmStatic fun builder() = Builder() - } - - /** A builder for [TurnDetection]. */ - class Builder internal constructor() { - - private var createResponse: JsonField = JsonMissing.of() - private var eagerness: JsonField = JsonMissing.of() - private var idleTimeoutMs: JsonField = JsonMissing.of() - private var interruptResponse: JsonField = JsonMissing.of() - private var prefixPaddingMs: JsonField = JsonMissing.of() - private var silenceDurationMs: JsonField = JsonMissing.of() - private var threshold: JsonField = JsonMissing.of() - private var type: JsonField = JsonMissing.of() - private var additionalProperties: MutableMap = mutableMapOf() - - @JvmSynthetic - internal fun from(turnDetection: TurnDetection) = apply { - createResponse = turnDetection.createResponse - eagerness = turnDetection.eagerness - idleTimeoutMs = turnDetection.idleTimeoutMs - interruptResponse = turnDetection.interruptResponse - prefixPaddingMs = turnDetection.prefixPaddingMs - silenceDurationMs = turnDetection.silenceDurationMs - threshold = turnDetection.threshold - type = turnDetection.type - additionalProperties = turnDetection.additionalProperties.toMutableMap() - } + @JvmStatic + fun ofServerVad(serverVad: ServerVad) = TurnDetection(serverVad = serverVad) /** - * Whether or not to automatically generate a response when a VAD stop event - * occurs. + * Server-side semantic turn detection which uses a model to determine when the + * user has finished speaking. */ - fun createResponse(createResponse: Boolean) = - createResponse(JsonField.of(createResponse)) + @JvmStatic + fun ofSemanticVad(semanticVad: SemanticVad) = + TurnDetection(semanticVad = semanticVad) + } + + /** + * An interface that defines how to map each variant of [TurnDetection] to a value + * of type [T]. + */ + interface Visitor { /** - * Sets [Builder.createResponse] to an arbitrary JSON value. - * - * You should usually call [Builder.createResponse] with a well-typed [Boolean] - * value instead. This method is primarily for setting the field to an - * undocumented or not yet supported value. + * Server-side voice activity detection (VAD) which flips on when user speech is + * detected and off after a period of silence. */ - fun createResponse(createResponse: JsonField) = apply { - this.createResponse = createResponse - } + fun visitServerVad(serverVad: ServerVad): T /** - * Used only for `semantic_vad` mode. The eagerness of the model to respond. - * `low` will wait longer for the user to continue speaking, `high` will respond - * more quickly. `auto` is the default and is equivalent to `medium`. `low`, - * `medium`, and `high` have max timeouts of 8s, 4s, and 2s respectively. + * Server-side semantic turn detection which uses a model to determine when the + * user has finished speaking. */ - fun eagerness(eagerness: Eagerness) = eagerness(JsonField.of(eagerness)) + fun visitSemanticVad(semanticVad: SemanticVad): T /** - * Sets [Builder.eagerness] to an arbitrary JSON value. + * Maps an unknown variant of [TurnDetection] to a value of type [T]. * - * You should usually call [Builder.eagerness] with a well-typed [Eagerness] - * value instead. This method is primarily for setting the field to an - * undocumented or not yet supported value. + * An instance of [TurnDetection] can contain an unknown variant if it was + * deserialized from data that doesn't match any known variant. For example, if + * the SDK is on an older version than the API, then the API may respond with + * new variants that the SDK is unaware of. + * + * @throws OpenAIInvalidDataException in the default implementation. */ - fun eagerness(eagerness: JsonField) = apply { - this.eagerness = eagerness + fun unknown(json: JsonValue?): T { + throw OpenAIInvalidDataException("Unknown TurnDetection: $json") } + } - /** - * Optional idle timeout after which turn detection will auto-timeout when no - * additional audio is received and emits a `timeout_triggered` event. - */ - fun idleTimeoutMs(idleTimeoutMs: Long?) = - idleTimeoutMs(JsonField.ofNullable(idleTimeoutMs)) + internal class Deserializer : + BaseDeserializer(TurnDetection::class) { + + override fun ObjectCodec.deserialize(node: JsonNode): TurnDetection { + val json = JsonValue.fromJsonNode(node) + val type = json.asObject().getOrNull()?.get("type")?.asString()?.getOrNull() + + when (type) { + "server_vad" -> { + return tryDeserialize(node, jacksonTypeRef())?.let { + TurnDetection(serverVad = it, _json = json) + } ?: TurnDetection(_json = json) + } + "semantic_vad" -> { + return tryDeserialize(node, jacksonTypeRef())?.let { + TurnDetection(semanticVad = it, _json = json) + } ?: TurnDetection(_json = json) + } + } + + return TurnDetection(_json = json) + } + } + + internal class Serializer : BaseSerializer(TurnDetection::class) { + + override fun serialize( + value: TurnDetection, + generator: JsonGenerator, + provider: SerializerProvider, + ) { + when { + value.serverVad != null -> generator.writeObject(value.serverVad) + value.semanticVad != null -> generator.writeObject(value.semanticVad) + value._json != null -> generator.writeObject(value._json) + else -> throw IllegalStateException("Invalid TurnDetection") + } + } + } + + /** + * Server-side voice activity detection (VAD) which flips on when user speech is + * detected and off after a period of silence. + */ + class ServerVad + private constructor( + private val type: JsonValue, + private val createResponse: JsonField, + private val idleTimeoutMs: JsonField, + private val interruptResponse: JsonField, + private val prefixPaddingMs: JsonField, + private val silenceDurationMs: JsonField, + private val threshold: JsonField, + private val additionalProperties: MutableMap, + ) { + + @JsonCreator + private constructor( + @JsonProperty("type") @ExcludeMissing type: JsonValue = JsonMissing.of(), + @JsonProperty("create_response") + @ExcludeMissing + createResponse: JsonField = JsonMissing.of(), + @JsonProperty("idle_timeout_ms") + @ExcludeMissing + idleTimeoutMs: JsonField = JsonMissing.of(), + @JsonProperty("interrupt_response") + @ExcludeMissing + interruptResponse: JsonField = JsonMissing.of(), + @JsonProperty("prefix_padding_ms") + @ExcludeMissing + prefixPaddingMs: JsonField = JsonMissing.of(), + @JsonProperty("silence_duration_ms") + @ExcludeMissing + silenceDurationMs: JsonField = JsonMissing.of(), + @JsonProperty("threshold") + @ExcludeMissing + threshold: JsonField = JsonMissing.of(), + ) : this( + type, + createResponse, + idleTimeoutMs, + interruptResponse, + prefixPaddingMs, + silenceDurationMs, + threshold, + mutableMapOf(), + ) /** - * Alias for [Builder.idleTimeoutMs]. + * Type of turn detection, `server_vad` to turn on simple Server VAD. + * + * Expected to always return the following: + * ```java + * JsonValue.from("server_vad") + * ``` * - * This unboxed primitive overload exists for backwards compatibility. + * However, this method can be useful for debugging and logging (e.g. if the + * server responded with an unexpected value). */ - fun idleTimeoutMs(idleTimeoutMs: Long) = idleTimeoutMs(idleTimeoutMs as Long?) + @JsonProperty("type") @ExcludeMissing fun _type(): JsonValue = type /** - * Alias for calling [Builder.idleTimeoutMs] with `idleTimeoutMs.orElse(null)`. + * Whether or not to automatically generate a response when a VAD stop event + * occurs. + * + * @throws OpenAIInvalidDataException if the JSON field has an unexpected type + * (e.g. if the server responded with an unexpected value). */ - fun idleTimeoutMs(idleTimeoutMs: Optional) = - idleTimeoutMs(idleTimeoutMs.getOrNull()) + fun createResponse(): Optional = + createResponse.getOptional("create_response") /** - * Sets [Builder.idleTimeoutMs] to an arbitrary JSON value. + * Optional timeout after which a model response will be triggered + * automatically. This is useful for situations in which a long pause from the + * user is unexpected, such as a phone call. The model will effectively prompt + * the user to continue the conversation based on the current context. * - * You should usually call [Builder.idleTimeoutMs] with a well-typed [Long] - * value instead. This method is primarily for setting the field to an - * undocumented or not yet supported value. + * The timeout value will be applied after the last model response's audio has + * finished playing, i.e. it's set to the `response.done` time plus audio + * playback duration. + * + * An `input_audio_buffer.timeout_triggered` event (plus events associated with + * the Response) will be emitted when the timeout is reached. Idle timeout is + * currently only supported for `server_vad` mode. + * + * @throws OpenAIInvalidDataException if the JSON field has an unexpected type + * (e.g. if the server responded with an unexpected value). */ - fun idleTimeoutMs(idleTimeoutMs: JsonField) = apply { - this.idleTimeoutMs = idleTimeoutMs - } + fun idleTimeoutMs(): Optional = + idleTimeoutMs.getOptional("idle_timeout_ms") /** * Whether or not to automatically interrupt any ongoing response with output to * the default conversation (i.e. `conversation` of `auto`) when a VAD start * event occurs. - */ - fun interruptResponse(interruptResponse: Boolean) = - interruptResponse(JsonField.of(interruptResponse)) - - /** - * Sets [Builder.interruptResponse] to an arbitrary JSON value. * - * You should usually call [Builder.interruptResponse] with a well-typed - * [Boolean] value instead. This method is primarily for setting the field to an - * undocumented or not yet supported value. + * @throws OpenAIInvalidDataException if the JSON field has an unexpected type + * (e.g. if the server responded with an unexpected value). */ - fun interruptResponse(interruptResponse: JsonField) = apply { - this.interruptResponse = interruptResponse - } + fun interruptResponse(): Optional = + interruptResponse.getOptional("interrupt_response") /** * Used only for `server_vad` mode. Amount of audio to include before the VAD * detected speech (in milliseconds). Defaults to 300ms. + * + * @throws OpenAIInvalidDataException if the JSON field has an unexpected type + * (e.g. if the server responded with an unexpected value). */ - fun prefixPaddingMs(prefixPaddingMs: Long) = - prefixPaddingMs(JsonField.of(prefixPaddingMs)) + fun prefixPaddingMs(): Optional = + prefixPaddingMs.getOptional("prefix_padding_ms") /** - * Sets [Builder.prefixPaddingMs] to an arbitrary JSON value. + * Used only for `server_vad` mode. Duration of silence to detect speech stop + * (in milliseconds). Defaults to 500ms. With shorter values the model will + * respond more quickly, but may jump in on short pauses from the user. * - * You should usually call [Builder.prefixPaddingMs] with a well-typed [Long] - * value instead. This method is primarily for setting the field to an - * undocumented or not yet supported value. + * @throws OpenAIInvalidDataException if the JSON field has an unexpected type + * (e.g. if the server responded with an unexpected value). */ - fun prefixPaddingMs(prefixPaddingMs: JsonField) = apply { - this.prefixPaddingMs = prefixPaddingMs - } + fun silenceDurationMs(): Optional = + silenceDurationMs.getOptional("silence_duration_ms") /** - * Used only for `server_vad` mode. Duration of silence to detect speech stop - * (in milliseconds). Defaults to 500ms. With shorter values the model will - * respond more quickly, but may jump in on short pauses from the user. + * Used only for `server_vad` mode. Activation threshold for VAD (0.0 to 1.0), + * this defaults to 0.5. A higher threshold will require louder audio to + * activate the model, and thus might perform better in noisy environments. + * + * @throws OpenAIInvalidDataException if the JSON field has an unexpected type + * (e.g. if the server responded with an unexpected value). */ - fun silenceDurationMs(silenceDurationMs: Long) = - silenceDurationMs(JsonField.of(silenceDurationMs)) + fun threshold(): Optional = threshold.getOptional("threshold") /** - * Sets [Builder.silenceDurationMs] to an arbitrary JSON value. + * Returns the raw JSON value of [createResponse]. * - * You should usually call [Builder.silenceDurationMs] with a well-typed [Long] - * value instead. This method is primarily for setting the field to an - * undocumented or not yet supported value. + * Unlike [createResponse], this method doesn't throw if the JSON field has an + * unexpected type. */ - fun silenceDurationMs(silenceDurationMs: JsonField) = apply { - this.silenceDurationMs = silenceDurationMs - } + @JsonProperty("create_response") + @ExcludeMissing + fun _createResponse(): JsonField = createResponse /** - * Used only for `server_vad` mode. Activation threshold for VAD (0.0 to 1.0), - * this defaults to 0.5. A higher threshold will require louder audio to - * activate the model, and thus might perform better in noisy environments. + * Returns the raw JSON value of [idleTimeoutMs]. + * + * Unlike [idleTimeoutMs], this method doesn't throw if the JSON field has an + * unexpected type. */ - fun threshold(threshold: Double) = threshold(JsonField.of(threshold)) + @JsonProperty("idle_timeout_ms") + @ExcludeMissing + fun _idleTimeoutMs(): JsonField = idleTimeoutMs /** - * Sets [Builder.threshold] to an arbitrary JSON value. + * Returns the raw JSON value of [interruptResponse]. * - * You should usually call [Builder.threshold] with a well-typed [Double] value - * instead. This method is primarily for setting the field to an undocumented or - * not yet supported value. + * Unlike [interruptResponse], this method doesn't throw if the JSON field has + * an unexpected type. */ - fun threshold(threshold: JsonField) = apply { - this.threshold = threshold - } + @JsonProperty("interrupt_response") + @ExcludeMissing + fun _interruptResponse(): JsonField = interruptResponse - /** Type of turn detection. */ - fun type(type: Type) = type(JsonField.of(type)) + /** + * Returns the raw JSON value of [prefixPaddingMs]. + * + * Unlike [prefixPaddingMs], this method doesn't throw if the JSON field has an + * unexpected type. + */ + @JsonProperty("prefix_padding_ms") + @ExcludeMissing + fun _prefixPaddingMs(): JsonField = prefixPaddingMs /** - * Sets [Builder.type] to an arbitrary JSON value. + * Returns the raw JSON value of [silenceDurationMs]. * - * You should usually call [Builder.type] with a well-typed [Type] value - * instead. This method is primarily for setting the field to an undocumented or - * not yet supported value. + * Unlike [silenceDurationMs], this method doesn't throw if the JSON field has + * an unexpected type. */ - fun type(type: JsonField) = apply { this.type = type } + @JsonProperty("silence_duration_ms") + @ExcludeMissing + fun _silenceDurationMs(): JsonField = silenceDurationMs - fun additionalProperties(additionalProperties: Map) = apply { - this.additionalProperties.clear() - putAllAdditionalProperties(additionalProperties) - } + /** + * Returns the raw JSON value of [threshold]. + * + * Unlike [threshold], this method doesn't throw if the JSON field has an + * unexpected type. + */ + @JsonProperty("threshold") + @ExcludeMissing + fun _threshold(): JsonField = threshold - fun putAdditionalProperty(key: String, value: JsonValue) = apply { + @JsonAnySetter + private fun putAdditionalProperty(key: String, value: JsonValue) { additionalProperties.put(key, value) } - fun putAllAdditionalProperties(additionalProperties: Map) = - apply { - this.additionalProperties.putAll(additionalProperties) - } + @JsonAnyGetter + @ExcludeMissing + fun _additionalProperties(): Map = + Collections.unmodifiableMap(additionalProperties) - fun removeAdditionalProperty(key: String) = apply { - additionalProperties.remove(key) - } + fun toBuilder() = Builder().from(this) - fun removeAllAdditionalProperties(keys: Set) = apply { - keys.forEach(::removeAdditionalProperty) + companion object { + + /** + * Returns a mutable builder for constructing an instance of [ServerVad]. + */ + @JvmStatic fun builder() = Builder() } - /** - * Returns an immutable instance of [TurnDetection]. - * - * Further updates to this [Builder] will not mutate the returned instance. - */ - fun build(): TurnDetection = - TurnDetection( - createResponse, - eagerness, - idleTimeoutMs, - interruptResponse, - prefixPaddingMs, - silenceDurationMs, - threshold, - type, - additionalProperties.toMutableMap(), - ) - } + /** A builder for [ServerVad]. */ + class Builder internal constructor() { - private var validated: Boolean = false + private var type: JsonValue = JsonValue.from("server_vad") + private var createResponse: JsonField = JsonMissing.of() + private var idleTimeoutMs: JsonField = JsonMissing.of() + private var interruptResponse: JsonField = JsonMissing.of() + private var prefixPaddingMs: JsonField = JsonMissing.of() + private var silenceDurationMs: JsonField = JsonMissing.of() + private var threshold: JsonField = JsonMissing.of() + private var additionalProperties: MutableMap = + mutableMapOf() - fun validate(): TurnDetection = apply { - if (validated) { - return@apply - } + @JvmSynthetic + internal fun from(serverVad: ServerVad) = apply { + type = serverVad.type + createResponse = serverVad.createResponse + idleTimeoutMs = serverVad.idleTimeoutMs + interruptResponse = serverVad.interruptResponse + prefixPaddingMs = serverVad.prefixPaddingMs + silenceDurationMs = serverVad.silenceDurationMs + threshold = serverVad.threshold + additionalProperties = serverVad.additionalProperties.toMutableMap() + } - createResponse() - eagerness().ifPresent { it.validate() } - idleTimeoutMs() - interruptResponse() - prefixPaddingMs() - silenceDurationMs() - threshold() - type().ifPresent { it.validate() } - validated = true - } + /** + * Sets the field to an arbitrary JSON value. + * + * It is usually unnecessary to call this method because the field defaults + * to the following: + * ```java + * JsonValue.from("server_vad") + * ``` + * + * This method is primarily for setting the field to an undocumented or not + * yet supported value. + */ + fun type(type: JsonValue) = apply { this.type = type } - fun isValid(): Boolean = - try { - validate() - true - } catch (e: OpenAIInvalidDataException) { - false - } + /** + * Whether or not to automatically generate a response when a VAD stop event + * occurs. + */ + fun createResponse(createResponse: Boolean) = + createResponse(JsonField.of(createResponse)) - /** - * Returns a score indicating how many valid values are contained in this object - * recursively. - * - * Used for best match union deserialization. - */ - @JvmSynthetic - internal fun validity(): Int = - (if (createResponse.asKnown().isPresent) 1 else 0) + - (eagerness.asKnown().getOrNull()?.validity() ?: 0) + - (if (idleTimeoutMs.asKnown().isPresent) 1 else 0) + - (if (interruptResponse.asKnown().isPresent) 1 else 0) + - (if (prefixPaddingMs.asKnown().isPresent) 1 else 0) + - (if (silenceDurationMs.asKnown().isPresent) 1 else 0) + - (if (threshold.asKnown().isPresent) 1 else 0) + - (type.asKnown().getOrNull()?.validity() ?: 0) + /** + * Sets [Builder.createResponse] to an arbitrary JSON value. + * + * You should usually call [Builder.createResponse] with a well-typed + * [Boolean] value instead. This method is primarily for setting the field + * to an undocumented or not yet supported value. + */ + fun createResponse(createResponse: JsonField) = apply { + this.createResponse = createResponse + } - /** - * Used only for `semantic_vad` mode. The eagerness of the model to respond. `low` - * will wait longer for the user to continue speaking, `high` will respond more - * quickly. `auto` is the default and is equivalent to `medium`. `low`, `medium`, - * and `high` have max timeouts of 8s, 4s, and 2s respectively. - */ - class Eagerness - @JsonCreator - private constructor(private val value: JsonField) : Enum { + /** + * Optional timeout after which a model response will be triggered + * automatically. This is useful for situations in which a long pause from + * the user is unexpected, such as a phone call. The model will effectively + * prompt the user to continue the conversation based on the current + * context. + * + * The timeout value will be applied after the last model response's audio + * has finished playing, i.e. it's set to the `response.done` time plus + * audio playback duration. + * + * An `input_audio_buffer.timeout_triggered` event (plus events associated + * with the Response) will be emitted when the timeout is reached. Idle + * timeout is currently only supported for `server_vad` mode. + */ + fun idleTimeoutMs(idleTimeoutMs: Long?) = + idleTimeoutMs(JsonField.ofNullable(idleTimeoutMs)) - /** - * Returns this class instance's raw value. - * - * This is usually only useful if this instance was deserialized from data that - * doesn't match any known member, and you want to know that value. For example, - * if the SDK is on an older version than the API, then the API may respond with - * new members that the SDK is unaware of. - */ - @com.fasterxml.jackson.annotation.JsonValue - fun _value(): JsonField = value + /** + * Alias for [Builder.idleTimeoutMs]. + * + * This unboxed primitive overload exists for backwards compatibility. + */ + fun idleTimeoutMs(idleTimeoutMs: Long) = + idleTimeoutMs(idleTimeoutMs as Long?) - companion object { + /** + * Alias for calling [Builder.idleTimeoutMs] with + * `idleTimeoutMs.orElse(null)`. + */ + fun idleTimeoutMs(idleTimeoutMs: Optional) = + idleTimeoutMs(idleTimeoutMs.getOrNull()) - @JvmField val LOW = of("low") + /** + * Sets [Builder.idleTimeoutMs] to an arbitrary JSON value. + * + * You should usually call [Builder.idleTimeoutMs] with a well-typed [Long] + * value instead. This method is primarily for setting the field to an + * undocumented or not yet supported value. + */ + fun idleTimeoutMs(idleTimeoutMs: JsonField) = apply { + this.idleTimeoutMs = idleTimeoutMs + } - @JvmField val MEDIUM = of("medium") + /** + * Whether or not to automatically interrupt any ongoing response with + * output to the default conversation (i.e. `conversation` of `auto`) when a + * VAD start event occurs. + */ + fun interruptResponse(interruptResponse: Boolean) = + interruptResponse(JsonField.of(interruptResponse)) - @JvmField val HIGH = of("high") + /** + * Sets [Builder.interruptResponse] to an arbitrary JSON value. + * + * You should usually call [Builder.interruptResponse] with a well-typed + * [Boolean] value instead. This method is primarily for setting the field + * to an undocumented or not yet supported value. + */ + fun interruptResponse(interruptResponse: JsonField) = apply { + this.interruptResponse = interruptResponse + } - @JvmField val AUTO = of("auto") + /** + * Used only for `server_vad` mode. Amount of audio to include before the + * VAD detected speech (in milliseconds). Defaults to 300ms. + */ + fun prefixPaddingMs(prefixPaddingMs: Long) = + prefixPaddingMs(JsonField.of(prefixPaddingMs)) - @JvmStatic fun of(value: String) = Eagerness(JsonField.of(value)) - } + /** + * Sets [Builder.prefixPaddingMs] to an arbitrary JSON value. + * + * You should usually call [Builder.prefixPaddingMs] with a well-typed + * [Long] value instead. This method is primarily for setting the field to + * an undocumented or not yet supported value. + */ + fun prefixPaddingMs(prefixPaddingMs: JsonField) = apply { + this.prefixPaddingMs = prefixPaddingMs + } - /** An enum containing [Eagerness]'s known values. */ - enum class Known { - LOW, - MEDIUM, - HIGH, - AUTO, - } + /** + * Used only for `server_vad` mode. Duration of silence to detect speech + * stop (in milliseconds). Defaults to 500ms. With shorter values the model + * will respond more quickly, but may jump in on short pauses from the user. + */ + fun silenceDurationMs(silenceDurationMs: Long) = + silenceDurationMs(JsonField.of(silenceDurationMs)) - /** - * An enum containing [Eagerness]'s known values, as well as an [_UNKNOWN] - * member. - * - * An instance of [Eagerness] can contain an unknown value in a couple of cases: - * - It was deserialized from data that doesn't match any known member. For - * example, if the SDK is on an older version than the API, then the API may - * respond with new members that the SDK is unaware of. - * - It was constructed with an arbitrary value using the [of] method. - */ - enum class Value { - LOW, - MEDIUM, - HIGH, - AUTO, /** - * An enum member indicating that [Eagerness] was instantiated with an - * unknown value. + * Sets [Builder.silenceDurationMs] to an arbitrary JSON value. + * + * You should usually call [Builder.silenceDurationMs] with a well-typed + * [Long] value instead. This method is primarily for setting the field to + * an undocumented or not yet supported value. */ - _UNKNOWN, - } + fun silenceDurationMs(silenceDurationMs: JsonField) = apply { + this.silenceDurationMs = silenceDurationMs + } - /** - * Returns an enum member corresponding to this class instance's value, or - * [Value._UNKNOWN] if the class was instantiated with an unknown value. - * - * Use the [known] method instead if you're certain the value is always known or - * if you want to throw for the unknown case. - */ - fun value(): Value = - when (this) { - LOW -> Value.LOW - MEDIUM -> Value.MEDIUM - HIGH -> Value.HIGH - AUTO -> Value.AUTO - else -> Value._UNKNOWN + /** + * Used only for `server_vad` mode. Activation threshold for VAD (0.0 to + * 1.0), this defaults to 0.5. A higher threshold will require louder audio + * to activate the model, and thus might perform better in noisy + * environments. + */ + fun threshold(threshold: Double) = threshold(JsonField.of(threshold)) + + /** + * Sets [Builder.threshold] to an arbitrary JSON value. + * + * You should usually call [Builder.threshold] with a well-typed [Double] + * value instead. This method is primarily for setting the field to an + * undocumented or not yet supported value. + */ + fun threshold(threshold: JsonField) = apply { + this.threshold = threshold } - /** - * Returns an enum member corresponding to this class instance's value. - * - * Use the [value] method instead if you're uncertain the value is always known - * and don't want to throw for the unknown case. - * - * @throws OpenAIInvalidDataException if this class instance's value is a not a - * known member. - */ - fun known(): Known = - when (this) { - LOW -> Known.LOW - MEDIUM -> Known.MEDIUM - HIGH -> Known.HIGH - AUTO -> Known.AUTO - else -> throw OpenAIInvalidDataException("Unknown Eagerness: $value") + fun additionalProperties(additionalProperties: Map) = + apply { + this.additionalProperties.clear() + putAllAdditionalProperties(additionalProperties) + } + + fun putAdditionalProperty(key: String, value: JsonValue) = apply { + additionalProperties.put(key, value) } - /** - * Returns this class instance's primitive wire representation. - * - * This differs from the [toString] method because that method is primarily for - * debugging and generally doesn't throw. - * - * @throws OpenAIInvalidDataException if this class instance's value does not - * have the expected primitive type. - */ - fun asString(): String = - _value().asString().orElseThrow { - OpenAIInvalidDataException("Value is not a String") + fun putAllAdditionalProperties( + additionalProperties: Map + ) = apply { this.additionalProperties.putAll(additionalProperties) } + + fun removeAdditionalProperty(key: String) = apply { + additionalProperties.remove(key) } + fun removeAllAdditionalProperties(keys: Set) = apply { + keys.forEach(::removeAdditionalProperty) + } + + /** + * Returns an immutable instance of [ServerVad]. + * + * Further updates to this [Builder] will not mutate the returned instance. + */ + fun build(): ServerVad = + ServerVad( + type, + createResponse, + idleTimeoutMs, + interruptResponse, + prefixPaddingMs, + silenceDurationMs, + threshold, + additionalProperties.toMutableMap(), + ) + } + private var validated: Boolean = false - fun validate(): Eagerness = apply { + fun validate(): ServerVad = apply { if (validated) { return@apply } - known() + _type().let { + if (it != JsonValue.from("server_vad")) { + throw OpenAIInvalidDataException("'type' is invalid, received $it") + } + } + createResponse() + idleTimeoutMs() + interruptResponse() + prefixPaddingMs() + silenceDurationMs() + threshold() validated = true } @@ -2091,122 +2159,314 @@ private constructor( * Used for best match union deserialization. */ @JvmSynthetic - internal fun validity(): Int = if (value() == Value._UNKNOWN) 0 else 1 + internal fun validity(): Int = + type.let { if (it == JsonValue.from("server_vad")) 1 else 0 } + + (if (createResponse.asKnown().isPresent) 1 else 0) + + (if (idleTimeoutMs.asKnown().isPresent) 1 else 0) + + (if (interruptResponse.asKnown().isPresent) 1 else 0) + + (if (prefixPaddingMs.asKnown().isPresent) 1 else 0) + + (if (silenceDurationMs.asKnown().isPresent) 1 else 0) + + (if (threshold.asKnown().isPresent) 1 else 0) override fun equals(other: Any?): Boolean { if (this === other) { return true } - return other is Eagerness && value == other.value + return other is ServerVad && + type == other.type && + createResponse == other.createResponse && + idleTimeoutMs == other.idleTimeoutMs && + interruptResponse == other.interruptResponse && + prefixPaddingMs == other.prefixPaddingMs && + silenceDurationMs == other.silenceDurationMs && + threshold == other.threshold && + additionalProperties == other.additionalProperties } - override fun hashCode() = value.hashCode() + private val hashCode: Int by lazy { + Objects.hash( + type, + createResponse, + idleTimeoutMs, + interruptResponse, + prefixPaddingMs, + silenceDurationMs, + threshold, + additionalProperties, + ) + } - override fun toString() = value.toString() + override fun hashCode(): Int = hashCode + + override fun toString() = + "ServerVad{type=$type, createResponse=$createResponse, idleTimeoutMs=$idleTimeoutMs, interruptResponse=$interruptResponse, prefixPaddingMs=$prefixPaddingMs, silenceDurationMs=$silenceDurationMs, threshold=$threshold, additionalProperties=$additionalProperties}" } - /** Type of turn detection. */ - class Type @JsonCreator private constructor(private val value: JsonField) : - Enum { + /** + * Server-side semantic turn detection which uses a model to determine when the user + * has finished speaking. + */ + class SemanticVad + private constructor( + private val type: JsonValue, + private val createResponse: JsonField, + private val eagerness: JsonField, + private val interruptResponse: JsonField, + private val additionalProperties: MutableMap, + ) { + + @JsonCreator + private constructor( + @JsonProperty("type") @ExcludeMissing type: JsonValue = JsonMissing.of(), + @JsonProperty("create_response") + @ExcludeMissing + createResponse: JsonField = JsonMissing.of(), + @JsonProperty("eagerness") + @ExcludeMissing + eagerness: JsonField = JsonMissing.of(), + @JsonProperty("interrupt_response") + @ExcludeMissing + interruptResponse: JsonField = JsonMissing.of(), + ) : this(type, createResponse, eagerness, interruptResponse, mutableMapOf()) /** - * Returns this class instance's raw value. + * Type of turn detection, `semantic_vad` to turn on Semantic VAD. * - * This is usually only useful if this instance was deserialized from data that - * doesn't match any known member, and you want to know that value. For example, - * if the SDK is on an older version than the API, then the API may respond with - * new members that the SDK is unaware of. + * Expected to always return the following: + * ```java + * JsonValue.from("semantic_vad") + * ``` + * + * However, this method can be useful for debugging and logging (e.g. if the + * server responded with an unexpected value). */ - @com.fasterxml.jackson.annotation.JsonValue - fun _value(): JsonField = value - - companion object { - - @JvmField val SERVER_VAD = of("server_vad") - - @JvmField val SEMANTIC_VAD = of("semantic_vad") - - @JvmStatic fun of(value: String) = Type(JsonField.of(value)) - } - - /** An enum containing [Type]'s known values. */ - enum class Known { - SERVER_VAD, - SEMANTIC_VAD, - } + @JsonProperty("type") @ExcludeMissing fun _type(): JsonValue = type /** - * An enum containing [Type]'s known values, as well as an [_UNKNOWN] member. + * Whether or not to automatically generate a response when a VAD stop event + * occurs. * - * An instance of [Type] can contain an unknown value in a couple of cases: - * - It was deserialized from data that doesn't match any known member. For - * example, if the SDK is on an older version than the API, then the API may - * respond with new members that the SDK is unaware of. - * - It was constructed with an arbitrary value using the [of] method. + * @throws OpenAIInvalidDataException if the JSON field has an unexpected type + * (e.g. if the server responded with an unexpected value). */ - enum class Value { - SERVER_VAD, - SEMANTIC_VAD, - /** - * An enum member indicating that [Type] was instantiated with an unknown - * value. - */ - _UNKNOWN, - } + fun createResponse(): Optional = + createResponse.getOptional("create_response") /** - * Returns an enum member corresponding to this class instance's value, or - * [Value._UNKNOWN] if the class was instantiated with an unknown value. + * Used only for `semantic_vad` mode. The eagerness of the model to respond. + * `low` will wait longer for the user to continue speaking, `high` will respond + * more quickly. `auto` is the default and is equivalent to `medium`. `low`, + * `medium`, and `high` have max timeouts of 8s, 4s, and 2s respectively. * - * Use the [known] method instead if you're certain the value is always known or - * if you want to throw for the unknown case. + * @throws OpenAIInvalidDataException if the JSON field has an unexpected type + * (e.g. if the server responded with an unexpected value). */ - fun value(): Value = - when (this) { - SERVER_VAD -> Value.SERVER_VAD - SEMANTIC_VAD -> Value.SEMANTIC_VAD - else -> Value._UNKNOWN - } + fun eagerness(): Optional = eagerness.getOptional("eagerness") /** - * Returns an enum member corresponding to this class instance's value. + * Whether or not to automatically interrupt any ongoing response with output to + * the default conversation (i.e. `conversation` of `auto`) when a VAD start + * event occurs. * - * Use the [value] method instead if you're uncertain the value is always known - * and don't want to throw for the unknown case. + * @throws OpenAIInvalidDataException if the JSON field has an unexpected type + * (e.g. if the server responded with an unexpected value). + */ + fun interruptResponse(): Optional = + interruptResponse.getOptional("interrupt_response") + + /** + * Returns the raw JSON value of [createResponse]. * - * @throws OpenAIInvalidDataException if this class instance's value is a not a - * known member. + * Unlike [createResponse], this method doesn't throw if the JSON field has an + * unexpected type. */ - fun known(): Known = - when (this) { - SERVER_VAD -> Known.SERVER_VAD - SEMANTIC_VAD -> Known.SEMANTIC_VAD - else -> throw OpenAIInvalidDataException("Unknown Type: $value") - } + @JsonProperty("create_response") + @ExcludeMissing + fun _createResponse(): JsonField = createResponse /** - * Returns this class instance's primitive wire representation. + * Returns the raw JSON value of [eagerness]. * - * This differs from the [toString] method because that method is primarily for - * debugging and generally doesn't throw. + * Unlike [eagerness], this method doesn't throw if the JSON field has an + * unexpected type. + */ + @JsonProperty("eagerness") + @ExcludeMissing + fun _eagerness(): JsonField = eagerness + + /** + * Returns the raw JSON value of [interruptResponse]. * - * @throws OpenAIInvalidDataException if this class instance's value does not - * have the expected primitive type. + * Unlike [interruptResponse], this method doesn't throw if the JSON field has + * an unexpected type. */ - fun asString(): String = - _value().asString().orElseThrow { - OpenAIInvalidDataException("Value is not a String") + @JsonProperty("interrupt_response") + @ExcludeMissing + fun _interruptResponse(): JsonField = interruptResponse + + @JsonAnySetter + private fun putAdditionalProperty(key: String, value: JsonValue) { + additionalProperties.put(key, value) + } + + @JsonAnyGetter + @ExcludeMissing + fun _additionalProperties(): Map = + Collections.unmodifiableMap(additionalProperties) + + fun toBuilder() = Builder().from(this) + + companion object { + + /** + * Returns a mutable builder for constructing an instance of [SemanticVad]. + */ + @JvmStatic fun builder() = Builder() + } + + /** A builder for [SemanticVad]. */ + class Builder internal constructor() { + + private var type: JsonValue = JsonValue.from("semantic_vad") + private var createResponse: JsonField = JsonMissing.of() + private var eagerness: JsonField = JsonMissing.of() + private var interruptResponse: JsonField = JsonMissing.of() + private var additionalProperties: MutableMap = + mutableMapOf() + + @JvmSynthetic + internal fun from(semanticVad: SemanticVad) = apply { + type = semanticVad.type + createResponse = semanticVad.createResponse + eagerness = semanticVad.eagerness + interruptResponse = semanticVad.interruptResponse + additionalProperties = semanticVad.additionalProperties.toMutableMap() } + /** + * Sets the field to an arbitrary JSON value. + * + * It is usually unnecessary to call this method because the field defaults + * to the following: + * ```java + * JsonValue.from("semantic_vad") + * ``` + * + * This method is primarily for setting the field to an undocumented or not + * yet supported value. + */ + fun type(type: JsonValue) = apply { this.type = type } + + /** + * Whether or not to automatically generate a response when a VAD stop event + * occurs. + */ + fun createResponse(createResponse: Boolean) = + createResponse(JsonField.of(createResponse)) + + /** + * Sets [Builder.createResponse] to an arbitrary JSON value. + * + * You should usually call [Builder.createResponse] with a well-typed + * [Boolean] value instead. This method is primarily for setting the field + * to an undocumented or not yet supported value. + */ + fun createResponse(createResponse: JsonField) = apply { + this.createResponse = createResponse + } + + /** + * Used only for `semantic_vad` mode. The eagerness of the model to respond. + * `low` will wait longer for the user to continue speaking, `high` will + * respond more quickly. `auto` is the default and is equivalent to + * `medium`. `low`, `medium`, and `high` have max timeouts of 8s, 4s, and 2s + * respectively. + */ + fun eagerness(eagerness: Eagerness) = eagerness(JsonField.of(eagerness)) + + /** + * Sets [Builder.eagerness] to an arbitrary JSON value. + * + * You should usually call [Builder.eagerness] with a well-typed [Eagerness] + * value instead. This method is primarily for setting the field to an + * undocumented or not yet supported value. + */ + fun eagerness(eagerness: JsonField) = apply { + this.eagerness = eagerness + } + + /** + * Whether or not to automatically interrupt any ongoing response with + * output to the default conversation (i.e. `conversation` of `auto`) when a + * VAD start event occurs. + */ + fun interruptResponse(interruptResponse: Boolean) = + interruptResponse(JsonField.of(interruptResponse)) + + /** + * Sets [Builder.interruptResponse] to an arbitrary JSON value. + * + * You should usually call [Builder.interruptResponse] with a well-typed + * [Boolean] value instead. This method is primarily for setting the field + * to an undocumented or not yet supported value. + */ + fun interruptResponse(interruptResponse: JsonField) = apply { + this.interruptResponse = interruptResponse + } + + fun additionalProperties(additionalProperties: Map) = + apply { + this.additionalProperties.clear() + putAllAdditionalProperties(additionalProperties) + } + + fun putAdditionalProperty(key: String, value: JsonValue) = apply { + additionalProperties.put(key, value) + } + + fun putAllAdditionalProperties( + additionalProperties: Map + ) = apply { this.additionalProperties.putAll(additionalProperties) } + + fun removeAdditionalProperty(key: String) = apply { + additionalProperties.remove(key) + } + + fun removeAllAdditionalProperties(keys: Set) = apply { + keys.forEach(::removeAdditionalProperty) + } + + /** + * Returns an immutable instance of [SemanticVad]. + * + * Further updates to this [Builder] will not mutate the returned instance. + */ + fun build(): SemanticVad = + SemanticVad( + type, + createResponse, + eagerness, + interruptResponse, + additionalProperties.toMutableMap(), + ) + } + private var validated: Boolean = false - fun validate(): Type = apply { + fun validate(): SemanticVad = apply { if (validated) { return@apply } - known() + _type().let { + if (it != JsonValue.from("semantic_vad")) { + throw OpenAIInvalidDataException("'type' is invalid, received $it") + } + } + createResponse() + eagerness().ifPresent { it.validate() } + interruptResponse() validated = true } @@ -2225,56 +2485,195 @@ private constructor( * Used for best match union deserialization. */ @JvmSynthetic - internal fun validity(): Int = if (value() == Value._UNKNOWN) 0 else 1 + internal fun validity(): Int = + type.let { if (it == JsonValue.from("semantic_vad")) 1 else 0 } + + (if (createResponse.asKnown().isPresent) 1 else 0) + + (eagerness.asKnown().getOrNull()?.validity() ?: 0) + + (if (interruptResponse.asKnown().isPresent) 1 else 0) + + /** + * Used only for `semantic_vad` mode. The eagerness of the model to respond. + * `low` will wait longer for the user to continue speaking, `high` will respond + * more quickly. `auto` is the default and is equivalent to `medium`. `low`, + * `medium`, and `high` have max timeouts of 8s, 4s, and 2s respectively. + */ + class Eagerness + @JsonCreator + private constructor(private val value: JsonField) : Enum { + + /** + * Returns this class instance's raw value. + * + * This is usually only useful if this instance was deserialized from data + * that doesn't match any known member, and you want to know that value. For + * example, if the SDK is on an older version than the API, then the API may + * respond with new members that the SDK is unaware of. + */ + @com.fasterxml.jackson.annotation.JsonValue + fun _value(): JsonField = value + + companion object { + + @JvmField val LOW = of("low") + + @JvmField val MEDIUM = of("medium") + + @JvmField val HIGH = of("high") + + @JvmField val AUTO = of("auto") + + @JvmStatic fun of(value: String) = Eagerness(JsonField.of(value)) + } + + /** An enum containing [Eagerness]'s known values. */ + enum class Known { + LOW, + MEDIUM, + HIGH, + AUTO, + } + + /** + * An enum containing [Eagerness]'s known values, as well as an [_UNKNOWN] + * member. + * + * An instance of [Eagerness] can contain an unknown value in a couple of + * cases: + * - It was deserialized from data that doesn't match any known member. For + * example, if the SDK is on an older version than the API, then the API + * may respond with new members that the SDK is unaware of. + * - It was constructed with an arbitrary value using the [of] method. + */ + enum class Value { + LOW, + MEDIUM, + HIGH, + AUTO, + /** + * An enum member indicating that [Eagerness] was instantiated with an + * unknown value. + */ + _UNKNOWN, + } + + /** + * Returns an enum member corresponding to this class instance's value, or + * [Value._UNKNOWN] if the class was instantiated with an unknown value. + * + * Use the [known] method instead if you're certain the value is always + * known or if you want to throw for the unknown case. + */ + fun value(): Value = + when (this) { + LOW -> Value.LOW + MEDIUM -> Value.MEDIUM + HIGH -> Value.HIGH + AUTO -> Value.AUTO + else -> Value._UNKNOWN + } + + /** + * Returns an enum member corresponding to this class instance's value. + * + * Use the [value] method instead if you're uncertain the value is always + * known and don't want to throw for the unknown case. + * + * @throws OpenAIInvalidDataException if this class instance's value is a + * not a known member. + */ + fun known(): Known = + when (this) { + LOW -> Known.LOW + MEDIUM -> Known.MEDIUM + HIGH -> Known.HIGH + AUTO -> Known.AUTO + else -> + throw OpenAIInvalidDataException("Unknown Eagerness: $value") + } + + /** + * Returns this class instance's primitive wire representation. + * + * This differs from the [toString] method because that method is primarily + * for debugging and generally doesn't throw. + * + * @throws OpenAIInvalidDataException if this class instance's value does + * not have the expected primitive type. + */ + fun asString(): String = + _value().asString().orElseThrow { + OpenAIInvalidDataException("Value is not a String") + } + + private var validated: Boolean = false + + fun validate(): Eagerness = apply { + if (validated) { + return@apply + } + + known() + validated = true + } + + fun isValid(): Boolean = + try { + validate() + true + } catch (e: OpenAIInvalidDataException) { + false + } + + /** + * Returns a score indicating how many valid values are contained in this + * object recursively. + * + * Used for best match union deserialization. + */ + @JvmSynthetic + internal fun validity(): Int = if (value() == Value._UNKNOWN) 0 else 1 + + override fun equals(other: Any?): Boolean { + if (this === other) { + return true + } + + return other is Eagerness && value == other.value + } + + override fun hashCode() = value.hashCode() + + override fun toString() = value.toString() + } override fun equals(other: Any?): Boolean { if (this === other) { return true } - return other is Type && value == other.value + return other is SemanticVad && + type == other.type && + createResponse == other.createResponse && + eagerness == other.eagerness && + interruptResponse == other.interruptResponse && + additionalProperties == other.additionalProperties } - override fun hashCode() = value.hashCode() - - override fun toString() = value.toString() - } - - override fun equals(other: Any?): Boolean { - if (this === other) { - return true + private val hashCode: Int by lazy { + Objects.hash( + type, + createResponse, + eagerness, + interruptResponse, + additionalProperties, + ) } - return other is TurnDetection && - createResponse == other.createResponse && - eagerness == other.eagerness && - idleTimeoutMs == other.idleTimeoutMs && - interruptResponse == other.interruptResponse && - prefixPaddingMs == other.prefixPaddingMs && - silenceDurationMs == other.silenceDurationMs && - threshold == other.threshold && - type == other.type && - additionalProperties == other.additionalProperties - } + override fun hashCode(): Int = hashCode - private val hashCode: Int by lazy { - Objects.hash( - createResponse, - eagerness, - idleTimeoutMs, - interruptResponse, - prefixPaddingMs, - silenceDurationMs, - threshold, - type, - additionalProperties, - ) + override fun toString() = + "SemanticVad{type=$type, createResponse=$createResponse, eagerness=$eagerness, interruptResponse=$interruptResponse, additionalProperties=$additionalProperties}" } - - override fun hashCode(): Int = hashCode - - override fun toString() = - "TurnDetection{createResponse=$createResponse, eagerness=$eagerness, idleTimeoutMs=$idleTimeoutMs, interruptResponse=$interruptResponse, prefixPaddingMs=$prefixPaddingMs, silenceDurationMs=$silenceDurationMs, threshold=$threshold, type=$type, additionalProperties=$additionalProperties}" } override fun equals(other: Any?): Boolean { diff --git a/openai-java-core/src/main/kotlin/com/openai/models/responses/Response.kt b/openai-java-core/src/main/kotlin/com/openai/models/responses/Response.kt index a7c5cc75..f18ad919 100644 --- a/openai-java-core/src/main/kotlin/com/openai/models/responses/Response.kt +++ b/openai-java-core/src/main/kotlin/com/openai/models/responses/Response.kt @@ -473,10 +473,10 @@ private constructor( /** * The truncation strategy to use for the model response. - * - `auto`: If the context of this response and previous ones exceeds the model's context - * window size, the model will truncate the response to fit the context window by dropping - * input items in the middle of the conversation. - * - `disabled` (default): If a model response will exceed the context window size for a model, + * - `auto`: If the input to this Response exceeds the model's context window size, the model + * will truncate the response to fit the context window by dropping items from the beginning + * of the conversation. + * - `disabled` (default): If the input size will exceed the context window size for a model, * the request will fail with a 400 error. * * @throws OpenAIInvalidDataException if the JSON field has an unexpected type (e.g. if the @@ -1634,10 +1634,10 @@ private constructor( /** * The truncation strategy to use for the model response. - * - `auto`: If the context of this response and previous ones exceeds the model's context - * window size, the model will truncate the response to fit the context window by dropping - * input items in the middle of the conversation. - * - `disabled` (default): If a model response will exceed the context window size for a + * - `auto`: If the input to this Response exceeds the model's context window size, the + * model will truncate the response to fit the context window by dropping items from the + * beginning of the conversation. + * - `disabled` (default): If the input size will exceed the context window size for a * model, the request will fail with a 400 error. */ fun truncation(truncation: Truncation?) = truncation(JsonField.ofNullable(truncation)) @@ -3103,10 +3103,10 @@ private constructor( /** * The truncation strategy to use for the model response. - * - `auto`: If the context of this response and previous ones exceeds the model's context - * window size, the model will truncate the response to fit the context window by dropping - * input items in the middle of the conversation. - * - `disabled` (default): If a model response will exceed the context window size for a model, + * - `auto`: If the input to this Response exceeds the model's context window size, the model + * will truncate the response to fit the context window by dropping items from the beginning + * of the conversation. + * - `disabled` (default): If the input size will exceed the context window size for a model, * the request will fail with a 400 error. */ class Truncation @JsonCreator private constructor(private val value: JsonField) : Enum { diff --git a/openai-java-core/src/main/kotlin/com/openai/models/responses/ResponseCreateParams.kt b/openai-java-core/src/main/kotlin/com/openai/models/responses/ResponseCreateParams.kt index d842e305..fd89e687 100644 --- a/openai-java-core/src/main/kotlin/com/openai/models/responses/ResponseCreateParams.kt +++ b/openai-java-core/src/main/kotlin/com/openai/models/responses/ResponseCreateParams.kt @@ -338,10 +338,10 @@ private constructor( /** * The truncation strategy to use for the model response. - * - `auto`: If the context of this response and previous ones exceeds the model's context - * window size, the model will truncate the response to fit the context window by dropping - * input items in the middle of the conversation. - * - `disabled` (default): If a model response will exceed the context window size for a model, + * - `auto`: If the input to this Response exceeds the model's context window size, the model + * will truncate the response to fit the context window by dropping items from the beginning + * of the conversation. + * - `disabled` (default): If the input size will exceed the context window size for a model, * the request will fail with a 400 error. * * @throws OpenAIInvalidDataException if the JSON field has an unexpected type (e.g. if the @@ -1361,10 +1361,10 @@ private constructor( /** * The truncation strategy to use for the model response. - * - `auto`: If the context of this response and previous ones exceeds the model's context - * window size, the model will truncate the response to fit the context window by dropping - * input items in the middle of the conversation. - * - `disabled` (default): If a model response will exceed the context window size for a + * - `auto`: If the input to this Response exceeds the model's context window size, the + * model will truncate the response to fit the context window by dropping items from the + * beginning of the conversation. + * - `disabled` (default): If the input size will exceed the context window size for a * model, the request will fail with a 400 error. */ fun truncation(truncation: Truncation?) = apply { body.truncation(truncation) } @@ -1952,10 +1952,10 @@ private constructor( /** * The truncation strategy to use for the model response. - * - `auto`: If the context of this response and previous ones exceeds the model's context - * window size, the model will truncate the response to fit the context window by dropping - * input items in the middle of the conversation. - * - `disabled` (default): If a model response will exceed the context window size for a + * - `auto`: If the input to this Response exceeds the model's context window size, the + * model will truncate the response to fit the context window by dropping items from the + * beginning of the conversation. + * - `disabled` (default): If the input size will exceed the context window size for a * model, the request will fail with a 400 error. * * @throws OpenAIInvalidDataException if the JSON field has an unexpected type (e.g. if the @@ -3032,10 +3032,10 @@ private constructor( /** * The truncation strategy to use for the model response. - * - `auto`: If the context of this response and previous ones exceeds the model's - * context window size, the model will truncate the response to fit the context window - * by dropping input items in the middle of the conversation. - * - `disabled` (default): If a model response will exceed the context window size for a + * - `auto`: If the input to this Response exceeds the model's context window size, the + * model will truncate the response to fit the context window by dropping items from + * the beginning of the conversation. + * - `disabled` (default): If the input size will exceed the context window size for a * model, the request will fail with a 400 error. */ fun truncation(truncation: Truncation?) = truncation(JsonField.ofNullable(truncation)) @@ -4460,10 +4460,10 @@ private constructor( /** * The truncation strategy to use for the model response. - * - `auto`: If the context of this response and previous ones exceeds the model's context - * window size, the model will truncate the response to fit the context window by dropping - * input items in the middle of the conversation. - * - `disabled` (default): If a model response will exceed the context window size for a model, + * - `auto`: If the input to this Response exceeds the model's context window size, the model + * will truncate the response to fit the context window by dropping items from the beginning + * of the conversation. + * - `disabled` (default): If the input size will exceed the context window size for a model, * the request will fail with a 400 error. */ class Truncation @JsonCreator private constructor(private val value: JsonField) : Enum { diff --git a/openai-java-core/src/test/kotlin/com/openai/models/realtime/RealtimeAudioConfigInputTest.kt b/openai-java-core/src/test/kotlin/com/openai/models/realtime/RealtimeAudioConfigInputTest.kt index 5e1d51c5..be3b69f9 100644 --- a/openai-java-core/src/test/kotlin/com/openai/models/realtime/RealtimeAudioConfigInputTest.kt +++ b/openai-java-core/src/test/kotlin/com/openai/models/realtime/RealtimeAudioConfigInputTest.kt @@ -32,15 +32,13 @@ internal class RealtimeAudioConfigInputTest { .build() ) .turnDetection( - RealtimeAudioInputTurnDetection.builder() + RealtimeAudioInputTurnDetection.ServerVad.builder() .createResponse(true) - .eagerness(RealtimeAudioInputTurnDetection.Eagerness.LOW) - .idleTimeoutMs(0L) + .idleTimeoutMs(5000L) .interruptResponse(true) .prefixPaddingMs(0L) .silenceDurationMs(0L) .threshold(0.0) - .type(RealtimeAudioInputTurnDetection.Type.SERVER_VAD) .build() ) .build() @@ -70,16 +68,16 @@ internal class RealtimeAudioConfigInputTest { ) assertThat(realtimeAudioConfigInput.turnDetection()) .contains( - RealtimeAudioInputTurnDetection.builder() - .createResponse(true) - .eagerness(RealtimeAudioInputTurnDetection.Eagerness.LOW) - .idleTimeoutMs(0L) - .interruptResponse(true) - .prefixPaddingMs(0L) - .silenceDurationMs(0L) - .threshold(0.0) - .type(RealtimeAudioInputTurnDetection.Type.SERVER_VAD) - .build() + RealtimeAudioInputTurnDetection.ofServerVad( + RealtimeAudioInputTurnDetection.ServerVad.builder() + .createResponse(true) + .idleTimeoutMs(5000L) + .interruptResponse(true) + .prefixPaddingMs(0L) + .silenceDurationMs(0L) + .threshold(0.0) + .build() + ) ) } @@ -107,15 +105,13 @@ internal class RealtimeAudioConfigInputTest { .build() ) .turnDetection( - RealtimeAudioInputTurnDetection.builder() + RealtimeAudioInputTurnDetection.ServerVad.builder() .createResponse(true) - .eagerness(RealtimeAudioInputTurnDetection.Eagerness.LOW) - .idleTimeoutMs(0L) + .idleTimeoutMs(5000L) .interruptResponse(true) .prefixPaddingMs(0L) .silenceDurationMs(0L) .threshold(0.0) - .type(RealtimeAudioInputTurnDetection.Type.SERVER_VAD) .build() ) .build() diff --git a/openai-java-core/src/test/kotlin/com/openai/models/realtime/RealtimeAudioConfigTest.kt b/openai-java-core/src/test/kotlin/com/openai/models/realtime/RealtimeAudioConfigTest.kt index b1b5e001..01653524 100644 --- a/openai-java-core/src/test/kotlin/com/openai/models/realtime/RealtimeAudioConfigTest.kt +++ b/openai-java-core/src/test/kotlin/com/openai/models/realtime/RealtimeAudioConfigTest.kt @@ -34,15 +34,13 @@ internal class RealtimeAudioConfigTest { .build() ) .turnDetection( - RealtimeAudioInputTurnDetection.builder() + RealtimeAudioInputTurnDetection.ServerVad.builder() .createResponse(true) - .eagerness(RealtimeAudioInputTurnDetection.Eagerness.LOW) - .idleTimeoutMs(0L) + .idleTimeoutMs(5000L) .interruptResponse(true) .prefixPaddingMs(0L) .silenceDurationMs(0L) .threshold(0.0) - .type(RealtimeAudioInputTurnDetection.Type.SERVER_VAD) .build() ) .build() @@ -83,15 +81,13 @@ internal class RealtimeAudioConfigTest { .build() ) .turnDetection( - RealtimeAudioInputTurnDetection.builder() + RealtimeAudioInputTurnDetection.ServerVad.builder() .createResponse(true) - .eagerness(RealtimeAudioInputTurnDetection.Eagerness.LOW) - .idleTimeoutMs(0L) + .idleTimeoutMs(5000L) .interruptResponse(true) .prefixPaddingMs(0L) .silenceDurationMs(0L) .threshold(0.0) - .type(RealtimeAudioInputTurnDetection.Type.SERVER_VAD) .build() ) .build() @@ -137,15 +133,13 @@ internal class RealtimeAudioConfigTest { .build() ) .turnDetection( - RealtimeAudioInputTurnDetection.builder() + RealtimeAudioInputTurnDetection.ServerVad.builder() .createResponse(true) - .eagerness(RealtimeAudioInputTurnDetection.Eagerness.LOW) - .idleTimeoutMs(0L) + .idleTimeoutMs(5000L) .interruptResponse(true) .prefixPaddingMs(0L) .silenceDurationMs(0L) .threshold(0.0) - .type(RealtimeAudioInputTurnDetection.Type.SERVER_VAD) .build() ) .build() diff --git a/openai-java-core/src/test/kotlin/com/openai/models/realtime/RealtimeAudioInputTurnDetectionTest.kt b/openai-java-core/src/test/kotlin/com/openai/models/realtime/RealtimeAudioInputTurnDetectionTest.kt index c74579eb..cb8b26dd 100644 --- a/openai-java-core/src/test/kotlin/com/openai/models/realtime/RealtimeAudioInputTurnDetectionTest.kt +++ b/openai-java-core/src/test/kotlin/com/openai/models/realtime/RealtimeAudioInputTurnDetectionTest.kt @@ -3,53 +3,88 @@ package com.openai.models.realtime import com.fasterxml.jackson.module.kotlin.jacksonTypeRef +import com.openai.core.JsonValue import com.openai.core.jsonMapper +import com.openai.errors.OpenAIInvalidDataException import org.assertj.core.api.Assertions.assertThat import org.junit.jupiter.api.Test +import org.junit.jupiter.api.assertThrows +import org.junit.jupiter.params.ParameterizedTest +import org.junit.jupiter.params.provider.EnumSource internal class RealtimeAudioInputTurnDetectionTest { @Test - fun create() { - val realtimeAudioInputTurnDetection = - RealtimeAudioInputTurnDetection.builder() + fun ofServerVad() { + val serverVad = + RealtimeAudioInputTurnDetection.ServerVad.builder() .createResponse(true) - .eagerness(RealtimeAudioInputTurnDetection.Eagerness.LOW) - .idleTimeoutMs(0L) + .idleTimeoutMs(5000L) .interruptResponse(true) .prefixPaddingMs(0L) .silenceDurationMs(0L) .threshold(0.0) - .type(RealtimeAudioInputTurnDetection.Type.SERVER_VAD) .build() - assertThat(realtimeAudioInputTurnDetection.createResponse()).contains(true) - assertThat(realtimeAudioInputTurnDetection.eagerness()) - .contains(RealtimeAudioInputTurnDetection.Eagerness.LOW) - assertThat(realtimeAudioInputTurnDetection.idleTimeoutMs()).contains(0L) - assertThat(realtimeAudioInputTurnDetection.interruptResponse()).contains(true) - assertThat(realtimeAudioInputTurnDetection.prefixPaddingMs()).contains(0L) - assertThat(realtimeAudioInputTurnDetection.silenceDurationMs()).contains(0L) - assertThat(realtimeAudioInputTurnDetection.threshold()).contains(0.0) - assertThat(realtimeAudioInputTurnDetection.type()) - .contains(RealtimeAudioInputTurnDetection.Type.SERVER_VAD) + val realtimeAudioInputTurnDetection = RealtimeAudioInputTurnDetection.ofServerVad(serverVad) + + assertThat(realtimeAudioInputTurnDetection.serverVad()).contains(serverVad) + assertThat(realtimeAudioInputTurnDetection.semanticVad()).isEmpty } @Test - fun roundtrip() { + fun ofServerVadRoundtrip() { val jsonMapper = jsonMapper() val realtimeAudioInputTurnDetection = - RealtimeAudioInputTurnDetection.builder() + RealtimeAudioInputTurnDetection.ofServerVad( + RealtimeAudioInputTurnDetection.ServerVad.builder() + .createResponse(true) + .idleTimeoutMs(5000L) + .interruptResponse(true) + .prefixPaddingMs(0L) + .silenceDurationMs(0L) + .threshold(0.0) + .build() + ) + + val roundtrippedRealtimeAudioInputTurnDetection = + jsonMapper.readValue( + jsonMapper.writeValueAsString(realtimeAudioInputTurnDetection), + jacksonTypeRef(), + ) + + assertThat(roundtrippedRealtimeAudioInputTurnDetection) + .isEqualTo(realtimeAudioInputTurnDetection) + } + + @Test + fun ofSemanticVad() { + val semanticVad = + RealtimeAudioInputTurnDetection.SemanticVad.builder() .createResponse(true) - .eagerness(RealtimeAudioInputTurnDetection.Eagerness.LOW) - .idleTimeoutMs(0L) + .eagerness(RealtimeAudioInputTurnDetection.SemanticVad.Eagerness.LOW) .interruptResponse(true) - .prefixPaddingMs(0L) - .silenceDurationMs(0L) - .threshold(0.0) - .type(RealtimeAudioInputTurnDetection.Type.SERVER_VAD) .build() + val realtimeAudioInputTurnDetection = + RealtimeAudioInputTurnDetection.ofSemanticVad(semanticVad) + + assertThat(realtimeAudioInputTurnDetection.serverVad()).isEmpty + assertThat(realtimeAudioInputTurnDetection.semanticVad()).contains(semanticVad) + } + + @Test + fun ofSemanticVadRoundtrip() { + val jsonMapper = jsonMapper() + val realtimeAudioInputTurnDetection = + RealtimeAudioInputTurnDetection.ofSemanticVad( + RealtimeAudioInputTurnDetection.SemanticVad.builder() + .createResponse(true) + .eagerness(RealtimeAudioInputTurnDetection.SemanticVad.Eagerness.LOW) + .interruptResponse(true) + .build() + ) + val roundtrippedRealtimeAudioInputTurnDetection = jsonMapper.readValue( jsonMapper.writeValueAsString(realtimeAudioInputTurnDetection), @@ -59,4 +94,24 @@ internal class RealtimeAudioInputTurnDetectionTest { assertThat(roundtrippedRealtimeAudioInputTurnDetection) .isEqualTo(realtimeAudioInputTurnDetection) } + + enum class IncompatibleJsonShapeTestCase(val value: JsonValue) { + BOOLEAN(JsonValue.from(false)), + STRING(JsonValue.from("invalid")), + INTEGER(JsonValue.from(-1)), + FLOAT(JsonValue.from(3.14)), + ARRAY(JsonValue.from(listOf("invalid", "array"))), + } + + @ParameterizedTest + @EnumSource + fun incompatibleJsonShapeDeserializesToUnknown(testCase: IncompatibleJsonShapeTestCase) { + val realtimeAudioInputTurnDetection = + jsonMapper() + .convertValue(testCase.value, jacksonTypeRef()) + + val e = + assertThrows { realtimeAudioInputTurnDetection.validate() } + assertThat(e).hasMessageStartingWith("Unknown ") + } } diff --git a/openai-java-core/src/test/kotlin/com/openai/models/realtime/RealtimeClientEventTest.kt b/openai-java-core/src/test/kotlin/com/openai/models/realtime/RealtimeClientEventTest.kt index 2e19d158..722e0492 100644 --- a/openai-java-core/src/test/kotlin/com/openai/models/realtime/RealtimeClientEventTest.kt +++ b/openai-java-core/src/test/kotlin/com/openai/models/realtime/RealtimeClientEventTest.kt @@ -609,19 +609,13 @@ internal class RealtimeClientEventTest { .build() ) .turnDetection( - RealtimeAudioInputTurnDetection.builder() + RealtimeAudioInputTurnDetection.ServerVad.builder() .createResponse(true) - .eagerness( - RealtimeAudioInputTurnDetection.Eagerness.LOW - ) - .idleTimeoutMs(0L) + .idleTimeoutMs(5000L) .interruptResponse(true) .prefixPaddingMs(0L) .silenceDurationMs(0L) .threshold(0.0) - .type( - RealtimeAudioInputTurnDetection.Type.SERVER_VAD - ) .build() ) .build() @@ -723,21 +717,13 @@ internal class RealtimeClientEventTest { .build() ) .turnDetection( - RealtimeAudioInputTurnDetection.builder() + RealtimeAudioInputTurnDetection.ServerVad.builder() .createResponse(true) - .eagerness( - RealtimeAudioInputTurnDetection.Eagerness - .LOW - ) - .idleTimeoutMs(0L) + .idleTimeoutMs(5000L) .interruptResponse(true) .prefixPaddingMs(0L) .silenceDurationMs(0L) .threshold(0.0) - .type( - RealtimeAudioInputTurnDetection.Type - .SERVER_VAD - ) .build() ) .build() diff --git a/openai-java-core/src/test/kotlin/com/openai/models/realtime/RealtimeServerEventTest.kt b/openai-java-core/src/test/kotlin/com/openai/models/realtime/RealtimeServerEventTest.kt index 774ed591..3424a1e7 100644 --- a/openai-java-core/src/test/kotlin/com/openai/models/realtime/RealtimeServerEventTest.kt +++ b/openai-java-core/src/test/kotlin/com/openai/models/realtime/RealtimeServerEventTest.kt @@ -2947,19 +2947,13 @@ internal class RealtimeServerEventTest { .build() ) .turnDetection( - RealtimeAudioInputTurnDetection.builder() + RealtimeAudioInputTurnDetection.ServerVad.builder() .createResponse(true) - .eagerness( - RealtimeAudioInputTurnDetection.Eagerness.LOW - ) - .idleTimeoutMs(0L) + .idleTimeoutMs(5000L) .interruptResponse(true) .prefixPaddingMs(0L) .silenceDurationMs(0L) .threshold(0.0) - .type( - RealtimeAudioInputTurnDetection.Type.SERVER_VAD - ) .build() ) .build() @@ -3095,21 +3089,13 @@ internal class RealtimeServerEventTest { .build() ) .turnDetection( - RealtimeAudioInputTurnDetection.builder() + RealtimeAudioInputTurnDetection.ServerVad.builder() .createResponse(true) - .eagerness( - RealtimeAudioInputTurnDetection.Eagerness - .LOW - ) - .idleTimeoutMs(0L) + .idleTimeoutMs(5000L) .interruptResponse(true) .prefixPaddingMs(0L) .silenceDurationMs(0L) .threshold(0.0) - .type( - RealtimeAudioInputTurnDetection.Type - .SERVER_VAD - ) .build() ) .build() @@ -3204,19 +3190,13 @@ internal class RealtimeServerEventTest { .build() ) .turnDetection( - RealtimeAudioInputTurnDetection.builder() + RealtimeAudioInputTurnDetection.ServerVad.builder() .createResponse(true) - .eagerness( - RealtimeAudioInputTurnDetection.Eagerness.LOW - ) - .idleTimeoutMs(0L) + .idleTimeoutMs(5000L) .interruptResponse(true) .prefixPaddingMs(0L) .silenceDurationMs(0L) .threshold(0.0) - .type( - RealtimeAudioInputTurnDetection.Type.SERVER_VAD - ) .build() ) .build() @@ -3352,21 +3332,13 @@ internal class RealtimeServerEventTest { .build() ) .turnDetection( - RealtimeAudioInputTurnDetection.builder() + RealtimeAudioInputTurnDetection.ServerVad.builder() .createResponse(true) - .eagerness( - RealtimeAudioInputTurnDetection.Eagerness - .LOW - ) - .idleTimeoutMs(0L) + .idleTimeoutMs(5000L) .interruptResponse(true) .prefixPaddingMs(0L) .silenceDurationMs(0L) .threshold(0.0) - .type( - RealtimeAudioInputTurnDetection.Type - .SERVER_VAD - ) .build() ) .build() diff --git a/openai-java-core/src/test/kotlin/com/openai/models/realtime/RealtimeSessionCreateRequestTest.kt b/openai-java-core/src/test/kotlin/com/openai/models/realtime/RealtimeSessionCreateRequestTest.kt index 7869e759..b9125dc7 100644 --- a/openai-java-core/src/test/kotlin/com/openai/models/realtime/RealtimeSessionCreateRequestTest.kt +++ b/openai-java-core/src/test/kotlin/com/openai/models/realtime/RealtimeSessionCreateRequestTest.kt @@ -40,15 +40,13 @@ internal class RealtimeSessionCreateRequestTest { .build() ) .turnDetection( - RealtimeAudioInputTurnDetection.builder() + RealtimeAudioInputTurnDetection.ServerVad.builder() .createResponse(true) - .eagerness(RealtimeAudioInputTurnDetection.Eagerness.LOW) - .idleTimeoutMs(0L) + .idleTimeoutMs(5000L) .interruptResponse(true) .prefixPaddingMs(0L) .silenceDurationMs(0L) .threshold(0.0) - .type(RealtimeAudioInputTurnDetection.Type.SERVER_VAD) .build() ) .build() @@ -122,15 +120,13 @@ internal class RealtimeSessionCreateRequestTest { .build() ) .turnDetection( - RealtimeAudioInputTurnDetection.builder() + RealtimeAudioInputTurnDetection.ServerVad.builder() .createResponse(true) - .eagerness(RealtimeAudioInputTurnDetection.Eagerness.LOW) - .idleTimeoutMs(0L) + .idleTimeoutMs(5000L) .interruptResponse(true) .prefixPaddingMs(0L) .silenceDurationMs(0L) .threshold(0.0) - .type(RealtimeAudioInputTurnDetection.Type.SERVER_VAD) .build() ) .build() @@ -220,15 +216,13 @@ internal class RealtimeSessionCreateRequestTest { .build() ) .turnDetection( - RealtimeAudioInputTurnDetection.builder() + RealtimeAudioInputTurnDetection.ServerVad.builder() .createResponse(true) - .eagerness(RealtimeAudioInputTurnDetection.Eagerness.LOW) - .idleTimeoutMs(0L) + .idleTimeoutMs(5000L) .interruptResponse(true) .prefixPaddingMs(0L) .silenceDurationMs(0L) .threshold(0.0) - .type(RealtimeAudioInputTurnDetection.Type.SERVER_VAD) .build() ) .build() diff --git a/openai-java-core/src/test/kotlin/com/openai/models/realtime/RealtimeSessionTest.kt b/openai-java-core/src/test/kotlin/com/openai/models/realtime/RealtimeSessionTest.kt index e116ccaf..adc2d004 100644 --- a/openai-java-core/src/test/kotlin/com/openai/models/realtime/RealtimeSessionTest.kt +++ b/openai-java-core/src/test/kotlin/com/openai/models/realtime/RealtimeSessionTest.kt @@ -62,15 +62,13 @@ internal class RealtimeSessionTest { ) .tracingAuto() .turnDetection( - RealtimeSession.TurnDetection.builder() + RealtimeSession.TurnDetection.ServerVad.builder() .createResponse(true) - .eagerness(RealtimeSession.TurnDetection.Eagerness.LOW) - .idleTimeoutMs(0L) + .idleTimeoutMs(5000L) .interruptResponse(true) .prefixPaddingMs(0L) .silenceDurationMs(0L) .threshold(0.0) - .type(RealtimeSession.TurnDetection.Type.SERVER_VAD) .build() ) .voice(RealtimeSession.Voice.ALLOY) @@ -132,16 +130,16 @@ internal class RealtimeSessionTest { assertThat(realtimeSession.tracing()).contains(RealtimeSession.Tracing.ofAuto()) assertThat(realtimeSession.turnDetection()) .contains( - RealtimeSession.TurnDetection.builder() - .createResponse(true) - .eagerness(RealtimeSession.TurnDetection.Eagerness.LOW) - .idleTimeoutMs(0L) - .interruptResponse(true) - .prefixPaddingMs(0L) - .silenceDurationMs(0L) - .threshold(0.0) - .type(RealtimeSession.TurnDetection.Type.SERVER_VAD) - .build() + RealtimeSession.TurnDetection.ofServerVad( + RealtimeSession.TurnDetection.ServerVad.builder() + .createResponse(true) + .idleTimeoutMs(5000L) + .interruptResponse(true) + .prefixPaddingMs(0L) + .silenceDurationMs(0L) + .threshold(0.0) + .build() + ) ) assertThat(realtimeSession.voice()).contains(RealtimeSession.Voice.ALLOY) } @@ -197,15 +195,13 @@ internal class RealtimeSessionTest { ) .tracingAuto() .turnDetection( - RealtimeSession.TurnDetection.builder() + RealtimeSession.TurnDetection.ServerVad.builder() .createResponse(true) - .eagerness(RealtimeSession.TurnDetection.Eagerness.LOW) - .idleTimeoutMs(0L) + .idleTimeoutMs(5000L) .interruptResponse(true) .prefixPaddingMs(0L) .silenceDurationMs(0L) .threshold(0.0) - .type(RealtimeSession.TurnDetection.Type.SERVER_VAD) .build() ) .voice(RealtimeSession.Voice.ALLOY) diff --git a/openai-java-core/src/test/kotlin/com/openai/models/realtime/RealtimeTranscriptionSessionAudioInputTest.kt b/openai-java-core/src/test/kotlin/com/openai/models/realtime/RealtimeTranscriptionSessionAudioInputTest.kt index 2ff8c916..62bd850a 100644 --- a/openai-java-core/src/test/kotlin/com/openai/models/realtime/RealtimeTranscriptionSessionAudioInputTest.kt +++ b/openai-java-core/src/test/kotlin/com/openai/models/realtime/RealtimeTranscriptionSessionAudioInputTest.kt @@ -32,17 +32,13 @@ internal class RealtimeTranscriptionSessionAudioInputTest { .build() ) .turnDetection( - RealtimeTranscriptionSessionAudioInputTurnDetection.builder() + RealtimeTranscriptionSessionAudioInputTurnDetection.ServerVad.builder() .createResponse(true) - .eagerness( - RealtimeTranscriptionSessionAudioInputTurnDetection.Eagerness.LOW - ) - .idleTimeoutMs(0L) + .idleTimeoutMs(5000L) .interruptResponse(true) .prefixPaddingMs(0L) .silenceDurationMs(0L) .threshold(0.0) - .type(RealtimeTranscriptionSessionAudioInputTurnDetection.Type.SERVER_VAD) .build() ) .build() @@ -72,16 +68,16 @@ internal class RealtimeTranscriptionSessionAudioInputTest { ) assertThat(realtimeTranscriptionSessionAudioInput.turnDetection()) .contains( - RealtimeTranscriptionSessionAudioInputTurnDetection.builder() - .createResponse(true) - .eagerness(RealtimeTranscriptionSessionAudioInputTurnDetection.Eagerness.LOW) - .idleTimeoutMs(0L) - .interruptResponse(true) - .prefixPaddingMs(0L) - .silenceDurationMs(0L) - .threshold(0.0) - .type(RealtimeTranscriptionSessionAudioInputTurnDetection.Type.SERVER_VAD) - .build() + RealtimeTranscriptionSessionAudioInputTurnDetection.ofServerVad( + RealtimeTranscriptionSessionAudioInputTurnDetection.ServerVad.builder() + .createResponse(true) + .idleTimeoutMs(5000L) + .interruptResponse(true) + .prefixPaddingMs(0L) + .silenceDurationMs(0L) + .threshold(0.0) + .build() + ) ) } @@ -109,17 +105,13 @@ internal class RealtimeTranscriptionSessionAudioInputTest { .build() ) .turnDetection( - RealtimeTranscriptionSessionAudioInputTurnDetection.builder() + RealtimeTranscriptionSessionAudioInputTurnDetection.ServerVad.builder() .createResponse(true) - .eagerness( - RealtimeTranscriptionSessionAudioInputTurnDetection.Eagerness.LOW - ) - .idleTimeoutMs(0L) + .idleTimeoutMs(5000L) .interruptResponse(true) .prefixPaddingMs(0L) .silenceDurationMs(0L) .threshold(0.0) - .type(RealtimeTranscriptionSessionAudioInputTurnDetection.Type.SERVER_VAD) .build() ) .build() diff --git a/openai-java-core/src/test/kotlin/com/openai/models/realtime/RealtimeTranscriptionSessionAudioInputTurnDetectionTest.kt b/openai-java-core/src/test/kotlin/com/openai/models/realtime/RealtimeTranscriptionSessionAudioInputTurnDetectionTest.kt index f03b5761..e5e5835a 100644 --- a/openai-java-core/src/test/kotlin/com/openai/models/realtime/RealtimeTranscriptionSessionAudioInputTurnDetectionTest.kt +++ b/openai-java-core/src/test/kotlin/com/openai/models/realtime/RealtimeTranscriptionSessionAudioInputTurnDetectionTest.kt @@ -3,57 +3,96 @@ package com.openai.models.realtime import com.fasterxml.jackson.module.kotlin.jacksonTypeRef +import com.openai.core.JsonValue import com.openai.core.jsonMapper +import com.openai.errors.OpenAIInvalidDataException import org.assertj.core.api.Assertions.assertThat import org.junit.jupiter.api.Test +import org.junit.jupiter.api.assertThrows +import org.junit.jupiter.params.ParameterizedTest +import org.junit.jupiter.params.provider.EnumSource internal class RealtimeTranscriptionSessionAudioInputTurnDetectionTest { @Test - fun create() { - val realtimeTranscriptionSessionAudioInputTurnDetection = - RealtimeTranscriptionSessionAudioInputTurnDetection.builder() + fun ofServerVad() { + val serverVad = + RealtimeTranscriptionSessionAudioInputTurnDetection.ServerVad.builder() .createResponse(true) - .eagerness(RealtimeTranscriptionSessionAudioInputTurnDetection.Eagerness.LOW) - .idleTimeoutMs(0L) + .idleTimeoutMs(5000L) .interruptResponse(true) .prefixPaddingMs(0L) .silenceDurationMs(0L) .threshold(0.0) - .type(RealtimeTranscriptionSessionAudioInputTurnDetection.Type.SERVER_VAD) .build() - assertThat(realtimeTranscriptionSessionAudioInputTurnDetection.createResponse()) - .contains(true) - assertThat(realtimeTranscriptionSessionAudioInputTurnDetection.eagerness()) - .contains(RealtimeTranscriptionSessionAudioInputTurnDetection.Eagerness.LOW) - assertThat(realtimeTranscriptionSessionAudioInputTurnDetection.idleTimeoutMs()).contains(0L) - assertThat(realtimeTranscriptionSessionAudioInputTurnDetection.interruptResponse()) - .contains(true) - assertThat(realtimeTranscriptionSessionAudioInputTurnDetection.prefixPaddingMs()) - .contains(0L) - assertThat(realtimeTranscriptionSessionAudioInputTurnDetection.silenceDurationMs()) - .contains(0L) - assertThat(realtimeTranscriptionSessionAudioInputTurnDetection.threshold()).contains(0.0) - assertThat(realtimeTranscriptionSessionAudioInputTurnDetection.type()) - .contains(RealtimeTranscriptionSessionAudioInputTurnDetection.Type.SERVER_VAD) + val realtimeTranscriptionSessionAudioInputTurnDetection = + RealtimeTranscriptionSessionAudioInputTurnDetection.ofServerVad(serverVad) + + assertThat(realtimeTranscriptionSessionAudioInputTurnDetection.serverVad()) + .contains(serverVad) + assertThat(realtimeTranscriptionSessionAudioInputTurnDetection.semanticVad()).isEmpty } @Test - fun roundtrip() { + fun ofServerVadRoundtrip() { val jsonMapper = jsonMapper() val realtimeTranscriptionSessionAudioInputTurnDetection = - RealtimeTranscriptionSessionAudioInputTurnDetection.builder() + RealtimeTranscriptionSessionAudioInputTurnDetection.ofServerVad( + RealtimeTranscriptionSessionAudioInputTurnDetection.ServerVad.builder() + .createResponse(true) + .idleTimeoutMs(5000L) + .interruptResponse(true) + .prefixPaddingMs(0L) + .silenceDurationMs(0L) + .threshold(0.0) + .build() + ) + + val roundtrippedRealtimeTranscriptionSessionAudioInputTurnDetection = + jsonMapper.readValue( + jsonMapper.writeValueAsString(realtimeTranscriptionSessionAudioInputTurnDetection), + jacksonTypeRef(), + ) + + assertThat(roundtrippedRealtimeTranscriptionSessionAudioInputTurnDetection) + .isEqualTo(realtimeTranscriptionSessionAudioInputTurnDetection) + } + + @Test + fun ofSemanticVad() { + val semanticVad = + RealtimeTranscriptionSessionAudioInputTurnDetection.SemanticVad.builder() .createResponse(true) - .eagerness(RealtimeTranscriptionSessionAudioInputTurnDetection.Eagerness.LOW) - .idleTimeoutMs(0L) + .eagerness( + RealtimeTranscriptionSessionAudioInputTurnDetection.SemanticVad.Eagerness.LOW + ) .interruptResponse(true) - .prefixPaddingMs(0L) - .silenceDurationMs(0L) - .threshold(0.0) - .type(RealtimeTranscriptionSessionAudioInputTurnDetection.Type.SERVER_VAD) .build() + val realtimeTranscriptionSessionAudioInputTurnDetection = + RealtimeTranscriptionSessionAudioInputTurnDetection.ofSemanticVad(semanticVad) + + assertThat(realtimeTranscriptionSessionAudioInputTurnDetection.serverVad()).isEmpty + assertThat(realtimeTranscriptionSessionAudioInputTurnDetection.semanticVad()) + .contains(semanticVad) + } + + @Test + fun ofSemanticVadRoundtrip() { + val jsonMapper = jsonMapper() + val realtimeTranscriptionSessionAudioInputTurnDetection = + RealtimeTranscriptionSessionAudioInputTurnDetection.ofSemanticVad( + RealtimeTranscriptionSessionAudioInputTurnDetection.SemanticVad.builder() + .createResponse(true) + .eagerness( + RealtimeTranscriptionSessionAudioInputTurnDetection.SemanticVad.Eagerness + .LOW + ) + .interruptResponse(true) + .build() + ) + val roundtrippedRealtimeTranscriptionSessionAudioInputTurnDetection = jsonMapper.readValue( jsonMapper.writeValueAsString(realtimeTranscriptionSessionAudioInputTurnDetection), @@ -63,4 +102,29 @@ internal class RealtimeTranscriptionSessionAudioInputTurnDetectionTest { assertThat(roundtrippedRealtimeTranscriptionSessionAudioInputTurnDetection) .isEqualTo(realtimeTranscriptionSessionAudioInputTurnDetection) } + + enum class IncompatibleJsonShapeTestCase(val value: JsonValue) { + BOOLEAN(JsonValue.from(false)), + STRING(JsonValue.from("invalid")), + INTEGER(JsonValue.from(-1)), + FLOAT(JsonValue.from(3.14)), + ARRAY(JsonValue.from(listOf("invalid", "array"))), + } + + @ParameterizedTest + @EnumSource + fun incompatibleJsonShapeDeserializesToUnknown(testCase: IncompatibleJsonShapeTestCase) { + val realtimeTranscriptionSessionAudioInputTurnDetection = + jsonMapper() + .convertValue( + testCase.value, + jacksonTypeRef(), + ) + + val e = + assertThrows { + realtimeTranscriptionSessionAudioInputTurnDetection.validate() + } + assertThat(e).hasMessageStartingWith("Unknown ") + } } diff --git a/openai-java-core/src/test/kotlin/com/openai/models/realtime/RealtimeTranscriptionSessionAudioTest.kt b/openai-java-core/src/test/kotlin/com/openai/models/realtime/RealtimeTranscriptionSessionAudioTest.kt index 6d161640..e30b3a66 100644 --- a/openai-java-core/src/test/kotlin/com/openai/models/realtime/RealtimeTranscriptionSessionAudioTest.kt +++ b/openai-java-core/src/test/kotlin/com/openai/models/realtime/RealtimeTranscriptionSessionAudioTest.kt @@ -34,21 +34,13 @@ internal class RealtimeTranscriptionSessionAudioTest { .build() ) .turnDetection( - RealtimeTranscriptionSessionAudioInputTurnDetection.builder() + RealtimeTranscriptionSessionAudioInputTurnDetection.ServerVad.builder() .createResponse(true) - .eagerness( - RealtimeTranscriptionSessionAudioInputTurnDetection.Eagerness - .LOW - ) - .idleTimeoutMs(0L) + .idleTimeoutMs(5000L) .interruptResponse(true) .prefixPaddingMs(0L) .silenceDurationMs(0L) .threshold(0.0) - .type( - RealtimeTranscriptionSessionAudioInputTurnDetection.Type - .SERVER_VAD - ) .build() ) .build() @@ -77,19 +69,13 @@ internal class RealtimeTranscriptionSessionAudioTest { .build() ) .turnDetection( - RealtimeTranscriptionSessionAudioInputTurnDetection.builder() + RealtimeTranscriptionSessionAudioInputTurnDetection.ServerVad.builder() .createResponse(true) - .eagerness( - RealtimeTranscriptionSessionAudioInputTurnDetection.Eagerness.LOW - ) - .idleTimeoutMs(0L) + .idleTimeoutMs(5000L) .interruptResponse(true) .prefixPaddingMs(0L) .silenceDurationMs(0L) .threshold(0.0) - .type( - RealtimeTranscriptionSessionAudioInputTurnDetection.Type.SERVER_VAD - ) .build() ) .build() @@ -122,21 +108,13 @@ internal class RealtimeTranscriptionSessionAudioTest { .build() ) .turnDetection( - RealtimeTranscriptionSessionAudioInputTurnDetection.builder() + RealtimeTranscriptionSessionAudioInputTurnDetection.ServerVad.builder() .createResponse(true) - .eagerness( - RealtimeTranscriptionSessionAudioInputTurnDetection.Eagerness - .LOW - ) - .idleTimeoutMs(0L) + .idleTimeoutMs(5000L) .interruptResponse(true) .prefixPaddingMs(0L) .silenceDurationMs(0L) .threshold(0.0) - .type( - RealtimeTranscriptionSessionAudioInputTurnDetection.Type - .SERVER_VAD - ) .build() ) .build() diff --git a/openai-java-core/src/test/kotlin/com/openai/models/realtime/RealtimeTranscriptionSessionCreateRequestTest.kt b/openai-java-core/src/test/kotlin/com/openai/models/realtime/RealtimeTranscriptionSessionCreateRequestTest.kt index 5e6f56cd..8f220797 100644 --- a/openai-java-core/src/test/kotlin/com/openai/models/realtime/RealtimeTranscriptionSessionCreateRequestTest.kt +++ b/openai-java-core/src/test/kotlin/com/openai/models/realtime/RealtimeTranscriptionSessionCreateRequestTest.kt @@ -37,22 +37,14 @@ internal class RealtimeTranscriptionSessionCreateRequestTest { .build() ) .turnDetection( - RealtimeTranscriptionSessionAudioInputTurnDetection.builder() + RealtimeTranscriptionSessionAudioInputTurnDetection.ServerVad + .builder() .createResponse(true) - .eagerness( - RealtimeTranscriptionSessionAudioInputTurnDetection - .Eagerness - .LOW - ) - .idleTimeoutMs(0L) + .idleTimeoutMs(5000L) .interruptResponse(true) .prefixPaddingMs(0L) .silenceDurationMs(0L) .threshold(0.0) - .type( - RealtimeTranscriptionSessionAudioInputTurnDetection.Type - .SERVER_VAD - ) .build() ) .build() @@ -89,22 +81,14 @@ internal class RealtimeTranscriptionSessionCreateRequestTest { .build() ) .turnDetection( - RealtimeTranscriptionSessionAudioInputTurnDetection.builder() + RealtimeTranscriptionSessionAudioInputTurnDetection.ServerVad + .builder() .createResponse(true) - .eagerness( - RealtimeTranscriptionSessionAudioInputTurnDetection - .Eagerness - .LOW - ) - .idleTimeoutMs(0L) + .idleTimeoutMs(5000L) .interruptResponse(true) .prefixPaddingMs(0L) .silenceDurationMs(0L) .threshold(0.0) - .type( - RealtimeTranscriptionSessionAudioInputTurnDetection.Type - .SERVER_VAD - ) .build() ) .build() @@ -146,22 +130,14 @@ internal class RealtimeTranscriptionSessionCreateRequestTest { .build() ) .turnDetection( - RealtimeTranscriptionSessionAudioInputTurnDetection.builder() + RealtimeTranscriptionSessionAudioInputTurnDetection.ServerVad + .builder() .createResponse(true) - .eagerness( - RealtimeTranscriptionSessionAudioInputTurnDetection - .Eagerness - .LOW - ) - .idleTimeoutMs(0L) + .idleTimeoutMs(5000L) .interruptResponse(true) .prefixPaddingMs(0L) .silenceDurationMs(0L) .threshold(0.0) - .type( - RealtimeTranscriptionSessionAudioInputTurnDetection.Type - .SERVER_VAD - ) .build() ) .build() diff --git a/openai-java-core/src/test/kotlin/com/openai/models/realtime/SessionCreatedEventTest.kt b/openai-java-core/src/test/kotlin/com/openai/models/realtime/SessionCreatedEventTest.kt index 3bad57f3..9d45e5bc 100644 --- a/openai-java-core/src/test/kotlin/com/openai/models/realtime/SessionCreatedEventTest.kt +++ b/openai-java-core/src/test/kotlin/com/openai/models/realtime/SessionCreatedEventTest.kt @@ -42,19 +42,13 @@ internal class SessionCreatedEventTest { .build() ) .turnDetection( - RealtimeAudioInputTurnDetection.builder() + RealtimeAudioInputTurnDetection.ServerVad.builder() .createResponse(true) - .eagerness( - RealtimeAudioInputTurnDetection.Eagerness.LOW - ) - .idleTimeoutMs(0L) + .idleTimeoutMs(5000L) .interruptResponse(true) .prefixPaddingMs(0L) .silenceDurationMs(0L) .threshold(0.0) - .type( - RealtimeAudioInputTurnDetection.Type.SERVER_VAD - ) .build() ) .build() @@ -135,19 +129,13 @@ internal class SessionCreatedEventTest { .build() ) .turnDetection( - RealtimeAudioInputTurnDetection.builder() + RealtimeAudioInputTurnDetection.ServerVad.builder() .createResponse(true) - .eagerness( - RealtimeAudioInputTurnDetection.Eagerness.LOW - ) - .idleTimeoutMs(0L) + .idleTimeoutMs(5000L) .interruptResponse(true) .prefixPaddingMs(0L) .silenceDurationMs(0L) .threshold(0.0) - .type( - RealtimeAudioInputTurnDetection.Type.SERVER_VAD - ) .build() ) .build() @@ -232,19 +220,13 @@ internal class SessionCreatedEventTest { .build() ) .turnDetection( - RealtimeAudioInputTurnDetection.builder() + RealtimeAudioInputTurnDetection.ServerVad.builder() .createResponse(true) - .eagerness( - RealtimeAudioInputTurnDetection.Eagerness.LOW - ) - .idleTimeoutMs(0L) + .idleTimeoutMs(5000L) .interruptResponse(true) .prefixPaddingMs(0L) .silenceDurationMs(0L) .threshold(0.0) - .type( - RealtimeAudioInputTurnDetection.Type.SERVER_VAD - ) .build() ) .build() diff --git a/openai-java-core/src/test/kotlin/com/openai/models/realtime/SessionUpdateEventTest.kt b/openai-java-core/src/test/kotlin/com/openai/models/realtime/SessionUpdateEventTest.kt index 28eb30c0..4e937841 100644 --- a/openai-java-core/src/test/kotlin/com/openai/models/realtime/SessionUpdateEventTest.kt +++ b/openai-java-core/src/test/kotlin/com/openai/models/realtime/SessionUpdateEventTest.kt @@ -41,19 +41,13 @@ internal class SessionUpdateEventTest { .build() ) .turnDetection( - RealtimeAudioInputTurnDetection.builder() + RealtimeAudioInputTurnDetection.ServerVad.builder() .createResponse(true) - .eagerness( - RealtimeAudioInputTurnDetection.Eagerness.LOW - ) - .idleTimeoutMs(0L) + .idleTimeoutMs(5000L) .interruptResponse(true) .prefixPaddingMs(0L) .silenceDurationMs(0L) .threshold(0.0) - .type( - RealtimeAudioInputTurnDetection.Type.SERVER_VAD - ) .build() ) .build() @@ -134,19 +128,13 @@ internal class SessionUpdateEventTest { .build() ) .turnDetection( - RealtimeAudioInputTurnDetection.builder() + RealtimeAudioInputTurnDetection.ServerVad.builder() .createResponse(true) - .eagerness( - RealtimeAudioInputTurnDetection.Eagerness.LOW - ) - .idleTimeoutMs(0L) + .idleTimeoutMs(5000L) .interruptResponse(true) .prefixPaddingMs(0L) .silenceDurationMs(0L) .threshold(0.0) - .type( - RealtimeAudioInputTurnDetection.Type.SERVER_VAD - ) .build() ) .build() @@ -231,19 +219,13 @@ internal class SessionUpdateEventTest { .build() ) .turnDetection( - RealtimeAudioInputTurnDetection.builder() + RealtimeAudioInputTurnDetection.ServerVad.builder() .createResponse(true) - .eagerness( - RealtimeAudioInputTurnDetection.Eagerness.LOW - ) - .idleTimeoutMs(0L) + .idleTimeoutMs(5000L) .interruptResponse(true) .prefixPaddingMs(0L) .silenceDurationMs(0L) .threshold(0.0) - .type( - RealtimeAudioInputTurnDetection.Type.SERVER_VAD - ) .build() ) .build() diff --git a/openai-java-core/src/test/kotlin/com/openai/models/realtime/SessionUpdatedEventTest.kt b/openai-java-core/src/test/kotlin/com/openai/models/realtime/SessionUpdatedEventTest.kt index cd599562..f8716bb4 100644 --- a/openai-java-core/src/test/kotlin/com/openai/models/realtime/SessionUpdatedEventTest.kt +++ b/openai-java-core/src/test/kotlin/com/openai/models/realtime/SessionUpdatedEventTest.kt @@ -42,19 +42,13 @@ internal class SessionUpdatedEventTest { .build() ) .turnDetection( - RealtimeAudioInputTurnDetection.builder() + RealtimeAudioInputTurnDetection.ServerVad.builder() .createResponse(true) - .eagerness( - RealtimeAudioInputTurnDetection.Eagerness.LOW - ) - .idleTimeoutMs(0L) + .idleTimeoutMs(5000L) .interruptResponse(true) .prefixPaddingMs(0L) .silenceDurationMs(0L) .threshold(0.0) - .type( - RealtimeAudioInputTurnDetection.Type.SERVER_VAD - ) .build() ) .build() @@ -135,19 +129,13 @@ internal class SessionUpdatedEventTest { .build() ) .turnDetection( - RealtimeAudioInputTurnDetection.builder() + RealtimeAudioInputTurnDetection.ServerVad.builder() .createResponse(true) - .eagerness( - RealtimeAudioInputTurnDetection.Eagerness.LOW - ) - .idleTimeoutMs(0L) + .idleTimeoutMs(5000L) .interruptResponse(true) .prefixPaddingMs(0L) .silenceDurationMs(0L) .threshold(0.0) - .type( - RealtimeAudioInputTurnDetection.Type.SERVER_VAD - ) .build() ) .build() @@ -232,19 +220,13 @@ internal class SessionUpdatedEventTest { .build() ) .turnDetection( - RealtimeAudioInputTurnDetection.builder() + RealtimeAudioInputTurnDetection.ServerVad.builder() .createResponse(true) - .eagerness( - RealtimeAudioInputTurnDetection.Eagerness.LOW - ) - .idleTimeoutMs(0L) + .idleTimeoutMs(5000L) .interruptResponse(true) .prefixPaddingMs(0L) .silenceDurationMs(0L) .threshold(0.0) - .type( - RealtimeAudioInputTurnDetection.Type.SERVER_VAD - ) .build() ) .build() diff --git a/openai-java-core/src/test/kotlin/com/openai/models/realtime/clientsecrets/ClientSecretCreateParamsTest.kt b/openai-java-core/src/test/kotlin/com/openai/models/realtime/clientsecrets/ClientSecretCreateParamsTest.kt index 55640c46..fcc0d80d 100644 --- a/openai-java-core/src/test/kotlin/com/openai/models/realtime/clientsecrets/ClientSecretCreateParamsTest.kt +++ b/openai-java-core/src/test/kotlin/com/openai/models/realtime/clientsecrets/ClientSecretCreateParamsTest.kt @@ -54,17 +54,13 @@ internal class ClientSecretCreateParamsTest { .build() ) .turnDetection( - RealtimeAudioInputTurnDetection.builder() + RealtimeAudioInputTurnDetection.ServerVad.builder() .createResponse(true) - .eagerness( - RealtimeAudioInputTurnDetection.Eagerness.LOW - ) - .idleTimeoutMs(0L) + .idleTimeoutMs(5000L) .interruptResponse(true) .prefixPaddingMs(0L) .silenceDurationMs(0L) .threshold(0.0) - .type(RealtimeAudioInputTurnDetection.Type.SERVER_VAD) .build() ) .build() @@ -152,19 +148,13 @@ internal class ClientSecretCreateParamsTest { .build() ) .turnDetection( - RealtimeAudioInputTurnDetection.builder() + RealtimeAudioInputTurnDetection.ServerVad.builder() .createResponse(true) - .eagerness( - RealtimeAudioInputTurnDetection.Eagerness.LOW - ) - .idleTimeoutMs(0L) + .idleTimeoutMs(5000L) .interruptResponse(true) .prefixPaddingMs(0L) .silenceDurationMs(0L) .threshold(0.0) - .type( - RealtimeAudioInputTurnDetection.Type.SERVER_VAD - ) .build() ) .build() @@ -253,19 +243,13 @@ internal class ClientSecretCreateParamsTest { .build() ) .turnDetection( - RealtimeAudioInputTurnDetection.builder() + RealtimeAudioInputTurnDetection.ServerVad.builder() .createResponse(true) - .eagerness( - RealtimeAudioInputTurnDetection.Eagerness.LOW - ) - .idleTimeoutMs(0L) + .idleTimeoutMs(5000L) .interruptResponse(true) .prefixPaddingMs(0L) .silenceDurationMs(0L) .threshold(0.0) - .type( - RealtimeAudioInputTurnDetection.Type.SERVER_VAD - ) .build() ) .build() diff --git a/openai-java-core/src/test/kotlin/com/openai/models/realtime/clientsecrets/ClientSecretCreateResponseTest.kt b/openai-java-core/src/test/kotlin/com/openai/models/realtime/clientsecrets/ClientSecretCreateResponseTest.kt index 2fad297b..39939f10 100644 --- a/openai-java-core/src/test/kotlin/com/openai/models/realtime/clientsecrets/ClientSecretCreateResponseTest.kt +++ b/openai-java-core/src/test/kotlin/com/openai/models/realtime/clientsecrets/ClientSecretCreateResponseTest.kt @@ -55,25 +55,14 @@ internal class ClientSecretCreateResponseTest { ) .turnDetection( RealtimeSessionCreateResponse.Audio.Input.TurnDetection + .ServerVad .builder() .createResponse(true) - .eagerness( - RealtimeSessionCreateResponse.Audio.Input - .TurnDetection - .Eagerness - .LOW - ) - .idleTimeoutMs(0L) + .idleTimeoutMs(5000L) .interruptResponse(true) .prefixPaddingMs(0L) .silenceDurationMs(0L) .threshold(0.0) - .type( - RealtimeSessionCreateResponse.Audio.Input - .TurnDetection - .Type - .SERVER_VAD - ) .build() ) .build() @@ -165,25 +154,14 @@ internal class ClientSecretCreateResponseTest { ) .turnDetection( RealtimeSessionCreateResponse.Audio.Input.TurnDetection + .ServerVad .builder() .createResponse(true) - .eagerness( - RealtimeSessionCreateResponse.Audio.Input - .TurnDetection - .Eagerness - .LOW - ) - .idleTimeoutMs(0L) + .idleTimeoutMs(5000L) .interruptResponse(true) .prefixPaddingMs(0L) .silenceDurationMs(0L) .threshold(0.0) - .type( - RealtimeSessionCreateResponse.Audio.Input - .TurnDetection - .Type - .SERVER_VAD - ) .build() ) .build() @@ -279,25 +257,14 @@ internal class ClientSecretCreateResponseTest { ) .turnDetection( RealtimeSessionCreateResponse.Audio.Input.TurnDetection + .ServerVad .builder() .createResponse(true) - .eagerness( - RealtimeSessionCreateResponse.Audio.Input - .TurnDetection - .Eagerness - .LOW - ) - .idleTimeoutMs(0L) + .idleTimeoutMs(5000L) .interruptResponse(true) .prefixPaddingMs(0L) .silenceDurationMs(0L) .threshold(0.0) - .type( - RealtimeSessionCreateResponse.Audio.Input - .TurnDetection - .Type - .SERVER_VAD - ) .build() ) .build() diff --git a/openai-java-core/src/test/kotlin/com/openai/models/realtime/clientsecrets/RealtimeSessionCreateResponseTest.kt b/openai-java-core/src/test/kotlin/com/openai/models/realtime/clientsecrets/RealtimeSessionCreateResponseTest.kt index 24d7d974..4d1eb4a1 100644 --- a/openai-java-core/src/test/kotlin/com/openai/models/realtime/clientsecrets/RealtimeSessionCreateResponseTest.kt +++ b/openai-java-core/src/test/kotlin/com/openai/models/realtime/clientsecrets/RealtimeSessionCreateResponseTest.kt @@ -50,23 +50,14 @@ internal class RealtimeSessionCreateResponseTest { ) .turnDetection( RealtimeSessionCreateResponse.Audio.Input.TurnDetection + .ServerVad .builder() .createResponse(true) - .eagerness( - RealtimeSessionCreateResponse.Audio.Input.TurnDetection - .Eagerness - .LOW - ) - .idleTimeoutMs(0L) + .idleTimeoutMs(5000L) .interruptResponse(true) .prefixPaddingMs(0L) .silenceDurationMs(0L) .threshold(0.0) - .type( - RealtimeSessionCreateResponse.Audio.Input.TurnDetection - .Type - .SERVER_VAD - ) .build() ) .build() @@ -142,22 +133,14 @@ internal class RealtimeSessionCreateResponseTest { .build() ) .turnDetection( - RealtimeSessionCreateResponse.Audio.Input.TurnDetection.builder() + RealtimeSessionCreateResponse.Audio.Input.TurnDetection.ServerVad + .builder() .createResponse(true) - .eagerness( - RealtimeSessionCreateResponse.Audio.Input.TurnDetection - .Eagerness - .LOW - ) - .idleTimeoutMs(0L) + .idleTimeoutMs(5000L) .interruptResponse(true) .prefixPaddingMs(0L) .silenceDurationMs(0L) .threshold(0.0) - .type( - RealtimeSessionCreateResponse.Audio.Input.TurnDetection.Type - .SERVER_VAD - ) .build() ) .build() @@ -253,23 +236,14 @@ internal class RealtimeSessionCreateResponseTest { ) .turnDetection( RealtimeSessionCreateResponse.Audio.Input.TurnDetection + .ServerVad .builder() .createResponse(true) - .eagerness( - RealtimeSessionCreateResponse.Audio.Input.TurnDetection - .Eagerness - .LOW - ) - .idleTimeoutMs(0L) + .idleTimeoutMs(5000L) .interruptResponse(true) .prefixPaddingMs(0L) .silenceDurationMs(0L) .threshold(0.0) - .type( - RealtimeSessionCreateResponse.Audio.Input.TurnDetection - .Type - .SERVER_VAD - ) .build() ) .build() diff --git a/openai-java-core/src/test/kotlin/com/openai/services/async/realtime/ClientSecretServiceAsyncTest.kt b/openai-java-core/src/test/kotlin/com/openai/services/async/realtime/ClientSecretServiceAsyncTest.kt index 9808a106..d3420b6a 100644 --- a/openai-java-core/src/test/kotlin/com/openai/services/async/realtime/ClientSecretServiceAsyncTest.kt +++ b/openai-java-core/src/test/kotlin/com/openai/services/async/realtime/ClientSecretServiceAsyncTest.kt @@ -69,21 +69,13 @@ internal class ClientSecretServiceAsyncTest { .build() ) .turnDetection( - RealtimeAudioInputTurnDetection.builder() + RealtimeAudioInputTurnDetection.ServerVad.builder() .createResponse(true) - .eagerness( - RealtimeAudioInputTurnDetection.Eagerness - .LOW - ) - .idleTimeoutMs(0L) + .idleTimeoutMs(5000L) .interruptResponse(true) .prefixPaddingMs(0L) .silenceDurationMs(0L) .threshold(0.0) - .type( - RealtimeAudioInputTurnDetection.Type - .SERVER_VAD - ) .build() ) .build() diff --git a/openai-java-core/src/test/kotlin/com/openai/services/blocking/realtime/ClientSecretServiceTest.kt b/openai-java-core/src/test/kotlin/com/openai/services/blocking/realtime/ClientSecretServiceTest.kt index 79a74843..a5f1ccec 100644 --- a/openai-java-core/src/test/kotlin/com/openai/services/blocking/realtime/ClientSecretServiceTest.kt +++ b/openai-java-core/src/test/kotlin/com/openai/services/blocking/realtime/ClientSecretServiceTest.kt @@ -69,21 +69,13 @@ internal class ClientSecretServiceTest { .build() ) .turnDetection( - RealtimeAudioInputTurnDetection.builder() + RealtimeAudioInputTurnDetection.ServerVad.builder() .createResponse(true) - .eagerness( - RealtimeAudioInputTurnDetection.Eagerness - .LOW - ) - .idleTimeoutMs(0L) + .idleTimeoutMs(5000L) .interruptResponse(true) .prefixPaddingMs(0L) .silenceDurationMs(0L) .threshold(0.0) - .type( - RealtimeAudioInputTurnDetection.Type - .SERVER_VAD - ) .build() ) .build() From 1aee297889eba57ec360de0aef389c14a31c9eba Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Thu, 11 Sep 2025 18:29:48 +0000 Subject: [PATCH 2/7] codegen metadata --- .stats.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.stats.yml b/.stats.yml index 5388f246..e3897189 100644 --- a/.stats.yml +++ b/.stats.yml @@ -1,4 +1,4 @@ configured_endpoints: 118 -openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-c829f9e7f51d4946dae7b02eb37eb857b538a464cf54c7ced5eff1b1c93e07db.yml -openapi_spec_hash: 1b2eaba46b264bcec8831bc496543649 +openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-94b1e3cb0bdc616ff0c2f267c33dadd95f133b1f64e647aab6c64afb292b2793.yml +openapi_spec_hash: 2395319ac9befd59b6536ae7f9564a05 config_hash: 930dac3aa861344867e4ac84f037b5df From 8c63a5b4983039ace361ab08b89a0c227a88cf90 Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Fri, 12 Sep 2025 20:11:49 +0000 Subject: [PATCH 3/7] chore(internal): remove redundant deserializer symbols --- .../TranscriptionCreateParams.kt | 56 ------------------- .../openai/models/images/ImageEditParams.kt | 56 ------------------- 2 files changed, 112 deletions(-) diff --git a/openai-java-core/src/main/kotlin/com/openai/models/audio/transcriptions/TranscriptionCreateParams.kt b/openai-java-core/src/main/kotlin/com/openai/models/audio/transcriptions/TranscriptionCreateParams.kt index 581c6cff..8c34a011 100644 --- a/openai-java-core/src/main/kotlin/com/openai/models/audio/transcriptions/TranscriptionCreateParams.kt +++ b/openai-java-core/src/main/kotlin/com/openai/models/audio/transcriptions/TranscriptionCreateParams.kt @@ -7,13 +7,8 @@ import com.fasterxml.jackson.annotation.JsonAnySetter import com.fasterxml.jackson.annotation.JsonCreator import com.fasterxml.jackson.annotation.JsonProperty import com.fasterxml.jackson.core.JsonGenerator -import com.fasterxml.jackson.core.ObjectCodec -import com.fasterxml.jackson.databind.JsonNode import com.fasterxml.jackson.databind.SerializerProvider -import com.fasterxml.jackson.databind.annotation.JsonDeserialize import com.fasterxml.jackson.databind.annotation.JsonSerialize -import com.fasterxml.jackson.module.kotlin.jacksonTypeRef -import com.openai.core.BaseDeserializer import com.openai.core.BaseSerializer import com.openai.core.Enum import com.openai.core.ExcludeMissing @@ -21,7 +16,6 @@ import com.openai.core.JsonField import com.openai.core.JsonValue import com.openai.core.MultipartField import com.openai.core.Params -import com.openai.core.allMaxBy import com.openai.core.checkKnown import com.openai.core.checkRequired import com.openai.core.getOrThrow @@ -1220,7 +1214,6 @@ private constructor( * object can be provided to tweak VAD detection parameters manually. If unset, the audio is * transcribed as a single block. */ - @JsonDeserialize(using = ChunkingStrategy.Deserializer::class) @JsonSerialize(using = ChunkingStrategy.Serializer::class) class ChunkingStrategy private constructor( @@ -1285,25 +1278,6 @@ private constructor( false } - /** - * Returns a score indicating how many valid values are contained in this object - * recursively. - * - * Used for best match union deserialization. - */ - @JvmSynthetic - internal fun validity(): Int = - accept( - object : Visitor { - override fun visitAuto(auto: JsonValue) = - auto.let { if (it == JsonValue.from("auto")) 1 else 0 } - - override fun visitVadConfig(vadConfig: VadConfig) = 1 - - override fun unknown(json: JsonValue?) = 0 - } - ) - override fun equals(other: Any?): Boolean { if (this === other) { return true @@ -1361,36 +1335,6 @@ private constructor( } } - internal class Deserializer : BaseDeserializer(ChunkingStrategy::class) { - - override fun ObjectCodec.deserialize(node: JsonNode): ChunkingStrategy { - val json = JsonValue.fromJsonNode(node) - - val bestMatches = - sequenceOf( - tryDeserialize(node, jacksonTypeRef()) - ?.let { ChunkingStrategy(auto = it, _json = json) } - ?.takeIf { it.isValid() }, - tryDeserialize(node, jacksonTypeRef())?.let { - ChunkingStrategy(vadConfig = it, _json = json) - }, - ) - .filterNotNull() - .allMaxBy { it.validity() } - .toList() - return when (bestMatches.size) { - // This can happen if what we're deserializing is completely incompatible with - // all the possible variants (e.g. deserializing from array). - 0 -> ChunkingStrategy(_json = json) - 1 -> bestMatches.single() - // If there's more than one match with the highest validity, then use the first - // completely valid match, or simply the first match if none are completely - // valid. - else -> bestMatches.firstOrNull { it.isValid() } ?: bestMatches.first() - } - } - } - internal class Serializer : BaseSerializer(ChunkingStrategy::class) { override fun serialize( diff --git a/openai-java-core/src/main/kotlin/com/openai/models/images/ImageEditParams.kt b/openai-java-core/src/main/kotlin/com/openai/models/images/ImageEditParams.kt index 1ff743f1..b90acd9e 100644 --- a/openai-java-core/src/main/kotlin/com/openai/models/images/ImageEditParams.kt +++ b/openai-java-core/src/main/kotlin/com/openai/models/images/ImageEditParams.kt @@ -7,13 +7,8 @@ import com.fasterxml.jackson.annotation.JsonAnySetter import com.fasterxml.jackson.annotation.JsonCreator import com.fasterxml.jackson.annotation.JsonProperty import com.fasterxml.jackson.core.JsonGenerator -import com.fasterxml.jackson.core.ObjectCodec -import com.fasterxml.jackson.databind.JsonNode import com.fasterxml.jackson.databind.SerializerProvider -import com.fasterxml.jackson.databind.annotation.JsonDeserialize import com.fasterxml.jackson.databind.annotation.JsonSerialize -import com.fasterxml.jackson.module.kotlin.jacksonTypeRef -import com.openai.core.BaseDeserializer import com.openai.core.BaseSerializer import com.openai.core.Enum import com.openai.core.ExcludeMissing @@ -21,7 +16,6 @@ import com.openai.core.JsonField import com.openai.core.JsonValue import com.openai.core.MultipartField import com.openai.core.Params -import com.openai.core.allMaxBy import com.openai.core.checkRequired import com.openai.core.getOrThrow import com.openai.core.http.Headers @@ -1729,7 +1723,6 @@ private constructor( * For `dall-e-2`, you can only provide one image, and it should be a square `png` file less * than 4MB. */ - @JsonDeserialize(using = Image.Deserializer::class) @JsonSerialize(using = Image.Serializer::class) class Image private constructor( @@ -1784,25 +1777,6 @@ private constructor( false } - /** - * Returns a score indicating how many valid values are contained in this object - * recursively. - * - * Used for best match union deserialization. - */ - @JvmSynthetic - internal fun validity(): Int = - accept( - object : Visitor { - override fun visitInputStream(inputStream: InputStream) = 1 - - override fun visitInputStreams(inputStreams: List) = - inputStreams.size - - override fun unknown(json: JsonValue?) = 0 - } - ) - override fun equals(other: Any?): Boolean { if (this === other) { return true @@ -1855,36 +1829,6 @@ private constructor( } } - internal class Deserializer : BaseDeserializer(Image::class) { - - override fun ObjectCodec.deserialize(node: JsonNode): Image { - val json = JsonValue.fromJsonNode(node) - - val bestMatches = - sequenceOf( - tryDeserialize(node, jacksonTypeRef())?.let { - Image(inputStream = it, _json = json) - }, - tryDeserialize(node, jacksonTypeRef>())?.let { - Image(inputStreams = it, _json = json) - }, - ) - .filterNotNull() - .allMaxBy { it.validity() } - .toList() - return when (bestMatches.size) { - // This can happen if what we're deserializing is completely incompatible with - // all the possible variants (e.g. deserializing from object). - 0 -> Image(_json = json) - 1 -> bestMatches.single() - // If there's more than one match with the highest validity, then use the first - // completely valid match, or simply the first match if none are completely - // valid. - else -> bestMatches.firstOrNull { it.isValid() } ?: bestMatches.first() - } - } - } - internal class Serializer : BaseSerializer(Image::class) { override fun serialize( From be0acb7a7779c4df1bf3e4678c40f4bd6d2b50d6 Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Thu, 11 Sep 2025 21:37:35 +0000 Subject: [PATCH 4/7] chore: improve formatter performance --- scripts/fast-format | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) create mode 100755 scripts/fast-format diff --git a/scripts/fast-format b/scripts/fast-format new file mode 100755 index 00000000..2aa524fd --- /dev/null +++ b/scripts/fast-format @@ -0,0 +1,39 @@ +#!/usr/bin/env bash + +set -euo pipefail + +cd "$(dirname "$0")/.." + +if [ $# -eq 0 ]; then + echo "Usage: $0 [additional-formatter-args...]" + echo "The file should contain one file path per line" + exit 1 +fi + +FILE_LIST="$1" + +if [ ! -f "$FILE_LIST" ]; then + echo "Error: File '$FILE_LIST' not found" + exit 1 +fi + +if command -v ktfmt-fast-format &> /dev/null; then + echo "Error: ktfmt-fast-format not found" + exit 1 +fi + +# Process Kotlin files +kt_files=$(grep -E '\.kt$' "$FILE_LIST" | grep -v './buildSrc/build/') +kt_files=$(grep -E '\.kt$' "$FILE_LIST" | grep -v './buildSrc/build/') +echo "==> Found $(echo "$kt_files" | wc -l) Kotlin files:" + +if [[ -n "$kt_files" ]]; then + echo "==> will format Kotlin files" + echo "$kt_files" | tr '\n' '\0' | xargs -0 ktfmt --kotlinlang-style "$@" +else + echo "No Kotlin files to format -- expected outcome during incremental formatting" +fi + +# TODO(mbudayr): support palantir-java-format +# Process Java files +# grep -E '\.java$' "$FILE_LIST" | grep -v './buildSrc/build/' | tr '\n' '\0' | xargs -0 -r palantir-java-format --palantir --replace "$@" From ab870095a749e00bcc2e864476553f782d098525 Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Fri, 12 Sep 2025 05:43:35 +0000 Subject: [PATCH 5/7] chore(internal): codegen related update --- scripts/fast-format | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/fast-format b/scripts/fast-format index 2aa524fd..c8b60da2 100755 --- a/scripts/fast-format +++ b/scripts/fast-format @@ -17,7 +17,7 @@ if [ ! -f "$FILE_LIST" ]; then exit 1 fi -if command -v ktfmt-fast-format &> /dev/null; then +if ! command -v ktfmt-fast-format &> /dev/null; then echo "Error: ktfmt-fast-format not found" exit 1 fi From 18a0e645fd3ea28957e55633f2a271287b56c312 Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Fri, 12 Sep 2025 06:33:33 +0000 Subject: [PATCH 6/7] chore(internal): codegen related update --- scripts/fast-format | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/fast-format b/scripts/fast-format index c8b60da2..e16bfc56 100755 --- a/scripts/fast-format +++ b/scripts/fast-format @@ -29,7 +29,7 @@ echo "==> Found $(echo "$kt_files" | wc -l) Kotlin files:" if [[ -n "$kt_files" ]]; then echo "==> will format Kotlin files" - echo "$kt_files" | tr '\n' '\0' | xargs -0 ktfmt --kotlinlang-style "$@" + echo "$kt_files" | tr '\n' '\0' | xargs -0 ktfmt-fast-format --kotlinlang-style "$@" else echo "No Kotlin files to format -- expected outcome during incremental formatting" fi From a0c1a1bf5a775c57bf927a0ab5391ba17cccb014 Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Fri, 12 Sep 2025 20:12:21 +0000 Subject: [PATCH 7/7] release: 3.5.2 --- .release-please-manifest.json | 2 +- CHANGELOG.md | 12 ++++++++++++ README.md | 14 +++++++------- build.gradle.kts | 2 +- 4 files changed, 21 insertions(+), 9 deletions(-) diff --git a/.release-please-manifest.json b/.release-please-manifest.json index 3549461d..c6a6955c 100644 --- a/.release-please-manifest.json +++ b/.release-please-manifest.json @@ -1,3 +1,3 @@ { - ".": "3.5.1" + ".": "3.5.2" } \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md index ebbf3c40..964ceae4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,17 @@ # Changelog +## 3.5.2 (2025-09-12) + +Full Changelog: [v3.5.1...v3.5.2](https://github.com/openai/openai-java/compare/v3.5.1...v3.5.2) + +### Chores + +* **api:** Minor docs and type updates for realtime ([63ad148](https://github.com/openai/openai-java/commit/63ad148f1f998de1c9d66021037d7d5e04615022)) +* improve formatter performance ([be0acb7](https://github.com/openai/openai-java/commit/be0acb7a7779c4df1bf3e4678c40f4bd6d2b50d6)) +* **internal:** codegen related update ([18a0e64](https://github.com/openai/openai-java/commit/18a0e645fd3ea28957e55633f2a271287b56c312)) +* **internal:** codegen related update ([ab87009](https://github.com/openai/openai-java/commit/ab870095a749e00bcc2e864476553f782d098525)) +* **internal:** remove redundant deserializer symbols ([8c63a5b](https://github.com/openai/openai-java/commit/8c63a5b4983039ace361ab08b89a0c227a88cf90)) + ## 3.5.1 (2025-09-10) Full Changelog: [v3.5.0...v3.5.1](https://github.com/openai/openai-java/compare/v3.5.0...v3.5.1) diff --git a/README.md b/README.md index 285497ba..c356ae81 100644 --- a/README.md +++ b/README.md @@ -2,8 +2,8 @@ -[![Maven Central](https://img.shields.io/maven-central/v/com.openai/openai-java)](https://central.sonatype.com/artifact/com.openai/openai-java/3.5.1) -[![javadoc](https://javadoc.io/badge2/com.openai/openai-java/3.5.1/javadoc.svg)](https://javadoc.io/doc/com.openai/openai-java/3.5.1) +[![Maven Central](https://img.shields.io/maven-central/v/com.openai/openai-java)](https://central.sonatype.com/artifact/com.openai/openai-java/3.5.2) +[![javadoc](https://javadoc.io/badge2/com.openai/openai-java/3.5.2/javadoc.svg)](https://javadoc.io/doc/com.openai/openai-java/3.5.2) @@ -11,7 +11,7 @@ The OpenAI Java SDK provides convenient access to the [OpenAI REST API](https:// -The REST API documentation can be found on [platform.openai.com](https://platform.openai.com/docs). Javadocs are available on [javadoc.io](https://javadoc.io/doc/com.openai/openai-java/3.5.1). +The REST API documentation can be found on [platform.openai.com](https://platform.openai.com/docs). Javadocs are available on [javadoc.io](https://javadoc.io/doc/com.openai/openai-java/3.5.2). @@ -24,7 +24,7 @@ The REST API documentation can be found on [platform.openai.com](https://platfor ### Gradle ```kotlin -implementation("com.openai:openai-java:3.5.1") +implementation("com.openai:openai-java:3.5.2") ``` ### Maven @@ -33,7 +33,7 @@ implementation("com.openai:openai-java:3.5.1") com.openai openai-java - 3.5.1 + 3.5.2 ``` @@ -1342,7 +1342,7 @@ If you're using Spring Boot, then you can use the SDK's [Spring Boot starter](ht #### Gradle ```kotlin -implementation("com.openai:openai-java-spring-boot-starter:3.5.1") +implementation("com.openai:openai-java-spring-boot-starter:3.5.2") ``` #### Maven @@ -1351,7 +1351,7 @@ implementation("com.openai:openai-java-spring-boot-starter:3.5.1") com.openai openai-java-spring-boot-starter - 3.5.1 + 3.5.2 ``` diff --git a/build.gradle.kts b/build.gradle.kts index 60955f4e..f1e418a7 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -8,7 +8,7 @@ repositories { allprojects { group = "com.openai" - version = "3.5.1" // x-release-please-version + version = "3.5.2" // x-release-please-version } subprojects {