@@ -54,28 +54,37 @@ def prewarm(proc: JobProcess):
 
 
 async def entrypoint(ctx: JobContext):
-    # each log entry will include these fields
+    # Logging setup
+    # Add any other context you want in all log entries here
     ctx.log_context_fields = {
         "room": ctx.room.name,
     }
 
     # Set up a voice AI pipeline using OpenAI, Cartesia, Deepgram, and the LiveKit turn detector
     session = AgentSession(
-        # any combination of STT, LLM, TTS, or realtime API can be used
+        # A Large Language Model (LLM) is your agent's brain, processing user input and generating a response
+        # See all providers at https://docs.livekit.io/agents/integrations/llm/
         llm=openai.LLM(model="gpt-4o-mini"),
+        # Speech-to-text (STT) is your agent's ears, turning the user's speech into text that the LLM can understand
+        # See all providers at https://docs.livekit.io/agents/integrations/stt/
         stt=deepgram.STT(model="nova-3", language="multi"),
+        # Text-to-speech (TTS) is your agent's voice, turning the LLM's text into speech that the user can hear
+        # See all providers at https://docs.livekit.io/agents/integrations/tts/
         tts=cartesia.TTS(voice="6f84f4b8-58a2-430c-8c79-688dad597532"),
-        # use LiveKit's turn detection model
+        # VAD and turn detection are used to determine when the user is speaking and when the agent should respond
+        # See more at https://docs.livekit.io/agents/build/turns
         turn_detection=MultilingualModel(),
         vad=ctx.proc.userdata["vad"],
     )
-    # To use the OpenAI Realtime API, use the following session setup instead:
+    # To use a realtime model instead of a voice pipeline, use the following session setup:
     # session = AgentSession(
+    #     # See all providers at https://docs.livekit.io/agents/integrations/realtime/
     #     llm=openai.realtime.RealtimeModel()
     # )
 
-    # log metrics as they are emitted, and total usage after session is over
+    # Metrics collection, to measure pipeline performance
+    # For more information, see https://docs.livekit.io/agents/build/metrics/
     usage_collector = metrics.UsageCollector()
 
     @session.on("metrics_collected")
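The handler body is elided between these two hunks. As context, here is a minimal sketch of the pattern this decorator is typically paired with in LiveKit's starter code; the handler name `_on_metrics_collected` is an assumption, not something shown in this commit:

    # Sketch, not part of this commit: the metrics handler as it typically
    # appears under the decorator above. Assumes the `session` and
    # `usage_collector` from the surrounding code; the handler name is an assumption.
    from livekit.agents import MetricsCollectedEvent, metrics

    @session.on("metrics_collected")
    def _on_metrics_collected(ev: MetricsCollectedEvent):
        metrics.log_metrics(ev.metrics)      # log each metric as it is emitted
        usage_collector.collect(ev.metrics)  # accumulate totals for the shutdown summary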
@@ -87,9 +96,17 @@ async def log_usage():
         summary = usage_collector.get_summary()
         logger.info(f"Usage: {summary}")
 
-    # shutdown callbacks are triggered when the session is over
     ctx.add_shutdown_callback(log_usage)
 
+    # # Add a virtual avatar to the session, if desired
+    # # For other providers, see https://docs.livekit.io/agents/integrations/avatar/
+    # avatar = hedra.AvatarSession(
+    #     avatar_id="...",  # See https://docs.livekit.io/agents/integrations/avatar/hedra
+    # )
+    # # Start the avatar and wait for it to join
+    # await avatar.start(session, room=ctx.room)
+
+    # Start the session, which initializes the voice pipeline and warms up the models
     await session.start(
         agent=Assistant(),
         room=ctx.room,
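The `Assistant` agent passed to `session.start()` is defined earlier in the file, outside this diff. A minimal sketch of such an `Agent` subclass, with the instructions string as an assumption:

    # Sketch, not part of this commit: a minimal Agent subclass like the
    # Assistant referenced above; the instructions text is an assumption.
    from livekit.agents import Agent

    class Assistant(Agent):
        def __init__(self) -> None:
            super().__init__(instructions="You are a helpful voice AI assistant.")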
@@ -99,10 +116,9 @@ async def log_usage():
             # - For telephony applications, use `BVCTelephony` for best results
             noise_cancellation=noise_cancellation.BVC(),
         ),
-        room_output_options=RoomOutputOptions(transcription_enabled=True),
     )
 
-    # join the room when agent is ready
+    # Join the room and connect to the user
     await ctx.connect()
 
 
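For context beyond this diff, `entrypoint` and `prewarm` are typically registered with the worker CLI at the bottom of the same file; a sketch of that standard LiveKit agents pattern, not part of this commit:

    # Sketch, not part of this commit: standard worker wiring for the
    # entrypoint and prewarm functions defined above.
    from livekit.agents import WorkerOptions, cli

    if __name__ == "__main__":
        cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint, prewarm_fnc=prewarm))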