Commit aaecb0e: Add some comments
Parent: 2081639

src/agent.py

Lines changed: 24 additions & 8 deletions
@@ -54,28 +54,37 @@ def prewarm(proc: JobProcess):
 
 
 async def entrypoint(ctx: JobContext):
-    # each log entry will include these fields
+    # Logging setup
+    # Add any other context you want in all log entries here
     ctx.log_context_fields = {
         "room": ctx.room.name,
     }
 
     # Set up a voice AI pipeline using OpenAI, Cartesia, Deepgram, and the LiveKit turn detector
     session = AgentSession(
-        # any combination of STT, LLM, TTS, or realtime API can be used
+        # A Large Language Model (LLM) is your agent's brain, processing user input and generating a response
+        # See all providers at https://docs.livekit.io/agents/integrations/llm/
         llm=openai.LLM(model="gpt-4o-mini"),
+        # Speech-to-text (STT) is your agent's ears, turning the user's speech into text that the LLM can understand
+        # See all providers at https://docs.livekit.io/agents/integrations/stt/
         stt=deepgram.STT(model="nova-3", language="multi"),
+        # Text-to-speech (TTS) is your agent's voice, turning the LLM's text into speech that the user can hear
+        # See all providers at https://docs.livekit.io/agents/integrations/tts/
         tts=cartesia.TTS(voice="6f84f4b8-58a2-430c-8c79-688dad597532"),
-        # use LiveKit's turn detection model
+        # VAD and turn detection are used to determine when the user is speaking and when the agent should respond
+        # See more at https://docs.livekit.io/agents/build/turns
         turn_detection=MultilingualModel(),
         vad=ctx.proc.userdata["vad"],
     )
 
-    # To use the OpenAI Realtime API, use the following session setup instead:
+    # To use a realtime model instead of a voice pipeline, use the following session setup instead:
     # session = AgentSession(
+    #     # See all providers at https://docs.livekit.io/agents/integrations/realtime/
     #     llm=openai.realtime.RealtimeModel()
     # )
 
-    # log metrics as they are emitted, and total usage after session is over
+    # Metrics collection, to measure pipeline performance
+    # For more information, see https://docs.livekit.io/agents/build/metrics/
     usage_collector = metrics.UsageCollector()
 
     @session.on("metrics_collected")
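The handler body and the log_usage helper sit between these two hunks and are not touched by this commit. As a rough sketch of the standard LiveKit Agents starter pattern (the elided lines in src/agent.py may differ), the event handler feeds the collector and log_usage reports the total at shutdown:

    # Sketch only, not the commit's verbatim code;
    # MetricsCollectedEvent is importable from livekit.agents
    @session.on("metrics_collected")
    def _on_metrics_collected(ev: MetricsCollectedEvent):
        metrics.log_metrics(ev.metrics)      # log each metric as it is emitted
        usage_collector.collect(ev.metrics)  # accumulate usage for the final summary

    async def log_usage():
        summary = usage_collector.get_summary()
        logger.info(f"Usage: {summary}")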
@@ -87,9 +96,17 @@ async def log_usage():
         summary = usage_collector.get_summary()
         logger.info(f"Usage: {summary}")
 
-    # shutdown callbacks are triggered when the session is over
     ctx.add_shutdown_callback(log_usage)
 
+    # # Add a virtual avatar to the session, if desired
+    # # For other providers, see https://docs.livekit.io/agents/integrations/avatar/
+    # avatar = hedra.AvatarSession(
+    #     avatar_id="...",  # See https://docs.livekit.io/agents/integrations/avatar/hedra
+    # )
+    # # Start the avatar and wait for it to join
+    # await avatar.start(session, room=ctx.room)
+
+    # Start the session, which initializes the voice pipeline and warms up the models
     await session.start(
         agent=Assistant(),
         room=ctx.room,
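Enabling the commented avatar block above also needs the Hedra plugin import at the top of the file, and the avatar must be started before session.start() so it can join first. A minimal sketch, assuming the livekit-plugins-hedra package is installed and a real avatar ID replaces the placeholder:

    from livekit.plugins import hedra  # assumed import; ships in livekit-plugins-hedra

    avatar = hedra.AvatarSession(
        avatar_id="...",  # placeholder kept from the diff; substitute a real Hedra avatar ID
    )
    # The avatar joins the room as its own participant and renders the agent's speech
    await avatar.start(session, room=ctx.room)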
@@ -99,10 +116,9 @@ async def log_usage():
             # - For telephony applications, use `BVCTelephony` for best results
             noise_cancellation=noise_cancellation.BVC(),
         ),
-        room_output_options=RoomOutputOptions(transcription_enabled=True),
     )
 
-    # join the room when agent is ready
+    # Join the room and connect to the user
     await ctx.connect()
 
 
108124

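Both hunk headers reference prewarm and entrypoint; in a LiveKit Agents worker these are wired together at the bottom of the file. A sketch of the usual bootstrap (the repo's actual code may differ), with prewarm loading the Silero VAD that entrypoint later reads from ctx.proc.userdata["vad"]:

    from livekit.agents import JobProcess, WorkerOptions, cli
    from livekit.plugins import silero

    def prewarm(proc: JobProcess):
        # Load the VAD model once per worker process so every session can reuse it
        proc.userdata["vad"] = silero.VAD.load()

    if __name__ == "__main__":
        cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint, prewarm_fnc=prewarm))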