diff --git a/CLAUDE.md b/CLAUDE.md
new file mode 100644
index 0000000..e703a10
--- /dev/null
+++ b/CLAUDE.md
@@ -0,0 +1,110 @@
+# CLAUDE.md
+
+This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
+
+## Project Overview
+
+This is an ElevenLabs MCP (Model Context Protocol) server that provides access to ElevenLabs' text-to-speech, speech-to-text, voice cloning, and conversational AI capabilities through MCP tools.
+
+## Development Commands
+
+```bash
+# Setup development environment
+uv venv
+source .venv/bin/activate
+uv pip install -e ".[dev]"
+
+# Run tests with coverage
+./scripts/test.sh
+./scripts/test.sh --verbose --fail-fast  # For quick feedback during development
+
+# Run development server with MCP Inspector
+./scripts/dev.sh
+# Or directly: mcp dev elevenlabs_mcp/server.py
+
+# Build package
+./scripts/build.sh
+
+# Deploy to PyPI (requires PyPI credentials)
+./scripts/deploy.sh
+```
+
+## Architecture
+
+### Core Components
+
+1. **`server.py`** - Main MCP server implementation
+   - Contains all 24 MCP tools decorated with `@mcp.tool`
+   - Each tool that makes API calls includes cost warnings
+   - Tools return `TextContent` with operation results
+
+2. **`utils.py`** - Shared utilities
+   - `make_output_path()` - Handles base path configuration
+   - `make_output_file()` - Generates timestamped output filenames
+   - `handle_input_file()` - Validates and resolves input file paths
+   - `find_similar_files()` - Fuzzy file matching for better UX
+
+3. **`convai.py`** - Conversational AI configuration builders
+   - `create_conversation_config()` - Builds agent conversation settings
+   - `create_platform_settings()` - Configures privacy and limits
+
+4. **`model.py`** - Pydantic models for type safety
+
+### Key Design Patterns
+
+**Cost-Aware API Tools**: Every tool that calls the ElevenLabs API has a cost warning in its description:
+```python
+@mcp.tool(
+    description="""...
+    COST WARNING: This tool makes an API call to ElevenLabs which may incur costs. Only use when explicitly requested by the user.
+    """
+)
+```
+
+**File Path Handling**: All file operations respect the `ELEVENLABS_MCP_BASE_PATH` environment variable:
+- If set: Files saved to the specified directory
+- If not set: Files saved to the user's Desktop
+- Input files can be absolute or relative paths
+
+**Error Handling**: Custom `ElevenLabsMcpError` exception with helpful messages:
+- File not found → Suggests similar files if available
+- Permission errors → Clear guidance on file access issues
+
+### Environment Configuration
+
+Environment variables:
+- `ELEVENLABS_API_KEY` - Your ElevenLabs API key (required)
+- `ELEVENLABS_MCP_BASE_PATH` - Base directory for file operations (optional)
+
+### Adding New Tools
+
+1. Add the tool function in `server.py` with the `@mcp.tool` decorator
+2. Include a cost warning if it makes API calls
+3. Use consistent parameter patterns (see existing tools)
+4. Return `TextContent` with clear success/error messages
+5. Handle file operations through the utility functions
+
+### Testing
+
+- Unit tests focus on utilities and file operations
+- No integration tests for API calls (to avoid costs)
+- Run tests before committing: `./scripts/test.sh`
+- Aim for high coverage on utility functions
+
+### Common Development Tasks
+
+**Adding a new conversational AI feature**:
+1. Check if the ElevenLabs SDK supports it
+2. Add/update configuration in `convai.py` if needed
+3. Create the tool in `server.py` following existing patterns (see the sketch below)
+4. Test with dev server: `./scripts/dev.sh`
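+
+A minimal sketch of such a tool. The name, parameters, and utility-call arguments are illustrative only (not part of the codebase); it assumes the `mcp`, `base_path`, and utility imports already present in `server.py`:
+
+```python
+@mcp.tool(
+    description="""Example tool description.
+    COST WARNING: This tool makes an API call to ElevenLabs which may incur costs. Only use when explicitly requested by the user.
+    """
+)
+def example_tool(text: str, output_directory: str | None = None) -> TextContent:
+    if not text:
+        make_error("Text is required.")
+    # Resolve the output directory (argument order shown here is an assumption; see utils.py)
+    output_path = make_output_path(output_directory, base_path)
+    # Call the ElevenLabs SDK here (placeholder) and write the result under output_path
+    return TextContent(type="text", text=f"Success. Output saved under {output_path}")
+```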
+
+**Debugging file operations**:
+- Set `ELEVENLABS_MCP_BASE_PATH` to a test directory
+- Check file permissions with `handle_input_file()`
+- Use `make_output_file()` for consistent naming
+
+**Updating agent configurations**:
+- Agent configs are immutable (Pydantic frozen models)
+- Create new configs rather than modifying existing ones
+- Use `create_conversation_config()` for proper structure
\ No newline at end of file
diff --git a/elevenlabs_mcp/convai.py b/elevenlabs_mcp/convai.py
index bd3f3af..0e69f47 100644
--- a/elevenlabs_mcp/convai.py
+++ b/elevenlabs_mcp/convai.py
@@ -13,6 +13,7 @@ def create_conversation_config(
     similarity_boost: float,
     turn_timeout: int,
     max_duration_seconds: int,
+    tools: list | None = None,
 ) -> dict:
     return {
         "agent": {
@@ -20,7 +21,7 @@
             "prompt": {
                 "prompt": system_prompt,
                 "llm": llm,
-                "tools": [{"type": "system", "name": "end_call", "description": ""}],
+                "tools": tools if tools is not None else [{"type": "system", "name": "end_call", "description": ""}],
                 "knowledge_base": [],
                 "temperature": temperature,
                 **({"max_tokens": max_tokens} if max_tokens else {}),
diff --git a/elevenlabs_mcp/server.py b/elevenlabs_mcp/server.py
index 3cd0606..9464b21 100644
--- a/elevenlabs_mcp/server.py
+++ b/elevenlabs_mcp/server.py
@@ -21,6 +21,11 @@
 from mcp.server.fastmcp import FastMCP
 from mcp.types import TextContent
 from elevenlabs.client import ElevenLabs
+from elevenlabs.types import (
+    PromptAgentInputToolsItem_System,
+    SystemToolConfigInputParams_TransferToAgent,
+    AgentTransfer,
+)
 from elevenlabs_mcp.model import McpVoice, McpModel, McpLanguage
 from elevenlabs_mcp.utils import (
     make_error,
@@ -419,6 +424,9 @@ def check_subscription() -> TextContent:
         max_duration_seconds: Maximum duration of a conversation in seconds. Defaults to 600 seconds (10 minutes).
         record_voice: Whether to record the agent's voice.
         retention_days: Number of days to retain the agent's data.
+        transfer_rules: List of transfer rules for agent-to-agent transfers. Each rule should contain:
+            - agent_id: The ID of the agent to transfer to
+            - condition: Natural language condition describing when to transfer
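+            Example (hypothetical ID): [{"agent_id": "agent_123", "condition": "The user asks about billing"}]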
     """
 )
 def create_agent(
@@ -439,7 +447,51 @@
     max_duration_seconds: int = 300,
     record_voice: bool = True,
     retention_days: int = 730,
+    transfer_rules: list[dict] | None = None,
 ) -> TextContent:
+    # Create tools list
+    tools = [{"type": "system", "name": "end_call", "description": ""}]
+
+    # Add transfer_to_agent tool if transfer rules are provided
+    if transfer_rules:
+        # Convert transfer rules to AgentTransfer objects
+        agent_transfers = [
+            AgentTransfer(
+                agent_id=rule["agent_id"],
+                condition=rule["condition"]
+            )
+            for rule in transfer_rules
+        ]
+
+        # Create the transfer tool
+        transfer_tool = PromptAgentInputToolsItem_System(
+            type="system",
+            name="transfer_to_agent",
+            description="Transfer the user to a specialized agent based on their request.",
+            params=SystemToolConfigInputParams_TransferToAgent(
+                transfers=agent_transfers
+            )
+        )
+
+        # Convert to dict format for the API
+        transfer_tool_dict = {
+            "type": "system",
+            "name": "transfer_to_agent",
+            "description": transfer_tool.description,
+            "params": {
+                "system_tool_type": "transfer_to_agent",
+                "transfers": [
+                    {
+                        "agent_id": transfer.agent_id,
+                        "condition": transfer.condition
+                    }
+                    for transfer in agent_transfers
+                ]
+            }
+        }
+
+        tools.append(transfer_tool_dict)
+
     conversation_config = create_conversation_config(
         language=language,
         system_prompt=system_prompt,
@@ -455,6 +507,7 @@
         similarity_boost=similarity_boost,
         turn_timeout=turn_timeout,
         max_duration_seconds=max_duration_seconds,
+        tools=tools,
     )
 
     platform_settings = create_platform_settings(
@@ -468,9 +521,13 @@
         platform_settings=platform_settings,
     )
 
+    transfer_info = ""
+    if transfer_rules:
+        transfer_info = f", Transfer Rules: {len(transfer_rules)} agent(s) configured for transfers"
+
     return TextContent(
         type="text",
-        text=f"""Agent created successfully: Name: {name}, Agent ID: {response.agent_id}, System Prompt: {system_prompt}, Voice ID: {voice_id or "Default"}, Language: {language}, LLM: {llm}, You can use this agent ID for future interactions with the agent.""",
+        text=f"""Agent created successfully: Name: {name}, Agent ID: {response.agent_id}, System Prompt: {system_prompt}, Voice ID: {voice_id or "Default"}, Language: {language}, LLM: {llm}{transfer_info}, You can use this agent ID for future interactions with the agent.""",
     )
 
 
@@ -502,6 +559,8 @@
     if len(provided_params) > 1:
         make_error("Must provide exactly one of: URL, file, or text")
 
+    is_file_based = url is None
+
     if url is not None:
         response = client.conversational_ai.knowledge_base.documents.create_from_url(
             name=knowledge_base_name,
@@ -528,7 +587,7 @@
     agent = client.conversational_ai.agents.get(agent_id=agent_id)
     agent.conversation_config.agent.prompt.knowledge_base.append(
         KnowledgeBaseLocator(
-            type="file" if file else "url",
+            type="file" if is_file_based else "url",
             name=knowledge_base_name,
            id=response.id,
        )
@@ -583,6 +642,141 @@ def get_agent(agent_id: str) -> TextContent:
     )
 
+
+@mcp.tool(description="Get the conversation configuration of a specific conversational AI agent")
+def get_agent_config(agent_id: str) -> TextContent:
+    """Get the conversation configuration of a specific conversational AI agent.
+
+    Args:
+        agent_id: The ID of the agent to retrieve configuration for
+
+    Returns:
+        TextContent with the full conversation configuration in JSON format
+    """
+    import json
+
+    response = client.conversational_ai.agents.get(agent_id=agent_id)
+
+    # Extract the conversation config
+    config = response.conversation_config
+
+    # Convert to dict for JSON serialization
+    config_dict = config.model_dump() if hasattr(config, 'model_dump') else config.__dict__
+
+    return TextContent(
+        type="text",
+        text=json.dumps(config_dict, indent=2)
+    )
+
+
+@mcp.tool(description="Update an existing conversational AI agent's configuration, including built-in tools")
+def update_agent_with_tools(
+    agent_id: str,
+    enable_transfer_to_agent: bool = False,
+    transfer_rules: list[dict] | None = None,
+    enable_language_detection: bool = False,
+    enable_end_call: bool = True,
+) -> TextContent:
+    """Update an existing agent's built-in tools configuration.
+
+    Args:
+        agent_id: The ID of the agent to update
+        enable_transfer_to_agent: Whether to enable the transfer_to_agent tool
+        transfer_rules: List of transfer rules if enabling transfer_to_agent
+        enable_language_detection: Whether to enable language detection
+        enable_end_call: Whether to enable the end_call tool (default True)
+
+    Returns:
+        TextContent with update status
+    """
+    # Get the current agent configuration
+    agent = client.conversational_ai.agents.get(agent_id=agent_id)
+
+    # Extract current conversation config and convert to dict
+    current_config = agent.conversation_config
+    config_dict = current_config.model_dump() if hasattr(current_config, 'model_dump') else current_config.__dict__
+
+    # Build the tools array
+    tools = []
+    if enable_end_call:
+        tools.append({
+            "type": "system",
+            "name": "end_call",
+            "description": "",
+            "response_timeout_secs": 20,
+            "params": {"system_tool_type": "end_call"}
+        })
+
+    if enable_transfer_to_agent and transfer_rules:
+        tools.append({
+            "type": "system",
+            "name": "transfer_to_agent",
+            "description": "",
+            "response_timeout_secs": 20,
+            "params": {
+                "system_tool_type": "transfer_to_agent",
+                "transfers": [
+                    {
+                        "agent_id": rule["agent_id"],
+                        "condition": rule["condition"]
+                    }
+                    for rule in transfer_rules
+                ]
+            }
+        })
+
+    if enable_language_detection:
+        tools.append({
+            "type": "system",
+            "name": "language_detection",
+            "description": "",
+            "response_timeout_secs": 20,
+            "params": {"system_tool_type": "language_detection"}
+        })
+
+    # Update the tools in the config dict
+    config_dict['agent']['prompt']['tools'] = tools
+
+    # Create a new conversation config with the updated data
+    # We'll use the create_conversation_config helper to ensure proper structure
+    new_config = create_conversation_config(
+        language=config_dict['agent']['language'],
+        system_prompt=config_dict['agent']['prompt']['prompt'],
+        llm=config_dict['agent']['prompt']['llm'],
+        first_message=config_dict['agent']['first_message'],
+        temperature=config_dict['agent']['prompt']['temperature'],
+        max_tokens=config_dict['agent']['prompt'].get('max_tokens', -1),
+        asr_quality=config_dict['asr'].get('quality', 'high'),
+        voice_id=config_dict['tts']['voice_id'],
+        model_id=config_dict['tts']['model_id'],
+        optimize_streaming_latency=config_dict['tts'].get('optimize_streaming_latency', 3),
+        stability=config_dict['tts'].get('stability', 0.5),
+        similarity_boost=config_dict['tts'].get('similarity_boost', 0.8),
+        turn_timeout=config_dict['turn'].get('turn_timeout', 7),
+        max_duration_seconds=config_dict['conversation'].get('max_duration_seconds', 300),
+        tools=tools,
+    )
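+    # Note: new_config is rebuilt only from the fields read above, so any agent settings
+    # outside create_conversation_config's parameters may not be carried over by this update.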
+
+    # Update the agent
+    client.conversational_ai.agents.update(
+        agent_id=agent_id,
+        conversation_config=new_config
+    )
+
+    # Prepare status message
+    enabled_tools = []
+    if enable_end_call:
+        enabled_tools.append("end_call")
+    if enable_transfer_to_agent and transfer_rules:
+        enabled_tools.append(f"transfer_to_agent ({len(transfer_rules)} rules)")
+    if enable_language_detection:
+        enabled_tools.append("language_detection")
+
+    return TextContent(
+        type="text",
+        text=f"Agent {agent_id} updated successfully. Enabled built-in tools: {', '.join(enabled_tools) if enabled_tools else 'None'}"
+    )
+
+
 @mcp.tool(
     description="""Gets conversation with transcript. Returns: conversation details and full transcript.
     Use when: analyzing completed agent conversations.
     """
@@ -958,6 +1152,31 @@ def play_audio(input_file_path: str) -> TextContent:
     return TextContent(type="text", text=f"Successfully played audio file: {file_path}")
 
 
+@mcp.tool(description="Get information about the ElevenLabs MCP server project")
+def get_info() -> TextContent:
+    """Get information about the ElevenLabs MCP server project location and version.
+
+    Returns:
+        TextContent with project information including disk location and version
+    """
+    import pathlib
+
+    # Get the project root directory (where server.py is located)
+    project_root = pathlib.Path(__file__).parent.absolute()
+
+    # Get the parent directory which should be the package root
+    package_root = project_root.parent.absolute()
+
+    info = f"""ElevenLabs MCP Server Information:
+Project Location: {project_root}
+Package Root: {package_root}
+Version: {__version__}
+Base Path (ELEVENLABS_MCP_BASE_PATH): {base_path or 'Not set (using Desktop)'}
+API Key Configured: {'Yes' if api_key else 'No'}"""
+
+    return TextContent(type="text", text=info)
+
+
 def main():
     print("Starting MCP server")
     """Run the MCP server"""
diff --git a/uv.lock b/uv.lock
index 68d137d..7f3f484 100644
--- a/uv.lock
+++ b/uv.lock
@@ -1,5 +1,4 @@
 version = 1
-revision = 1
 requires-python = ">=3.11"
 
 [[package]]
@@ -305,7 +304,7 @@ wheels = [
 
 [[package]]
 name = "elevenlabs-mcp"
-version = "0.3.0"
+version = "0.4.0"
 source = { editable = "." }
 dependencies = [
     { name = "elevenlabs" },
@@ -353,7 +352,6 @@ requires-dist = [
     { name = "twine", marker = "extra == 'dev'", specifier = "==6.1.0" },
     { name = "uvicorn", specifier = "==0.27.1" },
 ]
-provides-extras = ["dev"]
 
 [[package]]
 name = "fastapi"