Skip to content

Commit a0a8427

Browse files
authored
Add You.com as tool for browser (#171)
* Add You.com as tool for browser * change key name * update tests in order to mock API key * address changes * address changes * update README
1 parent 7f3c896 commit a0a8427

File tree

8 files changed

+197
-24
lines changed

8 files changed

+197
-24
lines changed

README.md

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -426,7 +426,7 @@ codex -p oss
426426
### Browser
427427

428428
> [!WARNING]
429-
> This implementation is purely for educational purposes and should not be used in production. You should implement your own equivalent of the [`ExaBackend`](gpt_oss/tools/simple_browser/backend.py) class with your own browsing environment.
429+
> This implementation is purely for educational purposes and should not be used in production. You should implement your own equivalent of the [`YouComBackend`](gpt_oss/tools/simple_browser/backend.py) class with your own browsing environment. Two backends are currently available: `YouComBackend` and `ExaBackend`.
430430
431431
Both gpt-oss models were trained with the capability to browse using the `browser` tool that exposes the following three methods:
432432

@@ -441,15 +441,20 @@ To enable the browser tool, you'll have to place the definition into the `system
441441
```python
442442
import datetime
443443
from gpt_oss.tools.simple_browser import SimpleBrowserTool
444-
from gpt_oss.tools.simple_browser.backend import ExaBackend
444+
from gpt_oss.tools.simple_browser.backend import YouComBackend
445445
from openai_harmony import SystemContent, Message, Conversation, Role, load_harmony_encoding, HarmonyEncodingName
446446

447447
encoding = load_harmony_encoding(HarmonyEncodingName.HARMONY_GPT_OSS)
448448

449-
# Exa backend requires you to have set the EXA_API_KEY environment variable
450-
backend = ExaBackend(
449+
# Each browser backend needs its own API key to be set as an environment variable.
450+
# The You.com backend requires the YDC_API_KEY environment variable to be set,
451+
# while the Exa backend requires the EXA_API_KEY environment variable to be set
452+
backend = YouComBackend(
451453
source="web",
452454
)
455+
# backend = ExaBackend(
456+
# source="web",
457+
# )
453458
browser_tool = SimpleBrowserTool(backend=backend)
454459

455460
# create a basic system prompt

gpt-oss-mcp-server/browser_server.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,26 @@
1+
import os
12
from collections.abc import AsyncIterator
23
from contextlib import asynccontextmanager
34
from dataclasses import dataclass, field
45
from typing import Union, Optional
56

67
from mcp.server.fastmcp import Context, FastMCP
78
from gpt_oss.tools.simple_browser import SimpleBrowserTool
8-
from gpt_oss.tools.simple_browser.backend import ExaBackend
9-
9+
from gpt_oss.tools.simple_browser.backend import YouComBackend, ExaBackend
1010

1111
@dataclass
class AppContext:
    """Server-wide state: one browser tool per session id."""

    # Browser tools keyed by session id; entries are added on first request.
    browsers: dict[str, SimpleBrowserTool] = field(default_factory=dict)

    def create_or_get_browser(self, session_id: str) -> SimpleBrowserTool:
        """Return the browser tool for ``session_id``, building it if needed.

        The backend for a new tool is picked from the ``BROWSER_BACKEND``
        environment variable ("youcom" or "exa"; "exa" when unset).

        Raises:
            ValueError: if ``BROWSER_BACKEND`` holds any other value.
        """
        # Fast path: the session already has a tool.
        if session_id in self.browsers:
            return self.browsers[session_id]

        backend_name = os.getenv("BROWSER_BACKEND", "exa")
        backend_classes = {"youcom": YouComBackend, "exa": ExaBackend}
        if backend_name not in backend_classes:
            raise ValueError(f"Invalid tool backend: {backend_name}")

        tool = SimpleBrowserTool(backend=backend_classes[backend_name](source="web"))
        self.browsers[session_id] = tool
        return tool
2026

gpt-oss-mcp-server/reference-system-prompt.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import datetime
22

33
from gpt_oss.tools.simple_browser import SimpleBrowserTool
4-
from gpt_oss.tools.simple_browser.backend import ExaBackend
4+
from gpt_oss.tools.simple_browser.backend import YouComBackend
55
from gpt_oss.tools.python_docker.docker_tool import PythonTool
66
from gpt_oss.tokenizer import tokenizer
77

@@ -22,7 +22,7 @@
2222
ReasoningEffort.LOW).with_conversation_start_date(
2323
datetime.datetime.now().strftime("%Y-%m-%d")))
2424

25-
backend = ExaBackend(source="web", )
25+
backend = YouComBackend(source="web")
2626
browser_tool = SimpleBrowserTool(backend=backend)
2727
system_message_content = system_message_content.with_tools(
2828
browser_tool.tool_config)

gpt_oss/chat.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919

2020
from gpt_oss.tools import apply_patch
2121
from gpt_oss.tools.simple_browser import SimpleBrowserTool
22-
from gpt_oss.tools.simple_browser.backend import ExaBackend
22+
from gpt_oss.tools.simple_browser.backend import YouComBackend
2323
from gpt_oss.tools.python_docker.docker_tool import PythonTool
2424

2525
from openai_harmony import (
@@ -85,7 +85,7 @@ def main(args):
8585
)
8686

8787
if args.browser:
88-
backend = ExaBackend(
88+
backend = YouComBackend(
8989
source="web",
9090
)
9191
browser_tool = SimpleBrowserTool(backend=backend)

gpt_oss/responses_api/api_server.py

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import os
12
import datetime
23
import uuid
34
from typing import Callable, Literal, Optional
@@ -20,7 +21,7 @@
2021

2122
from gpt_oss.tools.python_docker.docker_tool import PythonTool
2223
from gpt_oss.tools.simple_browser import SimpleBrowserTool
23-
from gpt_oss.tools.simple_browser.backend import ExaBackend
24+
from gpt_oss.tools.simple_browser.backend import YouComBackend, ExaBackend
2425

2526
from .events import (
2627
ResponseCodeInterpreterCallCompleted,
@@ -904,9 +905,13 @@ async def generate(body: ResponsesRequest, request: Request):
904905
)
905906

906907
if use_browser_tool:
907-
backend = ExaBackend(
908-
source="web",
909-
)
908+
tool_backend = os.getenv("BROWSER_BACKEND", "exa")
909+
if tool_backend == "youcom":
910+
backend = YouComBackend(source="web")
911+
elif tool_backend == "exa":
912+
backend = ExaBackend(source="web")
913+
else:
914+
raise ValueError(f"Invalid tool backend: {tool_backend}")
910915
browser_tool = SimpleBrowserTool(backend=backend)
911916
else:
912917
browser_tool = None
Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
from .simple_browser_tool import SimpleBrowserTool
2-
from .backend import ExaBackend
2+
from .backend import ExaBackend, YouComBackend
33

44
__all__ = [
55
"SimpleBrowserTool",
66
"ExaBackend",
7+
"YouComBackend",
78
]

gpt_oss/tools/simple_browser/backend.py

Lines changed: 94 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
"""
44

55
import functools
6+
import asyncio
67
import logging
78
import os
89
from abc import abstractmethod
@@ -87,6 +88,24 @@ async def search(
8788
async def fetch(self, url: str, session: ClientSession) -> PageContents:
8889
pass
8990

91+
async def _post(self, session: ClientSession, endpoint: str, payload: dict) -> dict:
    """POST ``payload`` as JSON to ``BASE_URL + endpoint`` and return the decoded JSON body.

    The key returned by ``_get_api_key()`` is sent in the ``x-api-key`` header.

    Raises:
        BackendError: if the response status is anything other than 200; the
            message carries the backend class name, the status and the body text.
    """
    auth_headers = {"x-api-key": self._get_api_key()}
    target = f"{self.BASE_URL}{endpoint}"
    async with session.post(target, json=payload, headers=auth_headers) as response:
        if response.status == 200:
            return await response.json()
        body = await response.text()
        raise BackendError(
            f"{self.__class__.__name__} error {response.status}: {body}"
        )
99+
100+
async def _get(self, session: ClientSession, endpoint: str, params: dict) -> dict:
    """GET ``BASE_URL + endpoint`` with ``params`` as the query string and return the decoded JSON body.

    The key returned by ``_get_api_key()`` is sent in the ``x-api-key`` header.

    Raises:
        BackendError: if the response status is anything other than 200; the
            message carries the backend class name, the status and the body text.
    """
    auth_headers = {"x-api-key": self._get_api_key()}
    target = f"{self.BASE_URL}{endpoint}"
    async with session.get(target, params=params, headers=auth_headers) as response:
        if response.status == 200:
            return await response.json()
        body = await response.text()
        raise BackendError(
            f"{self.__class__.__name__} error {response.status}: {body}"
        )
108+
90109

91110
@chz.chz(typecheck=True)
92111
class ExaBackend(Backend):
@@ -106,14 +125,6 @@ def _get_api_key(self) -> str:
106125
raise BackendError("Exa API key not provided")
107126
return key
108127

109-
async def _post(self, session: ClientSession, endpoint: str, payload: dict) -> dict:
110-
headers = {"x-api-key": self._get_api_key()}
111-
async with session.post(f"{self.BASE_URL}{endpoint}", json=payload, headers=headers) as resp:
112-
if resp.status != 200:
113-
raise BackendError(
114-
f"Exa API error {resp.status}: {await resp.text()}"
115-
)
116-
return await resp.json()
117128

118129
async def search(
119130
self, query: str, topn: int, session: ClientSession
@@ -164,3 +175,78 @@ async def fetch(self, url: str, session: ClientSession) -> PageContents:
164175
display_urls=True,
165176
session=session,
166177
)
178+
179+
@chz.chz(typecheck=True)
class YouComBackend(Backend):
    """Backend that uses the You.com Search API."""

    source: str = chz.field(doc="Description of the backend source")

    BASE_URL: str = "https://api.ydc-index.io"

    def _get_api_key(self) -> str:
        """Return the API key read from the ``YDC_API_KEY`` environment variable.

        Raises:
            BackendError: if the variable is unset or empty.
        """
        key = os.environ.get("YDC_API_KEY")
        if not key:
            raise BackendError("You.com API key not provided")
        return key

    async def search(
        self, query: str, topn: int, session: ClientSession
    ) -> PageContents:
        """Run a search and return the hits rendered as a simple results page.

        Calls ``GET /v1/search`` with ``query`` and ``count=topn``, then builds
        a small HTML list (web hits first, then news hits) and hands it to
        ``process_html`` with ``query`` as the page title.

        Raises:
            BackendError: propagated from ``_get`` on a non-200 response or a
                missing API key.
        """
        # Local import keeps this block self-contained; ``html`` is stdlib.
        from html import escape

        data = await self._get(
            session,
            "/v1/search",
            {"query": query, "count": topn},
        )
        # A response without "results" (or without one of the sections) is
        # treated as "no hits" instead of failing with a bare KeyError.
        results = data.get("results") or {}

        # Each section keeps its summary under a different key.
        titles_and_urls = []
        for section, summary_key in (("web", "snippets"), ("news", "description")):
            for result in results.get(section) or []:
                summary = result.get(summary_key) or ""
                # NOTE(review): the You.com API appears to return "snippets" as
                # a list of strings - confirm against the API reference. Join
                # it so the page shows text, not a Python list repr.
                if isinstance(summary, (list, tuple)):
                    summary = " ".join(str(part) for part in summary)
                titles_and_urls.append((result["title"], result["url"], summary))

        # Everything interpolated below comes from a remote service, so it is
        # escaped to keep stray markup or quotes from corrupting the page.
        items = "".join(
            f"<li><a href='{escape(str(url), quote=True)}'>{escape(str(title))}</a> "
            f"{escape(str(summary))}</li>"
            for title, url, summary in titles_and_urls
        )
        # make a simple HTML page to work with browser format
        html_page = f"""
<html><body>
<h1>Search Results</h1>
<ul>
{items}
</ul>
</body></html>
"""

        return process_html(
            html=html_page,
            url="",
            title=query,
            display_urls=True,
            session=session,
        )

    async def fetch(self, url: str, session: ClientSession) -> PageContents:
        """Fetch ``url`` through ``POST /v1/contents`` and return the processed page.

        A leading ``VIEW_SOURCE_PREFIX`` is stripped before the request.

        Raises:
            BackendError: if the API returns nothing, if the first entry has no
                ``html`` field, or (from ``_post``) on a non-200 response.
        """
        if url.startswith(VIEW_SOURCE_PREFIX):
            url = url[len(VIEW_SOURCE_PREFIX):]
        data = await self._post(
            session,
            "/v1/contents",
            {"urls": [url], "livecrawl_formats": "html"},
        )
        if not data:
            raise BackendError(f"No contents returned for {url}")
        # Only one URL was requested, so only the first entry is used.
        page = data[0]
        if "html" not in page:
            raise BackendError(f"No HTML returned for {url}")
        return process_html(
            html=page["html"],
            url=url,
            title=page.get("title", ""),
            display_urls=True,
            session=session,
        )
252+
Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
import pytest
2+
from typing import Generator, Any
3+
from unittest import mock
4+
from aiohttp import ClientSession
5+
6+
from gpt_oss.tools.simple_browser.backend import YouComBackend
7+
8+
class MockAiohttpResponse:
    """Mocks responses for get/post requests from async libraries.

    Works as an async context manager so it can stand in for the object
    returned by a patched ``ClientSession.get`` / ``ClientSession.post``.
    """

    def __init__(self, json: Any, status: int):
        """Store the canned JSON payload (dict or list) and the HTTP status."""
        self._json = json
        self.status = status

    async def json(self):
        """Return the canned payload unchanged."""
        return self._json

    async def text(self):
        """Return the canned payload serialized as JSON text.

        The backend awaits ``resp.text()`` when the status is not 200, so the
        mock needs this method for error-path tests.
        """
        from json import dumps  # local: the name ``json`` is taken by the method above

        return dumps(self._json)

    async def __aenter__(self):
        return self

    async def __aexit__(self, exc_type, exc, tb):
        # Returning None lets any exception from the ``async with`` body propagate.
        return None
23+
24+
def mock_os_environ_get(name: str, default: Any = "test_api_key"):
    """Stand-in for ``os.environ.get`` that only answers for ``YDC_API_KEY``.

    Returns ``default``, which is a fake API key unless the caller passes its
    own value.

    Raises:
        AssertionError: if any other variable is looked up. Raised explicitly
            so the guard still fires under ``python -O`` and names the variable.
    """
    if name != "YDC_API_KEY":
        raise AssertionError(f"Unexpected environment lookup: {name!r}")
    return default
27+
28+
def test_youcom_backend():
    """The backend keeps the ``source`` it was constructed with."""
    youcom = YouComBackend(source="web")
    stored_source = youcom.source
    assert stored_source == "web"
31+
32+
@pytest.mark.asyncio
@mock.patch("aiohttp.ClientSession.get")
async def test_youcom_backend_search(mock_session_get):
    """Search numbers the web hits first, then the news hits, and titles the page with the query."""
    backend = YouComBackend(source="web")
    web_results = [
        {"title": "Web Result 1", "url": "https://www.example.com/web1", "snippets": "Web Result 1 snippets"},
        {"title": "Web Result 2", "url": "https://www.example.com/web2", "snippets": "Web Result 2 snippets"},
    ]
    news_results = [
        {"title": "News Result 1", "url": "https://www.example.com/news1", "description": "News Result 1 description"},
        {"title": "News Result 2", "url": "https://www.example.com/news2", "description": "News Result 2 description"},
    ]
    api_response = {"results": {"web": web_results, "news": news_results}}
    expected_urls = {
        "0": "https://www.example.com/web1",
        "1": "https://www.example.com/web2",
        "2": "https://www.example.com/news1",
        "3": "https://www.example.com/news2",
    }
    # The API key lookup is mocked so the test needs no real credentials.
    with mock.patch("os.environ.get", wraps=mock_os_environ_get):
        mock_session_get.return_value = MockAiohttpResponse(api_response, 200)
        async with ClientSession() as session:
            result = await backend.search(query="test", topn=10, session=session)
    assert result.title == "test"
    assert result.urls == expected_urls
54+
55+
@pytest.mark.asyncio
@mock.patch("aiohttp.ClientSession.post")
async def test_youcom_backend_fetch(mock_session_post):
    """Fetch returns the title and text of the first entry in the contents response."""
    backend = YouComBackend(source="web")
    api_response = [
        {"title": "Fetch Result 1", "url": "https://www.example.com/fetch1", "html": "<div>Fetch Result 1 text</div>"},
    ]
    # The API key lookup is mocked so the test needs no real credentials.
    with mock.patch("os.environ.get", wraps=mock_os_environ_get):
        # fetch() goes through ClientSession.post, which is the patched object here.
        mock_session_post.return_value = MockAiohttpResponse(api_response, 200)
        async with ClientSession() as session:
            result = await backend.fetch(url="https://www.example.com/fetch1", session=session)
    assert result.title == "Fetch Result 1"
    assert result.text == "\nURL: https://www.example.com/fetch1\nFetch Result 1 text"
68+
69+
70+

0 commit comments

Comments
 (0)