Skip to content

Commit a32a37b

Browse files
authored
Merge pull request #81 from evalstate/feat/combine-details
Feat/combine details
2 parents cb6a544 + 38215b6 commit a32a37b

File tree

8 files changed

+239
-10
lines changed

8 files changed

+239
-10
lines changed

packages/app/src/server/mcp-server.ts

Lines changed: 62 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,9 @@ import {
2424
DATASET_DETAIL_TOOL_CONFIG,
2525
DATASET_DETAIL_PROMPT_CONFIG,
2626
type DatasetDetailParams,
27+
HUB_INSPECT_TOOL_CONFIG,
28+
HubInspectTool,
29+
type HubInspectParams,
2730
DuplicateSpaceTool,
2831
formatDuplicateResult,
2932
type DuplicateSpaceParams,
@@ -151,6 +154,14 @@ export const createServerFactory = (_webServerInstance: WebServer, sharedApiClie
151154
disable(): void;
152155
}
153156

157+
// Get tool selection first (needed for runtime configuration like INCLUDE_README)
158+
const toolSelectionContext: ToolSelectionContext = {
159+
headers,
160+
userSettings,
161+
hfToken,
162+
};
163+
const toolSelection = await toolSelectionStrategy.selectTools(toolSelectionContext);
164+
154165
// Always register all tools and store instances for dynamic control
155166
const toolInstances: { [name: string]: Tool } = {};
156167

@@ -466,6 +477,56 @@ export const createServerFactory = (_webServerInstance: WebServer, sharedApiClie
466477
}
467478
);
468479

480+
// Compute README availability; adjust description and schema accordingly.
// README access is switched on by the presence of 'INCLUDE_README' in the enabled tool IDs.
const hubInspectReadmeAllowed = toolSelection.enabledToolIds.includes('INCLUDE_README');
const hubInspectDescription = hubInspectReadmeAllowed
  ? `${HUB_INSPECT_TOOL_CONFIG.description} README file is included from the external repository.`
  : HUB_INSPECT_TOOL_CONFIG.description;

// When READMEs are not allowed, drop `include_readme` from the advertised schema so
// clients are never offered an option the server would ignore.
const hubInspectBaseShape = HUB_INSPECT_TOOL_CONFIG.schema.shape as z.ZodRawShape;
const { include_readme: _includeReadmeField, ...hubInspectShapeWithoutReadme } = hubInspectBaseShape;
const hubInspectSchemaShape: z.ZodRawShape = hubInspectReadmeAllowed
  ? hubInspectBaseShape
  : hubInspectShapeWithoutReadme;

toolInstances[HUB_INSPECT_TOOL_CONFIG.name] = server.tool(
  HUB_INSPECT_TOOL_CONFIG.name,
  hubInspectDescription,
  hubInspectSchemaShape,
  HUB_INSPECT_TOOL_CONFIG.annotations,
  async (params: Record<string, unknown>) => {
    // Validate against the full schema instead of casting: this yields typed values and
    // fills in the schema default (`include_readme: true`) when the caller omits the
    // field or when the field was removed from the advertised shape above.
    const inspectParams: HubInspectParams = HUB_INSPECT_TOOL_CONFIG.schema.parse(params);

    // README is returned only when configuration allows it AND the caller has not opted out.
    const includeReadme = hubInspectReadmeAllowed && inspectParams.include_readme;

    const tool = new HubInspectTool(hfToken, undefined);
    const result = await tool.inspect(inspectParams, includeReadme);

    logPromptQuery(
      HUB_INSPECT_TOOL_CONFIG.name,
      // The schema requires at least one id; the fallback only satisfies strict index checks.
      inspectParams.repo_ids[0] ?? '',
      { count: inspectParams.repo_ids.length, repo_type: inspectParams.repo_type, include_readme: includeReadme },
      {
        ...getLoggingOptions(),
        totalResults: result.totalResults,
        resultsShared: result.resultsShared,
        responseCharCount: result.formatted.length,
      }
    );
    return {
      content: [{ type: 'text', text: result.formatted }],
    };
  }
);
529+
469530
toolInstances[DOCS_SEMANTIC_SEARCH_CONFIG.name] = server.tool(
470531
DOCS_SEMANTIC_SEARCH_CONFIG.name,
471532
DOCS_SEMANTIC_SEARCH_CONFIG.description,
@@ -557,13 +618,7 @@ export const createServerFactory = (_webServerInstance: WebServer, sharedApiClie
557618
// NB: That may not always be the case, consider carefully whether you want a tool
558619
// included in the skipGradio check.
559620
const applyToolStates = async () => {
560-
const context: ToolSelectionContext = {
561-
headers,
562-
userSettings,
563-
hfToken,
564-
};
565-
566-
const toolSelection = await toolSelectionStrategy.selectTools(context);
621+
// Use the already computed toolSelection
567622

568623
logger.info(
569624
{

packages/app/src/server/utils/tool-selection-strategy.ts

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import { logger } from './logger.js';
22
import type { AppSettings, SpaceTool } from '../../shared/settings.js';
3-
import { ALL_BUILTIN_TOOL_IDS, TOOL_ID_GROUPS } from '@llmindset/hf-mcp';
3+
import { ALL_BUILTIN_TOOL_IDS, HUB_INSPECT_TOOL_ID, TOOL_ID_GROUPS } from '@llmindset/hf-mcp';
44
import type { McpApiClient } from './mcp-api-client.js';
55
import { extractAuthBouquetAndMix } from '../utils/auth-utils.js';
66

@@ -48,6 +48,15 @@ export const BOUQUETS: Record<string, AppSettings> = {
4848
builtInTools: [...ALL_BUILTIN_TOOL_IDS],
4949
spaceTools: [],
5050
},
51+
// Test bouquets for README inclusion behavior
52+
inspect_readme: {
53+
builtInTools: [HUB_INSPECT_TOOL_ID, 'INCLUDE_README'],
54+
spaceTools: [],
55+
},
56+
inspect: {
57+
builtInTools: [HUB_INSPECT_TOOL_ID],
58+
spaceTools: [],
59+
},
5160
};
5261

5362
/**

packages/app/src/web/App.tsx

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,12 +25,14 @@ import {
2525
SPACE_FILES_TOOL_ID,
2626
DOCS_SEMANTIC_SEARCH_TOOL_ID,
2727
DOC_FETCH_TOOL_ID,
28+
HUB_INSPECT_TOOL_ID,
2829
SEMANTIC_SEARCH_TOOL_CONFIG,
2930
MODEL_SEARCH_TOOL_CONFIG,
3031
MODEL_DETAIL_TOOL_CONFIG,
3132
PAPER_SEARCH_TOOL_CONFIG,
3233
DATASET_SEARCH_TOOL_CONFIG,
3334
DATASET_DETAIL_TOOL_CONFIG,
35+
HUB_INSPECT_TOOL_CONFIG,
3436
DUPLICATE_SPACE_TOOL_CONFIG,
3537
SPACE_INFO_TOOL_CONFIG,
3638
SPACE_FILES_TOOL_CONFIG,
@@ -270,6 +272,12 @@ function App() {
270272
description: DATASET_DETAIL_TOOL_CONFIG.description,
271273
settings: { enabled: settings?.builtInTools?.includes(DATASET_DETAIL_TOOL_ID) ?? true },
272274
},
275+
hub_inspect: {
276+
id: HUB_INSPECT_TOOL_ID,
277+
label: HUB_INSPECT_TOOL_CONFIG.annotations.title,
278+
description: HUB_INSPECT_TOOL_CONFIG.description,
279+
settings: { enabled: settings?.builtInTools?.includes(HUB_INSPECT_TOOL_ID) ?? true },
280+
},
273281
doc_semantic_search: {
274282
id: DOCS_SEMANTIC_SEARCH_TOOL_ID,
275283
label: DOCS_SEMANTIC_SEARCH_CONFIG.annotations.title,

packages/e2e-python/hub-inspect.sh

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
# Manual end-to-end check of the hub_inspect tool using the `inspect_readme` mix.
# Requires a server listening on localhost:3001 and HF_TOKEN set in the environment.
# The URL is quoted because `?` is a glob character (zsh aborts with "no matches found");
# the token is quoted so an empty or unusual value cannot shift the remaining arguments.
fast-agent --url "http://localhost:3001/mcp?mix=inspect_readme" --auth "$HF_TOKEN" -m "can you tell me details about these models: microsoft/VibeVoice-1.5B, 'openai/gpt-oss-120b', xai-org/grok-2, NousResearch/Hermes-4-70B" --model groq.openai/gpt-oss-20b
2+

packages/mcp/src/hub-inspect.ts

Lines changed: 142 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,142 @@
1+
import { z } from 'zod';
2+
import type { ToolResult } from './types/tool-result.js';
3+
import { ModelDetailTool } from './model-detail.js';
4+
import { DatasetDetailTool } from './dataset-detail.js';
5+
import { spaceInfo } from '@huggingface/hub';
6+
import { formatDate } from './utilities.js';
7+
8+
/**
 * Argument schema for the `hub_inspect` tool:
 * - `repo_ids`: between 1 and 10 non-empty repository id strings.
 * - `repo_type`: optional restriction to `model`, `dataset` or `space`.
 * - `include_readme`: boolean that defaults to `true`.
 */
const hubInspectSchema = z.object({
  repo_ids: z
    .array(z.string().min(1))
    .min(1, 'Provide at least one id')
    .max(10, 'Provide at most 10 repo ids')
    .describe('Repo IDs for (models|dataset/space) - usually in author/name format (e.g. openai/gpt-oss-120b)'),
  repo_type: z.enum(['model', 'dataset', 'space']).optional().describe('Specify lookup type; otherwise auto-detects'),
  include_readme: z.boolean().default(true).describe('Include README from the repo'),
});

/** Static registration data (name, description, schema, annotations) for the `hub_inspect` tool. */
export const HUB_INSPECT_TOOL_CONFIG = {
  name: 'hub_inspect',
  description:
    'Get details for one or more Hugging Face repos (model, dataset, or space). ' +
    'Auto-detects type unless specified.',
  schema: hubInspectSchema,
  annotations: {
    title: 'Hub Inspect',
    destructiveHint: false,
    readOnlyHint: true,
    openWorldHint: false,
  },
} as const;
29+
30+
/** Arguments accepted by the `hub_inspect` tool, inferred from `HUB_INSPECT_TOOL_CONFIG.schema`. */
export type HubInspectParams = z.infer<typeof HUB_INSPECT_TOOL_CONFIG.schema>;
31+
32+
/**
 * Fetches details for one or more Hub repositories, delegating to the model and
 * dataset detail tools and to `spaceInfo` for spaces.
 */
export class HubInspectTool {
  private readonly modelDetail: ModelDetailTool;
  private readonly datasetDetail: DatasetDetailTool;
  private readonly hubUrl?: string;

  /**
   * @param hfToken - Optional token forwarded to the model and dataset detail tools.
   * @param hubUrl - Optional hub URL forwarded to the detail tools and to `spaceInfo`.
   */
  constructor(hfToken?: string, hubUrl?: string) {
    this.modelDetail = new ModelDetailTool(hfToken, hubUrl);
    this.datasetDetail = new DatasetDetailTool(hfToken, hubUrl);
    this.hubUrl = hubUrl;
  }

  /**
   * Inspects every id in `params.repo_ids`, in order.
   *
   * A failure for one id does not abort the others: it is rendered as an error
   * section for that id and excluded from `resultsShared`.
   *
   * @param params - Repository ids plus an optional type restriction.
   * @param includeReadme - Passed through to the model and dataset detail lookups.
   * @returns Sections joined by horizontal rules, the number of ids requested
   *   (`totalResults`) and the number that resolved (`resultsShared`).
   */
  async inspect(params: HubInspectParams, includeReadme: boolean = false): Promise<ToolResult> {
    const parts: string[] = [];
    let successCount = 0;

    for (const id of params.repo_ids) {
      try {
        parts.push(await this.inspectSingle(id, params.repo_type, includeReadme));
        successCount += 1;
      } catch (err) {
        const msg = err instanceof Error ? err.message : String(err);
        parts.push(`# ${id}\n\n- Error: ${msg}`);
      }
    }

    return {
      formatted: parts.join('\n\n---\n\n'),
      totalResults: params.repo_ids.length,
      resultsShared: successCount,
    };
  }

  /**
   * Resolves one repository id to formatted text.
   *
   * With an explicit `type`, only that lookup runs and its error propagates.
   * Without one, the id is probed as model, dataset and space, and every
   * successful match is returned because the same id may exist under several types.
   *
   * @throws Error when auto-detection finds the id under none of the three types.
   */
  private async inspectSingle(
    repoId: string,
    type: 'model' | 'dataset' | 'space' | undefined,
    includeReadme: boolean
  ): Promise<string> {
    // If caller constrained the type, do only that
    if (type === 'model') {
      return (await this.modelDetail.getDetails(repoId, includeReadme)).formatted;
    }
    if (type === 'dataset') {
      return (await this.datasetDetail.getDetails(repoId, includeReadme)).formatted;
    }
    if (type === 'space') {
      return await this.getSpaceDetails(repoId);
    }

    // Auto-detect: the three lookups are independent, so run them concurrently.
    // Each probe is an async closure so that even a synchronous throw becomes a
    // rejection that allSettled records rather than an exception that escapes.
    const probes: Array<() => Promise<string>> = [
      async () => `**Type: Model**\n\n${(await this.modelDetail.getDetails(repoId, includeReadme)).formatted}`,
      async () => `**Type: Dataset**\n\n${(await this.datasetDetail.getDetails(repoId, includeReadme)).formatted}`,
      async () => `**Type: Space**\n\n${await this.getSpaceDetails(repoId)}`,
    ];
    const settled = await Promise.allSettled(probes.map((probe) => probe()));

    // allSettled preserves input order, so matches stay in model, dataset, space order.
    // A rejected probe is treated as "not this type" and dropped.
    const matches = settled.flatMap((outcome) => (outcome.status === 'fulfilled' ? [outcome.value] : []));

    if (matches.length === 0) {
      throw new Error(`Could not find repo '${repoId}' as model, dataset, or space.`);
    }

    return matches.join('\n\n---\n\n');
  }

  /**
   * Builds a Markdown overview (author, SDK, likes, last update, tags, link) for a Space.
   *
   * @param spaceId - Space id passed to `spaceInfo` as `name`.
   */
  private async getSpaceDetails(spaceId: string): Promise<string> {
    const additionalFields = ['author', 'tags', 'runtime', 'subdomain', 'sha'] as const;
    const info = await spaceInfo<(typeof additionalFields)[number]>({
      name: spaceId,
      additionalFields: Array.from(additionalFields),
      ...(this.hubUrl && { hubUrl: this.hubUrl }),
    });

    // Local view of the requested additional fields; all are treated as optional
    // because the response may omit them.
    interface SpaceExtra {
      author?: string;
      tags?: readonly string[] | string[];
      runtime?: unknown;
      subdomain?: string;
      sha?: string;
    }
    const extra = info as Partial<SpaceExtra>;
    const tags = Array.isArray(extra.tags) ? extra.tags : undefined;

    const lines: string[] = [`# ${info.name}`, '', '## Overview'];
    if (extra.author) lines.push(`- **Author:** ${extra.author}`);
    if (info.sdk) lines.push(`- **SDK:** ${info.sdk}`);
    lines.push(`- **Likes:** ${info.likes}`);
    lines.push(`- **Updated:** ${formatDate(info.updatedAt)}`);
    if (tags && tags.length) lines.push(`- **Tags:** ${tags.join(', ')}`);
    lines.push('');
    lines.push(`**Link:** [https://hf.co/spaces/${info.name}](https://hf.co/spaces/${info.name})`);
    return lines.join('\n');
  }
}

packages/mcp/src/index.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ export * from './utilities.js';
88
export * from './paper-search.js';
99
export * from './dataset-search.js';
1010
export * from './dataset-detail.js';
11+
export * from './hub-inspect.js';
1112
export * from './duplicate-space.js';
1213
export * from './space-info.js';
1314
export * from './space-files.js';

packages/mcp/src/readme-utils.ts

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,9 @@
22
* Utility functions for fetching and processing README files from Hugging Face repositories
33
*/
44

5+
// Maximum number of characters to include from a README
6+
const DEFAULT_MAX_README_CHARS = 10_000;
7+
58
/**
69
* Fetches README content from a Hugging Face repository
710
*
@@ -40,6 +43,12 @@ export async function fetchReadmeContent(
4043
content = stripYamlFrontmatter(content);
4144
}
4245

46+
// Truncate overly long READMEs to a sensible default size
47+
if (content.length > DEFAULT_MAX_README_CHARS) {
48+
const truncated = content.slice(0, DEFAULT_MAX_README_CHARS);
49+
content = `${truncated}\n\n[... truncated to ~${DEFAULT_MAX_README_CHARS.toString()} characters — full README: ${baseUrl}]`;
50+
}
51+
4352
// Return null if content is empty after processing
4453
if (!content.trim()) {
4554
return null;
@@ -71,4 +80,4 @@ function stripYamlFrontmatter(content: string): string {
7180

7281
// No YAML frontmatter found, return original content
7382
return content;
74-
}
83+
}

packages/mcp/src/tool-ids.ts

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ import {
1212
DATASET_SEARCH_TOOL_CONFIG,
1313
DATASET_DETAIL_TOOL_CONFIG,
1414
DATASET_DETAIL_PROMPT_CONFIG,
15+
HUB_INSPECT_TOOL_CONFIG,
1516
DUPLICATE_SPACE_TOOL_CONFIG,
1617
SPACE_INFO_TOOL_CONFIG,
1718
SPACE_FILES_TOOL_CONFIG,
@@ -28,6 +29,7 @@ export const MODEL_DETAIL_TOOL_ID = MODEL_DETAIL_TOOL_CONFIG.name;
2829
export const PAPER_SEARCH_TOOL_ID = PAPER_SEARCH_TOOL_CONFIG.name;
2930
export const DATASET_SEARCH_TOOL_ID = DATASET_SEARCH_TOOL_CONFIG.name;
3031
export const DATASET_DETAIL_TOOL_ID = DATASET_DETAIL_TOOL_CONFIG.name;
32+
export const HUB_INSPECT_TOOL_ID = HUB_INSPECT_TOOL_CONFIG.name;
3133
export const DUPLICATE_SPACE_TOOL_ID = DUPLICATE_SPACE_TOOL_CONFIG.name;
3234
export const SPACE_INFO_TOOL_ID = SPACE_INFO_TOOL_CONFIG.name;
3335
export const SPACE_FILES_TOOL_ID = SPACE_FILES_TOOL_CONFIG.name;
@@ -46,6 +48,7 @@ export const ALL_BUILTIN_TOOL_IDS = [
4648
PAPER_SEARCH_TOOL_ID,
4749
DATASET_SEARCH_TOOL_ID,
4850
DATASET_DETAIL_TOOL_ID,
51+
HUB_INSPECT_TOOL_ID,
4952
DUPLICATE_SPACE_TOOL_ID,
5053
SPACE_INFO_TOOL_ID,
5154
SPACE_FILES_TOOL_ID,
@@ -62,7 +65,7 @@ export const TOOL_ID_GROUPS = {
6265
DOCS_SEMANTIC_SEARCH_TOOL_ID,
6366
] as const,
6467
spaces: [SPACE_SEARCH_TOOL_ID, DUPLICATE_SPACE_TOOL_ID, SPACE_INFO_TOOL_ID, SPACE_FILES_TOOL_ID] as const,
65-
detail: [MODEL_DETAIL_TOOL_ID, DATASET_DETAIL_TOOL_ID] as const,
68+
detail: [MODEL_DETAIL_TOOL_ID, DATASET_DETAIL_TOOL_ID, HUB_INSPECT_TOOL_ID] as const,
6669
docs: [DOCS_SEMANTIC_SEARCH_TOOL_ID, DOC_FETCH_TOOL_ID] as const,
6770
hf_api: [
6871
SPACE_SEARCH_TOOL_ID,

0 commit comments

Comments
 (0)