Changes from all commits
27 commits
1eb98b3
add aifx to local apps
Jul 8, 2025
37b8fbe
Merge branch 'main' into add-aifx-to-local-apps
AlpineVibrations Jul 9, 2025
39d5628
Merge branch 'main' into add-aifx-to-local-apps
AlpineVibrations Jul 11, 2025
7f4d27c
Merge branch 'main' into add-aifx-to-local-apps
AlpineVibrations Jul 16, 2025
10245cc
Merge branch 'main' into add-aifx-to-local-apps
AlpineVibrations Jul 17, 2025
ede6edd
Merge branch 'main' into add-aifx-to-local-apps
AlpineVibrations Jul 20, 2025
da0d996
Merge branch 'main' into add-aifx-to-local-apps
AlpineVibrations Jul 21, 2025
271377b
Merge branch 'main' into add-aifx-to-local-apps
AlpineVibrations Jul 22, 2025
218af89
Merge branch 'main' into add-aifx-to-local-apps
AlpineVibrations Jul 23, 2025
22f9c21
Merge branch 'main' into add-aifx-to-local-apps
AlpineVibrations Jul 25, 2025
7a52e05
Merge branch 'main' into add-aifx-to-local-apps
AlpineVibrations Jul 28, 2025
dc9d724
Merge branch 'main' into add-aifx-to-local-apps
AlpineVibrations Aug 1, 2025
55fb97c
Merge branch 'main' into add-aifx-to-local-apps
AlpineVibrations Aug 6, 2025
9cd4ea0
Merge branch 'main' into add-aifx-to-local-apps
AlpineVibrations Aug 11, 2025
a52dc7f
Merge branch 'main' into add-aifx-to-local-apps
AlpineVibrations Aug 13, 2025
fe063d3
Merge branch 'main' into add-aifx-to-local-apps
AlpineVibrations Aug 16, 2025
619c9d4
Local apps: Add Lemonade (#1639)
danielholanda Aug 18, 2025
3afa09b
🔖 @huggingface/tasks 0.19.36
machineuser Aug 18, 2025
09d0ab1
Add LVFace library (#1685)
GitHup2016yjh Aug 20, 2025
4e276ce
Add BBoxMaskPose to download count (#1687)
MiraPurkrabek Aug 20, 2025
a0cd70b
[Inference] Add ASR support for Replicate provider (#1679)
lucataco Aug 25, 2025
4b8691a
remove extra comma in snippet (#1701)
coyotte508 Aug 25, 2025
7a3de02
Fix the issue that `sdk` parameter is not used. (#1669)
viocha Aug 25, 2025
7dbd111
[ollama-utils] 🤖 Auto-update chat templates (2025-08-25) (#1700)
HuggingFaceInfra Aug 25, 2025
9e26696
🔖 @huggingface/tasks 0.19.37
machineuser Aug 25, 2025
4aa9c63
🔖 @huggingface/ollama-utils 0.0.13
machineuser Aug 25, 2025
a25eb94
add lemonade
Aug 25, 2025
2 changes: 1 addition & 1 deletion packages/hub/src/lib/create-repo.ts
@@ -47,7 +47,7 @@ export async function createRepo(
...(repoId.type === "space"
? {
type: "space",
sdk: "static",
sdk: params.sdk ?? "static",
}
: {
type: repoId.type,
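For context, a minimal usage sketch of the fix (the Space name and token are placeholders, and this is an illustration rather than code from the PR): the caller-supplied sdk option now reaches the Hub API instead of always being sent as "static".

import { createRepo } from "@huggingface/hub";

// Before this change, the sdk option was accepted by the type signature
// but the request body always sent "static", so every new Space was static.
await createRepo({
	repo: { type: "space", name: "my-user/my-gradio-space" }, // placeholder repo
	accessToken: "hf_***", // placeholder token
	sdk: "gradio", // now forwarded; still defaults to "static" when omitted
});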
1 change: 1 addition & 0 deletions packages/inference/src/lib/getProviderHelper.ts
@@ -144,6 +144,7 @@ export const PROVIDERS: Record<InferenceProvider, Partial<Record<InferenceTask,
"text-to-speech": new Replicate.ReplicateTextToSpeechTask(),
"text-to-video": new Replicate.ReplicateTextToVideoTask(),
"image-to-image": new Replicate.ReplicateImageToImageTask(),
"automatic-speech-recognition": new Replicate.ReplicateAutomaticSpeechRecognitionTask(),
},
sambanova: {
conversational: new Sambanova.SambanovaConversationalTask(),
61 changes: 61 additions & 0 deletions packages/inference/src/providers/replicate.ts
@@ -20,11 +20,14 @@ import type { BodyParams, HeaderParams, RequestArgs, UrlParams } from "../types.js";
import { omit } from "../utils/omit.js";
import {
TaskProviderHelper,
type AutomaticSpeechRecognitionTaskHelper,
type ImageToImageTaskHelper,
type TextToImageTaskHelper,
type TextToVideoTaskHelper,
} from "./providerHelper.js";
import type { ImageToImageArgs } from "../tasks/cv/imageToImage.js";
import type { AutomaticSpeechRecognitionArgs } from "../tasks/audio/automaticSpeechRecognition.js";
import type { AutomaticSpeechRecognitionOutput } from "@huggingface/tasks";
import { base64FromBytes } from "../utils/base64FromBytes.js";
export interface ReplicateOutput {
output?: string | string[];
@@ -163,6 +166,64 @@ export class ReplicateTextToVideoTask extends ReplicateTask implements TextToVideoTaskHelper {
}
}

export class ReplicateAutomaticSpeechRecognitionTask
extends ReplicateTask
implements AutomaticSpeechRecognitionTaskHelper
{
override preparePayload(params: BodyParams): Record<string, unknown> {
return {
input: {
...omit(params.args, ["inputs", "parameters"]),
...(params.args.parameters as Record<string, unknown>),
audio: params.args.inputs, // This will be processed in preparePayloadAsync
},
version: params.model.includes(":") ? params.model.split(":")[1] : undefined,
};
}

async preparePayloadAsync(args: AutomaticSpeechRecognitionArgs): Promise<RequestArgs> {
const blob = "data" in args && args.data instanceof Blob ? args.data : "inputs" in args ? args.inputs : undefined;

if (!blob || !(blob instanceof Blob)) {
throw new Error("Audio input must be a Blob");
}

// Convert Blob to base64 data URL
const bytes = new Uint8Array(await blob.arrayBuffer());
const base64 = base64FromBytes(bytes);
const audioInput = `data:${blob.type || "audio/wav"};base64,${base64}`;

return {
...("data" in args ? omit(args, "data") : omit(args, "inputs")),
inputs: audioInput,
};
}

override async getResponse(response: ReplicateOutput): Promise<AutomaticSpeechRecognitionOutput> {
if (typeof response?.output === "string") return { text: response.output };
if (Array.isArray(response?.output) && typeof response.output[0] === "string") return { text: response.output[0] };

const out = response?.output as
| undefined
| {
transcription?: string;
translation?: string;
txt_file?: string;
};
if (out && typeof out === "object") {
if (typeof out.transcription === "string") return { text: out.transcription };
if (typeof out.translation === "string") return { text: out.translation };
if (typeof out.txt_file === "string") {
const r = await fetch(out.txt_file);
return { text: await r.text() };
}
}
throw new InferenceClientProviderOutputError(
"Received malformed response from Replicate automatic-speech-recognition API"
);
}
}

export class ReplicateImageToImageTask extends ReplicateTask implements ImageToImageTaskHelper {
override preparePayload(params: BodyParams<ImageToImageArgs>): Record<string, unknown> {
return {
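A rough end-to-end sketch of the new ASR path (the model id, file name, and token are illustrative assumptions, not values from this PR):

import { InferenceClient } from "@huggingface/inference";
import { readFile } from "node:fs/promises";

// preparePayloadAsync converts the Blob to a base64 data URL, and
// getResponse normalizes Replicate's string / array / object outputs
// into the standard { text } shape.
const client = new InferenceClient("hf_***"); // placeholder token
const audio = await readFile("sample.wav"); // placeholder audio file
const { text } = await client.automaticSpeechRecognition({
	provider: "replicate",
	model: "openai/whisper-large-v3", // placeholder model id
	data: new Blob([audio], { type: "audio/wav" }),
});
console.log(text);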
5 changes: 0 additions & 5 deletions packages/inference/src/tasks/audio/automaticSpeechRecognition.ts
@@ -4,7 +4,6 @@ import { getProviderHelper } from "../../lib/getProviderHelper.js";
import type { BaseArgs, Options } from "../../types.js";
import { innerRequest } from "../../utils/request.js";
import type { LegacyAudioInput } from "./utils.js";
-import { InferenceClientProviderOutputError } from "../../errors.js";

export type AutomaticSpeechRecognitionArgs = BaseArgs & (AutomaticSpeechRecognitionInput | LegacyAudioInput);
/**
@@ -22,9 +21,5 @@ export async function automaticSpeechRecognition(
...options,
task: "automatic-speech-recognition",
});
-	const isValidOutput = typeof res?.text === "string";
-	if (!isValidOutput) {
-		throw new InferenceClientProviderOutputError("Received malformed response from automatic-speech-recognition API");
-	}
return providerHelper.getResponse(res);
}
2 changes: 1 addition & 1 deletion packages/ollama-utils/package.json
@@ -1,7 +1,7 @@
{
"name": "@huggingface/ollama-utils",
"packageManager": "[email protected]",
"version": "0.0.12",
"version": "0.0.13",
"description": "Various utilities for maintaining Ollama compatibility with models on Hugging Face hub",
"repository": "https://github.com/huggingface/huggingface.js.git",
"publishConfig": {
38 changes: 26 additions & 12 deletions packages/ollama-utils/src/chat-template-automap.ts
@@ -5,19 +5,19 @@ import type { OllamaChatTemplateMapEntry } from "./types";

/**
* Skipped these models due to error:
* - library/minicpm-v:latest
* - library/qwen2:latest
* - library/qwen2.5:0.5b
* - library/llama4:latest
* - library/command-r:latest
* - library/phi4-reasoning:latest
* - library/llama3.2:latest
* - library/llama2:latest
* - library/llama3.1:latest
* - library/deepseek-v3:latest
* - library/cogito:3b
* - library/starcoder:latest
* - library/mistral-small3.1:latest
* - library/cogito:latest
* - library/aya-expanse:latest
* - library/smallthinker:3b
* - library/command-r7b:7b
* - library/phi4-mini:latest
* - library/qwen3-coder:latest
* - library/granite3.2-vision:latest
* - library/opencoder:latest
* - library/opencoder:1.5b
* - library/phind-codellama:latest
* - library/yarn-mistral:latest
* - library/stablelm-zephyr:latest
*/

export const OLLAMA_CHAT_TEMPLATE_MAPPING: OllamaChatTemplateMapEntry[] = [
@@ -503,6 +503,20 @@ export const OLLAMA_CHAT_TEMPLATE_MAPPING: OllamaChatTemplateMapEntry[] = [
},
},
},
{
model: "library/gemma3:270m",
gguf: "{{ bos_token }}\n{%- if messages[0]['role'] == 'system' -%}\n {%- if messages[0]['content'] is string -%}\n {%- set first_user_prefix = messages[0]['content'] + '\n\n' -%}\n {%- else -%}\n {%- set first_user_prefix = messages[0]['content'][0]['text'] + '\n\n' -%}\n {%- endif -%}\n {%- set loop_messages = messages[1:] -%}\n{%- else -%}\n {%- set first_user_prefix = \"\" -%}\n {%- set loop_messages = messages -%}\n{%- endif -%}\n{%- for message in loop_messages -%}\n {%- if (message['role'] == 'user') != (loop.index0 % 2 == 0) -%}\n {{ raise_exception(\"Conversation roles must alternate user/assistant/user/assistant/...\") }}\n {%- endif -%}\n {%- if (message['role'] == 'assistant') -%}\n {%- set role = \"model\" -%}\n {%- else -%}\n {%- set role = message['role'] -%}\n {%- endif -%}\n {{ '<start_of_turn>' + role + '\n' + (first_user_prefix if loop.first else \"\") }}\n {%- if message['content'] is string -%}\n {{ message['content'] | trim }}\n {%- elif message['content'] is iterable -%}\n {%- for item in message['content'] -%}\n {%- if item['type'] == 'image' -%}\n {{ '<start_of_image>' }}\n {%- elif item['type'] == 'text' -%}\n {{ item['text'] | trim }}\n {%- endif -%}\n {%- endfor -%}\n {%- else -%}\n {{ raise_exception(\"Invalid content type\") }}\n {%- endif -%}\n {{ '<end_of_turn>\n' }}\n{%- endfor -%}\n{%- if add_generation_prompt -%}\n {{'<start_of_turn>model\n'}}\n{%- endif -%}\n",
ollama: {
template:
'{{- $systemPromptAdded := false }}\n{{- range $i, $_ := .Messages }}\n{{- $last := eq (len (slice $.Messages $i)) 1 }}\n{{- if eq .Role "user" }}<start_of_turn>user\n{{- if (and (not $systemPromptAdded) $.System) }}\n{{- $systemPromptAdded = true }}\n{{ $.System }}\n{{ end }}\n{{ .Content }}<end_of_turn>\n{{ if $last }}<start_of_turn>model\n{{ end }}\n{{- else if eq .Role "assistant" }}<start_of_turn>model\n{{ .Content }}{{ if not $last }}<end_of_turn>\n{{ end }}\n{{- end }}\n{{- end }}',
tokens: ["<start_of_turn>", "<start_of_image>", "<end_of_turn>"],
params: {
stop: ["<end_of_turn>"],
top_k: 64,
top_p: 0.95,
},
},
},
{
model: "library/glm4:9b",
gguf: "[gMASK]<sop>{% for item in messages %}{% if item['tools'] is defined %}<|system|>\n你是一个名为 ChatGLM 的人工智能助手。你是基于智谱AI训练的语言模型 GLM-4 模型开发的,你的任务是针对用户的问题和要求提供适当的答复和支持。\n\n# 可用工具{% set tools = item['tools'] %}{% for tool in tools %}{% if tool['type'] == 'function' %}\n\n## {{ tool['function']['name'] }}\n\n{{ tool['function'] | tojson(indent=4) }}\n在调用上述函数时,请使用 Json 格式表示调用的参数。{% elif tool['type'] == 'python' %}\n\n## python\n\n当你向 `python` 发送包含 Python 代码的消息时,该代码将会在一个有状态的 Jupyter notebook 环境中执行。\n`python` 返回代码执行的输出,或在执行 60 秒后返回超时。\n`/mnt/data` 将会持久化存储你的文件。在此会话中,`python` 无法访问互联网。不要使用 `python` 进行任何网络请求或者在线 API 调用,这些在线内容的访问将不会成功。{% elif tool['type'] == 'simple_browser' %}\n\n## simple_browser\n\n你可以使用 `simple_browser` 工具。该工具支持以下函数:\n`search(query: str, recency_days: int)`:使用搜索引擎进行查询并显示结果,可以使用 `recency_days` 参数控制搜索内容的时效性。\n`mclick(ids: list[int])`:获取一系列指定 id 的页面内容。每次调用时,须选择3-10个页面。选择多个角度的页面,同时尽可能选择可信任的信息来源。考虑到部分页面是无法加载的,你也可以多打开一些可能有用的页面而不用担心内容过多。\n`open_url(url: str)`:打开指定的 URL。\n\n使用 `【{引用 id}†{引用文本}】` 来引用内容。\n\n操作步骤:1. 使用 `search` 来获得信息列表; 2. 使用 `mclick` 来获取指定 ID 页面的内容; 3. 根据获得的内容进行回复。在回复中应当引用信息来源。\n 如果用户提供了 URL,也可以用 `open_url` 直接打开页面。\n如果初次搜索结果没有找到合适的信息,也可以再次使用 `search` 进行搜索。{% elif tool['type'] == 'cogview' %}\n\n## cogview\n\n如果用户的请求中包含了对图像的描述,你可以使用 `cogview` 来生成图像并展示给用户。你需要向 `cogview` 发送图像描述,规则:\n- 发送给 `cogview` 的消息必须使用英语。用户的中文描述必须完全翻译为英语。\n- 应当尽可能详细地描述图像生成的需求,需求描述约 100 英文单词。\n- 保持用户原始描述的意图。不要虚构内容或者没见过的人物。\n- 如无特殊说明,所在地为中国,持有中国立场并遵循中国社会主义价值观。{% endif %}{% endfor %}{% endif %}{% if item['content'] %}<|{{ item['role'] }}|>{{ item['metadata'] }}\n{{ item['content'] }}{% endif %}{% endfor %}{% if add_generation_prompt %}<|assistant|>{% endif %}",
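For reference, a hand-traced render of the gemma3:270m GGUF template added above, for a single user message with add_generation_prompt set (the greeting text is arbitrary and the BOS token is shown literally):

<bos><start_of_turn>user
Hello there<end_of_turn>
<start_of_turn>model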
2 changes: 1 addition & 1 deletion packages/tasks/package.json
@@ -1,7 +1,7 @@
{
"name": "@huggingface/tasks",
"packageManager": "[email protected]",
"version": "0.19.35",
"version": "0.19.37",
"description": "List of ML tasks for huggingface.co/tasks",
"repository": "https://github.com/huggingface/huggingface.js.git",
"publishConfig": {
40 changes: 40 additions & 0 deletions packages/tasks/src/local-apps.ts
@@ -315,6 +315,31 @@ const snippetDockerModelRunner = (model: ModelData, filepath?: string): string => {
return `docker model run hf.co/${model.id}${getQuantTag(filepath)}`;
};

const snippetLemonade = (model: ModelData, filepath?: string): LocalAppSnippet[] => {
const tagName = getQuantTag(filepath);
const modelName = model.id.split("/")[1];
return [
{
title: "Pull the model",
setup: "# Download Lemonade from https://lemonade-server.ai/",
content: [
`lemonade-server pull user.${modelName} \\
--checkpoint ${model.id}${tagName} \\
--recipe llamacpp`,
"# Note: If you installed from source, use the lemonade-server-dev command instead.",
].join("\n"),
},
{
title: "Run and chat with the model",
content: `lemonade-server run user.${modelName}`,
},
{
title: "List all available models",
content: "lemonade-server list",
},
];
};

/**
* Add your new local app here.
*
@@ -492,6 +517,21 @@ export const LOCAL_APPS = {
displayOnModelPage: isLlamaCppGgufModel,
snippet: snippetDockerModelRunner,
},
lemonade: {
prettyLabel: "Lemonade",
docsUrl: "https://lemonade-server.ai",
mainTask: "text-generation",
displayOnModelPage: isLlamaCppGgufModel,
snippet: snippetLemonade,
},
aifx: {
prettyLabel: "AIFX",
docsUrl: "https://aifxart.com",
mainTask: "text-to-image",
macOSOnly: false,
displayOnModelPage: (model) => model.library_name === "diffusers" && model.pipeline_tag === "text-to-image",
deeplink: (model) => new URL(`https://aifxart.com/pages/model-view?model=${model.id}`),
}
} satisfies Record<string, LocalApp>;

export type LocalAppKey = keyof typeof LOCAL_APPS;
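To illustrate the rendered output, for a hypothetical GGUF repo unsloth/Qwen3-8B-GGUF with a Q4_K_M weight file (both made up for this example), snippetLemonade would produce roughly:

# Download Lemonade from https://lemonade-server.ai/
lemonade-server pull user.Qwen3-8B-GGUF \
--checkpoint unsloth/Qwen3-8B-GGUF:Q4_K_M \
--recipe llamacpp
# Note: If you installed from source, use the lemonade-server-dev command instead.

lemonade-server run user.Qwen3-8B-GGUF

lemonade-server list

Likewise, the aifx deeplink for an illustrative model id such as black-forest-labs/FLUX.1-dev resolves to https://aifxart.com/pages/model-view?model=black-forest-labs/FLUX.1-dev.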
12 changes: 11 additions & 1 deletion packages/tasks/src/model-libraries-snippets.ts
@@ -1523,7 +1523,7 @@ export const transformers = (model: ModelData): string[] => {
autoSnippet.push(
"# Load model directly",
`from transformers import ${info.auto_model}`,
-	`model = ${info.auto_model}.from_pretrained("${model.id}"` + remote_code_snippet + ', torch_dtype="auto"),'
+	`model = ${info.auto_model}.from_pretrained("${model.id}"` + remote_code_snippet + ', torch_dtype="auto")'
);
}

@@ -1703,6 +1703,16 @@ export const vfimamba = (model: ModelData): string[] => [
model = Model.from_pretrained("${model.id}")`,
];

export const lvface = (model: ModelData): string[] => [
`from huggingface_hub import hf_hub_download
from inference_onnx import LVFaceONNXInferencer

model_path = hf_hub_download("${model.id}", "LVFace-L_Glint360K/LVFace-L_Glint360K.onnx")
inferencer = LVFaceONNXInferencer(model_path, use_gpu=True, timeout=300)
img_path = 'path/to/image1.jpg'
embedding = inferencer.infer_from_image(img_path)`,
];

export const voicecraft = (model: ModelData): string[] => [
`from voicecraft import VoiceCraft

14 changes: 14 additions & 0 deletions packages/tasks/src/model-libraries.ts
@@ -130,6 +130,13 @@ export const MODEL_LIBRARIES_UI_ELEMENTS = {
filter: false,
countDownloads: `path:"llm_config.json"`,
},
bboxmaskpose: {
prettyLabel: "BBoxMaskPose",
repoName: "BBoxMaskPose",
repoUrl: "https://github.com/MiraPurkrabek/BBoxMaskPose",
filter: false,
countDownloads: `path_extension:"pth"`,
},
ben2: {
prettyLabel: "BEN2",
repoName: "BEN2",
@@ -1156,6 +1163,13 @@
countDownloads: `path_extension:"pkl"`,
snippets: snippets.vfimamba,
},
lvface: {
prettyLabel: "LVFace",
repoName: "LVFace",
repoUrl: "https://github.com/bytedance/LVFace",
countDownloads: `path_extension:"pt" OR path_extension:"onnx"`,
snippets: snippets.lvface,
},
voicecraft: {
prettyLabel: "VoiceCraft",
repoName: "VoiceCraft",