livekit · roertbb · Jul 6, 2025 · Jul 6, 2025 · Jul 6, 2025
diff --git a/plugins/google/CHANGELOG.md b/plugins/google/CHANGELOG.md
@@ -0,0 +1 @@
+# @livekit/agents-plugin-google
diff --git a/plugins/google/README.md b/plugins/google/README.md
@@ -0,0 +1,12 @@
+# Google plugin for LiveKit Agents
+
+The Agents Framework is designed for building realtime, programmable
+participants that run on servers. Use it to create conversational, multi-modal
+voice agents that can see, hear, and understand.
+
+This package contains the Google plugin, which provides speech-to-text (STT) using Google Cloud Speech.
+Refer to the [documentation](https://docs.livekit.io/agents/overview/) for
+information on how to use it, or browse the [API
+reference](https://docs.livekit.io/agents-js/modules/plugins_agents_plugin_google.html).
+See the [repository](https://github.com/livekit/agents-js) for more information
+about the framework as a whole.
diff --git a/plugins/google/api-extractor.json b/plugins/google/api-extractor.json
@@ -0,0 +1,20 @@
+/**
+ * Config file for API Extractor.  For more info, please visit: https://api-extractor.com
+ */
+{
+  "$schema": "https://developer.microsoft.com/json-schemas/api-extractor/v7/api-extractor.schema.json",
+
+  /**
+   * Optionally specifies another JSON config file that this file extends from.  This provides a way for
+   * standard settings to be shared across multiple projects.
+   *
+   * If the path starts with "./" or "../", the path is resolved relative to the folder of the file that contains
+   * the "extends" field.  Otherwise, the first path segment is interpreted as an NPM package name, and will be
+   * resolved using NodeJS require().
+   *
+   * SUPPORTED TOKENS: none
+   * DEFAULT VALUE: ""
+   */
+  "extends": "../../api-extractor-shared.json",
+  "mainEntryPointFilePath": "./dist/index.d.ts"
+}
diff --git a/plugins/google/package.json b/plugins/google/package.json
@@ -0,0 +1,48 @@
+{
+  "name": "@livekit/agents-plugin-google",
+  "version": "0.1.0",
+  "description": "Google plugin for LiveKit Agents for Node.js",
+  "main": "dist/index.js",
+  "require": "dist/index.cjs",
+  "types": "dist/index.d.ts",
+  "exports": {
+    ".": {
+      "types": "./dist/index.d.ts",
+      "import": "./dist/index.js",
+      "require": "./dist/index.cjs"
+    }
+  },
+  "author": "LiveKit",
+  "type": "module",
+  "repository": "git@github.com:livekit/agents-js.git",
+  "license": "Apache-2.0",
+  "files": [
+    "dist",
+    "src",
+    "README.md"
+  ],
+  "scripts": {
+    "build": "tsup --onSuccess \"tsc --declaration --emitDeclarationOnly\"",
+    "clean": "rm -rf dist",
+    "clean:build": "pnpm clean && pnpm build",
+    "lint": "eslint -f unix \"src/**/*.{ts,js}\"",
+    "api:check": "api-extractor run --typescript-compiler-folder ../../node_modules/typescript",
+    "api:update": "api-extractor run --local --typescript-compiler-folder ../../node_modules/typescript --verbose"
+  },
+  "devDependencies": {
+    "@livekit/agents": "workspace:^x",
+    "@livekit/agents-plugin-silero": "workspace:^x",
+    "@livekit/agents-plugins-test": "workspace:^x",
+    "@livekit/rtc-node": "^0.13.11",
+    "@microsoft/api-extractor": "^7.35.0",
+    "tsup": "^8.3.5",
+    "typescript": "^5.0.0"
+  },
+  "dependencies": {
+    "@google-cloud/speech": "^7.1.0"
+  },
+  "peerDependencies": {
+    "@livekit/agents": "workspace:^x",
+    "@livekit/rtc-node": "^0.13.11"
+  }
+}
diff --git a/plugins/google/src/index.ts b/plugins/google/src/index.ts
@@ -0,0 +1 @@
+export * from './stt.js';
diff --git a/plugins/google/src/models.ts b/plugins/google/src/models.ts
@@ -0,0 +1,84 @@
+export enum AudioEncoding { // String enum of audio encoding names; each member's value is identical to its key.
+  AUDIO_ENCODING_UNSPECIFIED = 'AUDIO_ENCODING_UNSPECIFIED', // NOTE(review): presumably the "not set" sentinel — confirm against the Speech API.
+  LINEAR16 = 'LINEAR16',
+  MULAW = 'MULAW',
+  ALAW = 'ALAW',
+  AMR = 'AMR',
+  AMR_WB = 'AMR_WB',
+  FLAC = 'FLAC',
+  MP3 = 'MP3',
+  OGG_OPUS = 'OGG_OPUS',
+  WEBM_OPUS = 'WEBM_OPUS',
+  MP4_AAC = 'MP4_AAC',
+  M4A_AAC = 'M4A_AAC',
+  MOV_AAC = 'MOV_AAC',
+}
+
+export enum SpeechEventType { // String enum of speech event names; each member's value is identical to its key.
+  SPEECH_EVENT_TYPE_UNSPECIFIED = 'SPEECH_EVENT_TYPE_UNSPECIFIED',
+  END_OF_SINGLE_UTTERANCE = 'END_OF_SINGLE_UTTERANCE',
+  SPEECH_ACTIVITY_BEGIN = 'SPEECH_ACTIVITY_BEGIN',
+  SPEECH_ACTIVITY_END = 'SPEECH_ACTIVITY_END',
+}
+
+// Google Cloud Speech-to-Text API types. GoogleCredentials: ten required string fields; presumably the service-account key JSON layout — confirm.
+export interface GoogleCredentials {
+  type: string;
+  project_id: string;
+  private_key_id: string;
+  private_key: string;
+  client_email: string;
+  client_id: string;
+  auth_uri: string;
+  token_uri: string;
+  auth_provider_x509_cert_url: string;
+  client_x509_cert_url: string;
+}
+
+export type SpeechLanguages = // Union of language-REGION tag literals; consumed by LanguageType in this file.
+  | 'en-US'
+  | 'en-GB'
+  | 'en-AU'
+  | 'en-CA'
+  | 'pl-PL'
+  | 'de-DE'
+  | 'fr-FR'
+  | 'es-ES'
+  | 'it-IT'
+  | 'pt-BR'
+  | 'ru-RU'
+  | 'ja-JP'
+  | 'ko-KR'
+  | 'zh-CN'
+  | 'zh-TW'
+  | 'ar-SA'
+  | 'hi-IN'
+  | 'th-TH'
+  | 'vi-VN'
+  | 'tr-TR';
+
+export type SpeechModels = // Union of model-name literals. NOTE(review): verify each name against Google's published recognition model list.
+  | 'latest_long'
+  | 'latest_short'
+  | 'latest_medium'
+  | 'command_and_search'
+  | 'phone_call'
+  | 'video'
+  | 'default'
+  | 'medical_conversation'
+  | 'medical_dictation'
+  | 'medical_question_and_answer'
+  | 'medical_report'
+  | 'medical_symptom'
+  | 'medical_test'
+  | 'medical_treatment'
+  | 'medical_emergency'
+  | 'medical_consultation'
+  | 'medical_instruction'
+  | 'medical_procedure'
+  | 'medical_medication'
+  | 'medical_diagnosis'
+  | 'medical_condition';
+
+export type LanguageType = SpeechLanguages | string; // NOTE(review): `| string` absorbs the literal members, so this resolves to plain `string`.
+export type LanguageCode = LanguageType | LanguageType[]; // Either a single language value or an array of them.
diff --git a/plugins/google/src/stt.test.ts b/plugins/google/src/stt.test.ts
@@ -0,0 +1,10 @@
+import { initializeLogger } from '@livekit/agents';
+import { VAD } from '@livekit/agents-plugin-silero';
+import { stt } from '@livekit/agents-plugins-test';
+import { describe } from 'vitest';
+import { STT } from './stt.js';
+
+describe('Google', async () => { // Suite for the Google STT, driven by the shared `stt` helper from @livekit/agents-plugins-test.
+  initializeLogger({ pretty: false }); // Initialise the agents logger (pretty: false) before the helper runs.
+  await stt(new STT(), await VAD.load(), { nonStreaming: false }); // Default-constructed STT plus a loaded Silero VAD; NOTE(review): `nonStreaming` semantics are defined by the helper — confirm.
+});