Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions plugins/google/CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# @livekit/agents-plugin-google
12 changes: 12 additions & 0 deletions plugins/google/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Google plugin for LiveKit Agents

The Agents Framework is designed for building realtime, programmable
participants that run on servers. Use it to create conversational, multi-modal
voice agents that can see, hear, and understand.

This package contains the Google plugin, which provides speech-to-text (STT) through Google Cloud Speech-to-Text.
Refer to the [documentation](https://docs.livekit.io/agents/overview/) for
information on how to use it, or browse the [API
reference](https://docs.livekit.io/agents-js/modules/plugins_agents_plugin_google.html).
See the [repository](https://github.com/livekit/agents-js) for more information
about the framework as a whole.
20 changes: 20 additions & 0 deletions plugins/google/api-extractor.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
/**
* Config file for API Extractor. For more info, please visit: https://api-extractor.com
*/
{
"$schema": "https://developer.microsoft.com/json-schemas/api-extractor/v7/api-extractor.schema.json",

/**
* Optionally specifies another JSON config file that this file extends from. This provides a way for
* standard settings to be shared across multiple projects.
*
* If the path starts with "./" or "../", the path is resolved relative to the folder of the file that contains
* the "extends" field. Otherwise, the first path segment is interpreted as an NPM package name, and will be
* resolved using NodeJS require().
*
* SUPPORTED TOKENS: none
* DEFAULT VALUE: ""
*/
"extends": "../../api-extractor-shared.json",
"mainEntryPointFilePath": "./dist/index.d.ts"
}
48 changes: 48 additions & 0 deletions plugins/google/package.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
{
"name": "@livekit/agents-plugin-google",
"version": "0.1.0",
"description": "Google plugin for LiveKit Agents for Node.js",
"main": "dist/index.js",
"require": "dist/index.cjs",
"types": "dist/index.d.ts",
"exports": {
".": {
"types": "./dist/index.d.ts",
"import": "./dist/index.js",
"require": "./dist/index.cjs"
}
},
"author": "LiveKit",
"type": "module",
"repository": "git@github.com:livekit/agents-js.git",
"license": "Apache-2.0",
"files": [
"dist",
"src",
"README.md"
],
"scripts": {
"build": "tsup --onSuccess \"tsc --declaration --emitDeclarationOnly\"",
"clean": "rm -rf dist",
"clean:build": "pnpm clean && pnpm build",
"lint": "eslint -f unix \"src/**/*.{ts,js}\"",
"api:check": "api-extractor run --typescript-compiler-folder ../../node_modules/typescript",
"api:update": "api-extractor run --local --typescript-compiler-folder ../../node_modules/typescript --verbose"
},
"devDependencies": {
"@livekit/agents": "workspace:^x",
"@livekit/agents-plugin-silero": "workspace:^x",
"@livekit/agents-plugins-test": "workspace:^x",
"@livekit/rtc-node": "^0.13.11",
"@microsoft/api-extractor": "^7.35.0",
"tsup": "^8.3.5",
"typescript": "^5.0.0"
},
"dependencies": {
"@google-cloud/speech": "^7.1.0"
},
"peerDependencies": {
"@livekit/agents": "workspace:^x",
"@livekit/rtc-node": "^0.13.11"
}
}
1 change: 1 addition & 0 deletions plugins/google/src/index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
export * from './stt.js';
84 changes: 84 additions & 0 deletions plugins/google/src/models.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
/**
 * Audio encoding identifiers.
 *
 * This is a string enum: the runtime value of every member is the same text
 * as the member's name, so a member can be passed wherever the plain string
 * form is expected.
 *
 * NOTE(review): the member names look like the `AudioEncoding` values of the
 * Google Cloud Speech-to-Text API — confirm against the API reference before
 * adding or removing members.
 */
export enum AudioEncoding {
// Placeholder for "no encoding specified".
AUDIO_ENCODING_UNSPECIFIED = 'AUDIO_ENCODING_UNSPECIFIED',
LINEAR16 = 'LINEAR16',
MULAW = 'MULAW',
ALAW = 'ALAW',
AMR = 'AMR',
AMR_WB = 'AMR_WB',
FLAC = 'FLAC',
MP3 = 'MP3',
OGG_OPUS = 'OGG_OPUS',
WEBM_OPUS = 'WEBM_OPUS',
MP4_AAC = 'MP4_AAC',
M4A_AAC = 'M4A_AAC',
MOV_AAC = 'MOV_AAC',
}

/**
 * Speech event identifiers.
 *
 * This is a string enum: the runtime value of every member is the same text
 * as the member's name.
 *
 * NOTE(review): the member names look like the `SpeechEventType` values
 * reported by Google Cloud Speech-to-Text streaming responses — confirm
 * against the API reference.
 */
export enum SpeechEventType {
// Placeholder for "no event type specified".
SPEECH_EVENT_TYPE_UNSPECIFIED = 'SPEECH_EVENT_TYPE_UNSPECIFIED',
END_OF_SINGLE_UTTERANCE = 'END_OF_SINGLE_UTTERANCE',
SPEECH_ACTIVITY_BEGIN = 'SPEECH_ACTIVITY_BEGIN',
SPEECH_ACTIVITY_END = 'SPEECH_ACTIVITY_END',
}

// Google Cloud Speech-to-Text API types
/**
 * Credentials object consisting of ten required string fields.
 *
 * The property names are snake_case rather than camelCase.
 * NOTE(review): they look like the keys of a Google Cloud service-account key
 * JSON file, which would explain the naming — confirm before renaming
 * anything.
 */
export interface GoogleCredentials {
type: string;
project_id: string;
private_key_id: string;
// NOTE(review): presumably secret key material — avoid logging; confirm.
private_key: string;
client_email: string;
client_id: string;
auth_uri: string;
token_uri: string;
auth_provider_x509_cert_url: string;
client_x509_cert_url: string;
}

/**
 * Well-known language tags, each written as `language-REGION`
 * (for example `en-US`).
 *
 * This is a closed union of string literals. `LanguageType` in this file
 * widens it with `string`, so this list is not the only set of values that
 * type accepts.
 */
export type SpeechLanguages =
| 'en-US'
| 'en-GB'
| 'en-AU'
| 'en-CA'
| 'pl-PL'
| 'de-DE'
| 'fr-FR'
| 'es-ES'
| 'it-IT'
| 'pt-BR'
| 'ru-RU'
| 'ja-JP'
| 'ko-KR'
| 'zh-CN'
| 'zh-TW'
| 'ar-SA'
| 'hi-IN'
| 'th-TH'
| 'vi-VN'
| 'tr-TR';

/**
 * Recognition model identifiers, as a closed union of string literals.
 *
 * NOTE(review): `latest_medium` and most of the `medical_*` entries other than
 * `medical_conversation` and `medical_dictation` do not look like identifiers
 * from Google's published Speech-to-Text model list — verify each against the
 * API reference. Members are kept as-is here because removing one would
 * narrow a public type.
 */
export type SpeechModels =
| 'latest_long'
| 'latest_short'
| 'latest_medium'
| 'command_and_search'
| 'phone_call'
| 'video'
| 'default'
| 'medical_conversation'
| 'medical_dictation'
| 'medical_question_and_answer'
| 'medical_report'
| 'medical_symptom'
| 'medical_test'
| 'medical_treatment'
| 'medical_emergency'
| 'medical_consultation'
| 'medical_instruction'
| 'medical_procedure'
| 'medical_medication'
| 'medical_diagnosis'
| 'medical_condition';

/**
 * A language tag: one of the well-known `SpeechLanguages` literals, or any
 * other string.
 *
 * The open-ended branch is written as `string & Record<never, never>` instead
 * of plain `string`. The compiler simplifies a bare `SpeechLanguages | string`
 * to `string`, which discards the literal members and with them editor
 * completions. The intersection keeps the literals visible while still
 * accepting (and being assignable to) every `string`.
 */
export type LanguageType = SpeechLanguages | (string & Record<never, never>);

/** Either a single language tag or a list of language tags. */
export type LanguageCode = LanguageType | LanguageType[];
10 changes: 10 additions & 0 deletions plugins/google/src/stt.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
import { initializeLogger } from '@livekit/agents';
import { VAD } from '@livekit/agents-plugin-silero';
import { stt } from '@livekit/agents-plugins-test';
import { describe } from 'vitest';
import { STT } from './stt.js';

// Runs the shared `stt` suite from `@livekit/agents-plugins-test` against a
// default-constructed Google `STT`, using a Silero VAD.
describe('Google', async () => {
  initializeLogger({ pretty: false });

  // Construct the recognizer before loading the VAD so both steps happen in
  // the same order as when they were written inline as call arguments.
  const recognizer = new STT();
  const vad = await VAD.load();

  // NOTE(review): `nonStreaming: false` presumably skips the non-streaming
  // cases of the shared suite — confirm against the test helper.
  const suiteOptions = { nonStreaming: false };
  await stt(recognizer, vad, suiteOptions);
});
Loading