diff --git a/README.md b/README.md
index d639197..f01ca8c 100644
--- a/README.md
+++ b/README.md
@@ -1 +1,28 @@
 # Pot-App OpenAI 文字识别插件
+
+中文 | [English](README_EN.md)
+
+本插件提供了与 OpenAI 和 Google Gemini API 集成的图像文字识别功能。
+
+## API 密钥设置
+
+### OpenAI API
+1. 访问 [OpenAI API Keys](https://platform.openai.com/account/api-keys)
+2. 创建新的 API 密钥
+3. 复制密钥以供后续使用
+
+### Google Gemini API
+1. 访问 [Google AI Studio](https://aistudio.google.com/app/apikey)
+2. 生成新的 API 密钥
+3. 复制密钥以供后续使用
+
+## API 端点配置
+
+### OpenAI
+- 默认端点:`https://api.openai.com`
+- 如果使用官方 OpenAI API,无需指定 URL
+- 对于第三方 OpenAI 兼容 API,可以自定义端点 URL
+
+### Google Gemini
+- 手动输入此端点:`https://generativelanguage.googleapis.com`
+- 使用 Gemini API 时必须指定此确切 URL
\ No newline at end of file
diff --git a/README_EN.md b/README_EN.md
new file mode 100644
index 0000000..0675198
--- /dev/null
+++ b/README_EN.md
@@ -0,0 +1,28 @@
+# Pot-App OpenAI Text Recognition Plugin
+
+[中文](README.md) | English
+
+This plugin provides integration with the OpenAI and Google Gemini APIs for recognizing text in images.
+
+## API Key Setup
+
+### OpenAI API
+1. Visit [OpenAI API Keys](https://platform.openai.com/account/api-keys)
+2. Create a new API key
+3. Copy the key for later use
+
+### Google Gemini API
+1. Visit [Google AI Studio](https://aistudio.google.com/app/apikey)
+2. Generate a new API key
+3. Copy the key for later use
+
+## API Endpoint Configuration
+
+### OpenAI
+- Default endpoint: `https://api.openai.com`
+- You don't need to specify a URL when using the official OpenAI API
+- For third-party OpenAI-compatible APIs, you can customize the endpoint URL
+
+### Google Gemini
+- Manually enter this endpoint: `https://generativelanguage.googleapis.com`
+- This exact URL must be specified when using the Gemini API
\ No newline at end of file
diff --git a/info.json b/info.json
index b99ac36..ed1cf5a 100644
--- a/info.json
+++ b/info.json
@@ -8,12 +8,7 @@
         {
             "key": "model",
             "display": "模型",
-            "type": "select",
-            "options": {
-                "gpt-4o": "GPT-4o",
-                "gpt-4o-mini": "GPT-4o-Mini",
-                "gpt-4-vision-preview": "GPT-4 Vision Preview"
-            }
+            "type": "input"
         },
         {
             "key": "requestPath",
diff --git a/main.js b/main.js
index 02d9047..684b59c 100644
--- a/main.js
+++ b/main.js
@@ -3,69 +3,195 @@ async function recognize(base64, lang, options) {
     const { tauriFetch: fetch } = utils;
     let { model = "gpt-4o", apiKey, requestPath, customPrompt } = config;
 
-    if (!requestPath) {
-        requestPath = "https://api.openai.com";
-    }
-    if (!/https?:\/\/.+/.test(requestPath)) {
-        requestPath = `https://${requestPath}`;
-    }
-    if (requestPath.endsWith('/')) {
-        requestPath = requestPath.slice(0, -1);
-    }
-    if (!requestPath.endsWith('/chat/completions')) {
-        requestPath += '/v1/chat/completions';
-    }
-    if (!customPrompt) {
-        customPrompt = "Just recognize the text in the image. Do not offer unnecessary explanations.";
-    }else{
-        customPrompt = customPrompt.replaceAll("$lang", lang);
-    }
+    // Check whether the configured endpoint is the Google API
+    const isGoogleAPI = requestPath?.includes('generativelanguage.googleapis.com');
+
+    if (isGoogleAPI) {
+        // Google Gemini API request format
+        if (!model || model === "gpt-4o") {
+            model = "gemini-1.5-flash";
+        }
+
+        const headers = {
+            'Content-Type': 'application/json'
+        }
 
-    const headers = {
-        'Content-Type': 'application/json',
-        'Authorization': `Bearer ${apiKey}`
-    }
+        // Use the correct Google API endpoint
+        requestPath = `https://generativelanguage.googleapis.com/v1beta/models/${model}:generateContent`;
 
-    const body = {
-        model,
-        messages: [
-            {
-                "role": "system",
-                "content": [
+        const body = {
+            contents: [{
+                role: "user",
+                parts: [
                     {
-                        "type": "text",
-                        "text": customPrompt
+                        text: customPrompt || "Just recognize the text in the image. Do not offer unnecessary explanations."
+                    },
+                    {
+                        inlineData: {
+                            mimeType: "image/png",
+                            data: base64
+                        }
                     }
-                ],
+                ]
+            }]
+        };
+
+        // Append the API key to the URL
+        requestPath += `?key=${apiKey}`;
+
+        let res = await fetch(requestPath, {
+            method: 'POST',
+            url: requestPath,
+            headers: headers,
+            body: {
+                type: "Json",
+                payload: body
             },
-            {
-                "role": "user",
-                "content": [
-                    {
-                        "type": "image_url",
-                        "image_url": {
-                            "url": `data:image/png;base64,${base64}`,
-                            "detail": "high"
-                        },
-                    },
-                ],
+            responseType: 1
+        });
+
+        // Handle the Google API response format
+        if (res.ok) {
+            let result = res.data;
+            // Validate the response shape and provide detailed error messages
+            if (!result) {
+                throw `Empty response from Google API`;
             }
-        ],
-    }
+            if (!result.candidates) {
+                throw `No candidates in response: ${JSON.stringify(result)}`;
+            }
+            if (!result.candidates[0]) {
+                throw `Empty candidates array: ${JSON.stringify(result)}`;
+            }
+            if (!result.candidates[0].content) {
+                throw `No content in candidate: ${JSON.stringify(result.candidates[0])}`;
+            }
+            if (!result.candidates[0].content.parts) {
+                throw `No parts in content: ${JSON.stringify(result.candidates[0].content)}`;
+            }
+            if (!result.candidates[0].content.parts[0]) {
+                throw `Empty parts array: ${JSON.stringify(result.candidates[0].content.parts)}`;
+            }
+            if (!result.candidates[0].content.parts[0].text) {
+                throw `No text in part: ${JSON.stringify(result.candidates[0].content.parts[0])}`;
+            }
+            return result.candidates[0].content.parts[0].text;
+        } else {
+            throw `Request failed with status ${res.status}: ${JSON.stringify(res.data)}`;
+        }
-    let res = await fetch(requestPath, {
-        method: 'POST',
-        url: requestPath,
-        headers: headers,
-        body: {
-            type: "Json",
-            payload: body
-        }
-    });
-    if (res.ok) {
-        let result = res.data;
-        return result.choices[0].message.content;
     } else {
-        throw `Http Request Error\nHttp Status: ${res.status}\n${JSON.stringify(res.data)}`;
+        // OpenAI API handling logic
+        if (!requestPath) {
+            requestPath = "https://api.openai.com";
+        }
+        if (!/https?:\/\/.+/.test(requestPath)) {
+            requestPath = `https://${requestPath}`;
+        }
+        if (requestPath.endsWith('/')) {
+            requestPath = requestPath.slice(0, -1);
+        }
+        if (!requestPath.endsWith('/chat/completions')) {
+            requestPath += '/v1/chat/completions';
+        }
+        if (!customPrompt) {
+            customPrompt = "Just recognize the text in the image. Do not offer unnecessary explanations.";
+        }else{
+            customPrompt = customPrompt.replaceAll("$lang", lang);
+        }
+
+        const headers = {
+            'Content-Type': 'application/json',
+            'Authorization': `Bearer ${apiKey}`
+        }
+
+        const body = {
+            model,
+            messages: [
+                {
+                    "role": "user",
+                    "content": [
+                        {
+                            "type": "text",
+                            "text": customPrompt
+                        },
+                        {
+                            "type": "image_url",
+                            "image_url": {
+                                "url": `data:image/png;base64,${base64}`,
+                                "detail": "high"
+                            }
+                        }
+                    ]
+                }
+            ]
+        };
+
+        let res = await fetch(requestPath, {
+            method: 'POST',
+            url: requestPath,
+            headers: headers,
+            body: {
+                type: "Json",
+                payload: body
+            },
+            responseType: 1
+        });
+
+        /* Debug-related code
+        const debugBody = { ...body };
+        if (debugBody.messages?.[0]?.content) {
+            debugBody.messages[0].content = debugBody.messages[0].content.map(item => {
+                if (item.type === 'image_url') {
+                    return {
+                        type: 'image_url',
+                        image_url: {
+                            url: '***[BASE64]***',
+                            detail: item.image_url.detail
+                        }
+                    };
+                }
+                return item;
+            });
+        }
+        */
+
+        if (res.ok) {
+            let result = res.data;
+            if (!result || !result.choices || !result.choices[0]) {
+                throw `Invalid API Response: ${JSON.stringify(result)}`;
+            }
+
+            const choice = result.choices[0];
+            let content = '';
+            if (choice.message && choice.message.content) {
+                content = choice.message.content;
+            } else if (choice.content) {
+                content = choice.content;
+            } else {
+                content = JSON.stringify(choice);
+            }
+
+            /* Debug-related code
+            return `🔍 Debug Info:
+Request URL: ${requestPath}
+Request Headers: ${JSON.stringify(headers, null, 2)}
+Request Body: ${JSON.stringify(debugBody, null, 2)}
+
+📝 Response:
+${content}`;
+            */
+            return content;
+        } else {
+            /* Debug-related code
+            throw `❌ Http Request Error
+URL: ${requestPath}
+Status: ${res.status}
+Headers: ${JSON.stringify(headers, null, 2)}
+Request Body: ${JSON.stringify(debugBody, null, 2)}
+Response: ${JSON.stringify(res.data, null, 2)}`;
+            */
+            throw `Request failed with status ${res.status}`;
+        }
     }
 }
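
For quick troubleshooting of the Gemini endpoint outside of Pot, the same request shape that `main.js` builds above can be exercised as a standalone script. The following is a minimal sketch, not part of the patch: it assumes Node.js 18+ (for the global `fetch`), a local `test.png`, and a `GEMINI_API_KEY` environment variable, all of which are illustrative placeholders rather than anything the plugin defines.

```js
// Standalone sketch of the Gemini request built in main.js (not part of the plugin).
// Assumes Node.js 18+ for global fetch; GEMINI_API_KEY and test.png are placeholders.
const fs = require("node:fs");

async function main() {
    const model = "gemini-1.5-flash";
    const apiKey = process.env.GEMINI_API_KEY;
    const base64 = fs.readFileSync("test.png").toString("base64");

    // Same endpoint and key-in-query-string scheme used by the plugin.
    const url = `https://generativelanguage.googleapis.com/v1beta/models/${model}:generateContent?key=${apiKey}`;

    // Same payload shape: one user turn with a text part and an inline image part.
    const body = {
        contents: [{
            role: "user",
            parts: [
                { text: "Just recognize the text in the image. Do not offer unnecessary explanations." },
                { inlineData: { mimeType: "image/png", data: base64 } }
            ]
        }]
    };

    const res = await fetch(url, {
        method: "POST",
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify(body)
    });
    if (!res.ok) {
        throw new Error(`Request failed with status ${res.status}: ${await res.text()}`);
    }
    const result = await res.json();

    // Same response path the plugin reads: candidates[0].content.parts[0].text
    console.log(result?.candidates?.[0]?.content?.parts?.[0]?.text);
}

main().catch(console.error);
```

Inside the plugin the identical payload is sent through Pot's `tauriFetch` wrapper instead of the standard `fetch`, which is why `main.js` wraps it as `body: { type: "Json", payload: body }` and sets `responseType: 1` rather than calling `JSON.stringify` itself.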