add-gemini-ocr (#890)

* add-gemini-ocr

* add gui, config
This commit is contained in:
Asuka Minato 2024-07-15 00:14:54 +09:00 committed by GitHub
parent 869ef46306
commit 252f6449b2
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 46 additions and 0 deletions

View File

@ -0,0 +1,37 @@
import base64
import requests
from ocrengines.baseocrclass import baseocr
class OCR(baseocr):
    """OCR engine that sends the image to the Google Gemini REST API."""

    def ocr(self, imagebinary):
        """Recognise the text in an image with Gemini 1.5 Flash.

        Args:
            imagebinary: Raw bytes of the image. The payload declares the
                MIME type as ``image/png`` (assumes callers pass PNG data --
                TODO confirm against the caller).

        Returns:
            The text returned by the first candidate, with all of its text
            parts concatenated in order.

        Raises:
            Exception: If the server answers with a non-200 status, or the
                response body does not have the expected structure. The
                message is the raw response body.
        """
        # checkempty / config / proxy are inherited from baseocr (not shown
        # here); presumably checkempty rejects a blank API key.
        self.checkempty(["key"])
        api_key = self.config["key"]
        image_data = base64.b64encode(imagebinary).decode("utf-8")

        # Prepare the request payload: one text instruction + the inline image.
        payload = {
            "contents": [
                {
                    "parts": [
                        {"text": "Ocr this picture"},
                        {"inlineData": {"mimeType": "image/png", "data": image_data}},
                    ]
                }
            ]
        }

        # Set up the request headers and URL.
        headers = {"Content-Type": "application/json"}
        url = f"https://generativelanguage.googleapis.com/v1/models/gemini-1.5-flash:generateContent?key={api_key}"

        # Send the request.
        response = requests.post(url, headers=headers, json=payload, proxies=self.proxy)

        # Report HTTP-level failures directly. This check lives outside the
        # try block so the error is not caught and re-wrapped in a copy of
        # itself.
        if response.status_code != 200:
            raise Exception(response.text)

        # Only the parsing of the body is guarded: any structural surprise
        # (missing key, empty candidate list, invalid JSON) is reported with
        # the raw body so the user can see what the server actually said.
        try:
            parts = response.json()["candidates"][0]["content"]["parts"]
            # A candidate may carry several text parts; keep all of them
            # instead of silently dropping everything after the first.
            texts = [part["text"] for part in parts if "text" in part]
        except Exception as e:
            raise Exception(response.text) from e

        if not texts:
            raise Exception(response.text)
        return "".join(texts)

View File

@ -1463,6 +1463,10 @@
"use": false,
"name": "WeChatOCR",
"type": "offline"
},
"geminiocr": {
"use": false,
"name": "GeminiOCR"
}
},
"fanyi": {

View File

@ -257,6 +257,11 @@
"key": ""
}
},
"geminiocr": {
"args": {
"key": ""
}
},
"xunfei": {
"args": {
"link": "https://www.xfyun.cn/doc/words/multi_print_recognition/API.html",