From 252f6449b227982b522e18f4b1f09b752cd49a52 Mon Sep 17 00:00:00 2001
From: Asuka Minato
Date: Mon, 15 Jul 2024 00:14:54 +0900
Subject: [PATCH] add-gemini-ocr (#890)

* add-gemini-ocr

* add gui, config
---
Reviewer notes (text in this section is not part of the commit message):
- Line breaks and indentation were restored from a flattened copy of this
  patch; verify the JSON context lines against the target files.
- gemini.py was revised in review (API key sent as a header, request
  timeout, narrower error handling), so the blob id on its "index" line
  is stale.

 .../LunaTranslator/ocrengines/gemini.py       | 50 +++++++++++++++++++
 .../files/defaultconfig/config.json           |  4 ++
 .../files/defaultconfig/ocrsetting.json       |  5 ++
 3 files changed, 59 insertions(+)
 create mode 100644 LunaTranslator/LunaTranslator/ocrengines/gemini.py

diff --git a/LunaTranslator/LunaTranslator/ocrengines/gemini.py b/LunaTranslator/LunaTranslator/ocrengines/gemini.py
new file mode 100644
index 00000000..43209655
--- /dev/null
+++ b/LunaTranslator/LunaTranslator/ocrengines/gemini.py
@@ -0,0 +1,50 @@
+import base64
+import requests
+from ocrengines.baseocrclass import baseocr
+
+
+class OCR(baseocr):
+    """OCR engine backed by the Gemini generateContent REST endpoint."""
+
+    def ocr(self, imagebinary):
+        """Send an image to Gemini and return the text of its first reply part.
+
+        imagebinary: raw image bytes; they are labelled as PNG in the request.
+        Raises Exception carrying the response body when the API answers with
+        a non-200 status or with JSON that lacks the expected text field.
+        """
+        self.checkempty(["key"])
+        payload = {
+            "contents": [
+                {
+                    "parts": [
+                        {"text": "Ocr this picture"},
+                        {
+                            "inlineData": {
+                                "mimeType": "image/png",
+                                "data": base64.b64encode(imagebinary).decode("utf-8"),
+                            }
+                        },
+                    ]
+                }
+            ]
+        }
+        # The key travels in a header rather than the query string so that it
+        # cannot leak through URLs quoted in connection errors or logs.
+        headers = {
+            "Content-Type": "application/json",
+            "x-goog-api-key": self.config["key"],
+        }
+        url = "https://generativelanguage.googleapis.com/v1/models/gemini-1.5-flash:generateContent"
+
+        # Bounded wait: without a timeout a stalled connection blocks forever.
+        response = requests.post(
+            url, headers=headers, json=payload, proxies=self.proxy, timeout=30
+        )
+        if response.status_code != 200:
+            raise Exception(response.text)
+        try:
+            return response.json()["candidates"][0]["content"]["parts"][0]["text"]
+        except (KeyError, IndexError, TypeError, ValueError) as e:
+            # HTTP 200 but no usable text in the body (e.g. no candidates).
+            raise Exception(response.text) from e
diff --git a/LunaTranslator/files/defaultconfig/config.json b/LunaTranslator/files/defaultconfig/config.json
index 90344cec..a73c4e8e 100644
--- a/LunaTranslator/files/defaultconfig/config.json
+++ b/LunaTranslator/files/defaultconfig/config.json
@@ -1463,6 +1463,10 @@
             "use": false,
             "name": "WeChatOCR",
             "type": "offline"
+        },
+        "geminiocr": {
+            "use": false,
+            "name": "GeminiOCR"
         }
     },
     "fanyi": {
diff --git a/LunaTranslator/files/defaultconfig/ocrsetting.json b/LunaTranslator/files/defaultconfig/ocrsetting.json
index bb2fd8c0..7083d7f9 100644
--- a/LunaTranslator/files/defaultconfig/ocrsetting.json
+++ b/LunaTranslator/files/defaultconfig/ocrsetting.json
@@ -257,6 +257,11 @@
             "key": ""
         }
     },
+    "geminiocr": {
+        "args": {
+            "key": ""
+        }
+    },
     "xunfei": {
         "args": {
             "link": "https://www.xfyun.cn/doc/words/multi_print_recognition/API.html",