add-gemini-ocr (#890)

* add-gemini-ocr * add gui, config
2024-12-29 16:44:13 +08:00 · 2024-07-15 00:14:54 +09:00 · 2024-07-15 00:14:54 +09:00 · 252f6449b2
commit 252f6449b2
parent 869ef46306
3 changed files with 46 additions and 0 deletions
--- a/LunaTranslator/LunaTranslator/ocrengines/gemini.py
+++ b/LunaTranslator/LunaTranslator/ocrengines/gemini.py
@ -0,0 +1,37 @@
+import base64
+import requests
+from ocrengines.baseocrclass import baseocr
+
+
+class OCR(baseocr):
+    """OCR engine backed by the Gemini ``generateContent`` REST endpoint."""
+
+    def ocr(self, imagebinary):
+        """Return the text Gemini extracts from the image bytes ``imagebinary``.
+
+        Raises ``Exception`` carrying the response body when the HTTP status
+        is not 200 or the reply cannot be parsed.
+        """
+        self.checkempty(["key"])
+        api_key = self.config["key"]
+        image_data = base64.b64encode(imagebinary).decode("utf-8")
+        payload = {
+            "contents": [
+                {
+                    "parts": [
+                        {"text": "Ocr this picture"},
+                        {"inlineData": {"mimeType": "image/png", "data": image_data}},
+                    ]
+                }
+            ]
+        }
+        headers = {"Content-Type": "application/json"}
+        url = f"https://generativelanguage.googleapis.com/v1/models/gemini-1.5-flash:generateContent?key={api_key}"
+        response = requests.post(url, headers=headers, json=payload, proxies=self.proxy)
+        # Raise HTTP errors directly instead of catching and re-wrapping them below.
+        if response.status_code != 200:
+            raise Exception(response.text)
+        try:
+            return response.json()["candidates"][0]["content"]["parts"][0]["text"]
+        except Exception as e:  # e.g. malformed JSON or a reply with no text part
+            raise Exception(response.text) from e
--- a/LunaTranslator/files/defaultconfig/config.json
+++ b/LunaTranslator/files/defaultconfig/config.json
@ -1463,6 +1463,10 @@
            "use": false,
            "name": "WeChatOCR",
            "type": "offline"
+        },
+        "geminiocr": {
+            "use": false,
+            "name": "GeminiOCR"
        }
    },
    "fanyi": {
--- a/LunaTranslator/files/defaultconfig/ocrsetting.json
+++ b/LunaTranslator/files/defaultconfig/ocrsetting.json
@ -257,6 +257,11 @@
            "key": ""
        }
    },
+    "geminiocr": {
+        "args": {
+            "key": ""
+        }
+    },
    "xunfei": {
        "args": {
            "link": "https://www.xfyun.cn/doc/words/multi_print_recognition/API.html",