From 955f313fe432f3d39195de5c297cf6e583f20f05 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=81=8D=E5=85=AE=E6=83=9A=E5=85=AE?= <101191390+HIllya51@users.noreply.github.com> Date: Thu, 16 May 2024 19:55:36 +0800 Subject: [PATCH] binary --- LunaTranslator/LunaTranslator/gui/showword.py | 12 +- .../LunaTranslator/gui/translatorUI.py | 5 +- .../LunaTranslator/myutils/ocrutil.py | 37 +++--- .../LunaTranslator/ocrengines/baiduocr_X.py | 7 +- .../LunaTranslator/ocrengines/baseocrclass.py | 6 +- .../LunaTranslator/ocrengines/docsumo.py | 4 +- .../LunaTranslator/ocrengines/feishu.py | 6 +- .../ocrengines/googlecloudvision.py | 6 +- .../LunaTranslator/ocrengines/local.py | 114 ++++++++++++------ .../LunaTranslator/ocrengines/mangaocr.py | 14 ++- .../LunaTranslator/ocrengines/ocrspace.py | 6 +- .../LunaTranslator/ocrengines/tesseract5.py | 12 +- .../LunaTranslator/ocrengines/txocr.py | 6 +- .../LunaTranslator/ocrengines/txocrtrans.py | 6 +- .../LunaTranslator/ocrengines/volcengine.py | 7 +- .../LunaTranslator/ocrengines/windowsocr.py | 6 +- .../LunaTranslator/ocrengines/xunfei.py | 9 +- .../LunaTranslator/ocrengines/youdaocr.py | 18 ++- .../ocrengines/youdaocrtrans.py | 26 ++-- .../LunaTranslator/textsource/ocrtext.py | 14 +-- LunaTranslator/LunaTranslator/winrtutils.py | 7 +- plugins/CMakeLists.txt | 2 +- plugins/winrtutils/define.h | 2 +- plugins/winrtutils/winrtocr.cpp | 12 +- 24 files changed, 188 insertions(+), 156 deletions(-) diff --git a/LunaTranslator/LunaTranslator/gui/showword.py b/LunaTranslator/LunaTranslator/gui/showword.py index d8f865e5..c6bc3b4e 100644 --- a/LunaTranslator/LunaTranslator/gui/showword.py +++ b/LunaTranslator/LunaTranslator/gui/showword.py @@ -55,19 +55,21 @@ class AnkiWindow(QWidget): ) @threader - def asyncocr(self, fname): - self.__ocrsettext.emit(ocr_run(fname)) + def asyncocr(self, img): + self.__ocrsettext.emit(ocr_run(img)) def crop(self): def ocroncefunction(rect): - img = imageCut(0, rect[0][0], rect[0][1], rect[1][0], rect[1][1]) + img = imageCut( + 0, rect[0][0], rect[0][1], rect[1][0], rect[1][1], False, True + ) fname = "./cache/ocr/cropforanki.png" os.makedirs("./cache/ocr", exist_ok=True) img.save(fname) self.editpath.setText("") self.editpath.setText(os.path.abspath(fname)) if globalconfig["ankiconnect"]["ocrcroped"]: - self.asyncocr(fname) + self.asyncocr(img) rangeselct_function(self, ocroncefunction, False, False) @@ -378,6 +380,8 @@ class AnkiWindow(QWidget): return wid def wrappedpixmap(self, src): + if not src: + return pix = QPixmap.fromImage(QImage(src)) rate = self.devicePixelRatioF() pix.setDevicePixelRatio(rate) diff --git a/LunaTranslator/LunaTranslator/gui/translatorUI.py b/LunaTranslator/LunaTranslator/gui/translatorUI.py index 06e29710..56d06613 100644 --- a/LunaTranslator/LunaTranslator/gui/translatorUI.py +++ b/LunaTranslator/LunaTranslator/gui/translatorUI.py @@ -356,10 +356,7 @@ class QUnFrameWindow(resizableframeless): @threader def ocroncefunction(rect): img = imageCut(0, rect[0][0], rect[0][1], rect[1][0], rect[1][1]) - fname = "./cache/ocr/once.png" - os.makedirs("./cache/ocr", exist_ok=True) - img.save(fname) - text = ocr_run(fname) + text = ocr_run(img) gobject.baseobject.textgetmethod(text, False) rangeselct_function(self, ocroncefunction, False, False) diff --git a/LunaTranslator/LunaTranslator/myutils/ocrutil.py b/LunaTranslator/LunaTranslator/myutils/ocrutil.py index 577e8002..9db2830e 100644 --- a/LunaTranslator/LunaTranslator/myutils/ocrutil.py +++ b/LunaTranslator/LunaTranslator/myutils/ocrutil.py @@ -11,24 +11,31 @@ from traceback import print_exc import gobject, winsharedutils +def qimage2binary(qimage: QImage): + byte_array = QByteArray() + buffer = QBuffer(byte_array) + buffer.open(QBuffer.WriteOnly) + qimage.save(buffer, "BMP") + buffer.close() + image_data = byte_array.data() + return image_data + + +def binary2qimage(binary): + image = QImage() + image.loadFromData(binary) + return image + + def togray(image): gray_image = image.convertToFormat(QImage.Format_Grayscale8) return gray_image def otsu_threshold_fast(image: QImage, thresh): - - byte_array = QByteArray() - buffer = QBuffer(byte_array) - buffer.open(QBuffer.WriteOnly) - image.save(buffer, "BMP") - buffer.close() - image_data = byte_array.data() - + image_data = qimage2binary(image) solved = winsharedutils.otsu_binary(image_data, thresh) - image = QImage() - image.loadFromData(solved) - return image + return binary2qimage(solved) def imagesolve(image): @@ -43,7 +50,7 @@ def imagesolve(image): return image2 -def imageCut(hwnd, x1, y1, x2, y2, viscompare=True): +def imageCut(hwnd, x1, y1, x2, y2, viscompare=True, rawimage=False) -> QImage: screen = QApplication.primaryScreen() for _ in range(2): @@ -81,6 +88,8 @@ def imageCut(hwnd, x1, y1, x2, y2, viscompare=True): ) image = pix.toImage() + if rawimage: + return image image2 = imagesolve(image) if viscompare: gobject.baseobject.showocrimage.setimage.emit([image, image2]) @@ -101,7 +110,7 @@ def ocr_end(): _ocrengine = None -def ocr_run(img): +def ocr_run(qimage: QImage): global _nowuseocr, _ocrengine use = None @@ -123,7 +132,7 @@ def ocr_run(img): aclass = importlib.import_module("ocrengines." + use).OCR _ocrengine = aclass(use) _nowuseocr = use - text = _ocrengine._private_ocr(img) + text = _ocrengine._private_ocr(qimage2binary(qimage)) except Exception as e: if isinstance(e, ArgsEmptyExc): msg = str(e) diff --git a/LunaTranslator/LunaTranslator/ocrengines/baiduocr_X.py b/LunaTranslator/LunaTranslator/ocrengines/baiduocr_X.py index f02fab1b..c93784c8 100644 --- a/LunaTranslator/LunaTranslator/ocrengines/baiduocr_X.py +++ b/LunaTranslator/LunaTranslator/ocrengines/baiduocr_X.py @@ -36,7 +36,7 @@ class OCR(baseocr): + self.secretKey ).json()["access_token"] - def ocr(self, imgfile): + def ocr(self, imagebinary): self.checkchange() if self.accstoken == "": return "" @@ -57,9 +57,8 @@ class OCR(baseocr): } params = {"access_token": self.accstoken} # '', - with open(imgfile, "rb") as ff: - f = ff.read() - b64 = base64.b64encode(f) + + b64 = base64.b64encode(imagebinary) data = { "image": b64, diff --git a/LunaTranslator/LunaTranslator/ocrengines/baseocrclass.py b/LunaTranslator/LunaTranslator/ocrengines/baseocrclass.py index 72ee8838..abfabf72 100644 --- a/LunaTranslator/LunaTranslator/ocrengines/baseocrclass.py +++ b/LunaTranslator/LunaTranslator/ocrengines/baseocrclass.py @@ -9,7 +9,7 @@ class baseocr(commonbase): def initocr(self): pass - def ocr(self, imgpath): + def ocr(self, imagebinary): raise Exception def end(self): @@ -101,11 +101,11 @@ class baseocr(commonbase): raise e self.needinit = False - def _private_ocr(self, imgpath): + def _private_ocr(self, imagebinary): if self.needinit: self.level2init() try: - text = self.ocr(imgpath) + text = self.ocr(imagebinary) except Exception as e: self.needinit = True raise e diff --git a/LunaTranslator/LunaTranslator/ocrengines/docsumo.py b/LunaTranslator/LunaTranslator/ocrengines/docsumo.py index c3a2dcba..6d2bc65c 100644 --- a/LunaTranslator/LunaTranslator/ocrengines/docsumo.py +++ b/LunaTranslator/LunaTranslator/ocrengines/docsumo.py @@ -3,7 +3,7 @@ from ocrengines.baseocrclass import baseocr class OCR(baseocr): - def ocr(self, imgfile): + def ocr(self, imagebinary): self.checkempty(["token"]) headers = { @@ -29,7 +29,7 @@ class OCR(baseocr): '------WebKitFormBoundaryUjYOv45hug6CFh3t\r\nContent-Disposition: form-data; name="file"; filename="screenshot.png"\r\nContent-Type: application/octet-stream\r\n\r\n'.encode( "latin-1" ) - + open(imgfile, "rb").read() + + imagebinary + "\r\n------WebKitFormBoundaryUjYOv45hug6CFh3t--\r\n".encode("latin-1") ) diff --git a/LunaTranslator/LunaTranslator/ocrengines/feishu.py b/LunaTranslator/LunaTranslator/ocrengines/feishu.py index 9acb564b..635df4de 100644 --- a/LunaTranslator/LunaTranslator/ocrengines/feishu.py +++ b/LunaTranslator/LunaTranslator/ocrengines/feishu.py @@ -24,11 +24,9 @@ class OCR(baseocr): self.tokens[(app_id, app_secret)] = token return self.tokens[(app_id, app_secret)] - def ocr(self, imgfile): + def ocr(self, imagebinary): token = self.check() - with open(imgfile, "rb") as ff: - f = ff.read() - b64 = base64.b64encode(f) + b64 = base64.b64encode(imagebinary) res = self.session.post( "https://open.feishu.cn/open-apis/optical_char_recognition/v1/image/basic_recognize", headers={ diff --git a/LunaTranslator/LunaTranslator/ocrengines/googlecloudvision.py b/LunaTranslator/LunaTranslator/ocrengines/googlecloudvision.py index e7b4a8dd..4df3bfce 100644 --- a/LunaTranslator/LunaTranslator/ocrengines/googlecloudvision.py +++ b/LunaTranslator/LunaTranslator/ocrengines/googlecloudvision.py @@ -4,7 +4,7 @@ import base64 class OCR(baseocr): - def ocr(self, imgfile): + def ocr(self, imagebinary): # https://github.com/dmotz/thing-translator/blob/d1fec3f38d24e973af49766669f9ee00bd9e98a8/src/effects/snap.js # https://cloud.google.com/vision/docs/ocr?hl=zh-cn # https://cloud.google.com/vision/docs/reference/rest/v1/AnnotateImageResponse#EntityAnnotation @@ -12,9 +12,7 @@ class OCR(baseocr): ocr_url = ( "https://vision.googleapis.com/v1/images:annotate?key=" + self.config["key"] ) - with open(imgfile, "rb") as f: - data = f.read() - encodestr = str(base64.b64encode(data), "utf-8") + encodestr = str(base64.b64encode(imagebinary), "utf-8") data = { "requests": [ { diff --git a/LunaTranslator/LunaTranslator/ocrengines/local.py b/LunaTranslator/LunaTranslator/ocrengines/local.py index 67363ebb..0d65827e 100644 --- a/LunaTranslator/LunaTranslator/ocrengines/local.py +++ b/LunaTranslator/LunaTranslator/ocrengines/local.py @@ -1,9 +1,35 @@ import os from myutils.config import globalconfig, _TR, static_data from ocrengines.baseocrclass import baseocr -from ctypes import CDLL, c_char_p, create_string_buffer, c_uint32, POINTER, c_int32 +from ctypes import ( + CDLL, + c_char_p, + create_string_buffer, + c_size_t, + c_void_p, + c_int32, + POINTER, + Structure, + pointer, + cast, + c_char_p, +) import os import gobject +from traceback import print_exc + + +class ocrpoints(Structure): + _fields_ = [ + ("x1", c_int32), + ("y1", c_int32), + ("x2", c_int32), + ("y2", c_int32), + ("x3", c_int32), + ("y3", c_int32), + ("x4", c_int32), + ("y4", c_int32), + ] class ocrwrapper: @@ -13,7 +39,7 @@ class ocrwrapper: def _OcrInit(self, szDetModel, szRecModel, szKeyPath, szClsModel="", nThreads=4): _OcrInit = self.dll.OcrInit - _OcrInit.restype = POINTER(c_uint32) + _OcrInit.restype = c_void_p self.pOcrObj = _OcrInit( c_char_p(szDetModel.encode("utf8")), c_char_p(szClsModel.encode("utf8")), @@ -22,22 +48,53 @@ class ocrwrapper: nThreads, ) - def _OcrDetect(self, imgPath, imgName, angle): + def _OcrDetect(self, data: bytes, angle): _OcrDetect = self.dll.OcrDetect - return _OcrDetect( - self.pOcrObj, - c_char_p(imgPath.encode("utf8")), - c_char_p(imgName.encode("utf8")), - c_int32(angle), + _OcrDetect.argtypes = ( + c_void_p, + c_void_p, + c_size_t, + c_int32, + POINTER(c_int32), + POINTER(POINTER(ocrpoints)), + POINTER(POINTER(c_char_p)), ) - def _OcrGet(self): - _OcrGetLen = self.dll.OcrGetLen - _OcrGetResult = self.dll.OcrGetResult - length = _OcrGetLen(self.pOcrObj) - buff = create_string_buffer(length) - _OcrGetResult(self.pOcrObj, buff, length) - return buff.value + _OcrFreeptr = self.dll.OcrFreeptr + _OcrFreeptr.argtypes = c_int32, c_void_p, c_void_p + + num = c_int32() + ps = POINTER(ocrpoints)() + chars = POINTER(c_char_p)() + res = _OcrDetect( + self.pOcrObj, + data, + len(data), + c_int32(angle), + pointer(num), + pointer(ps), + pointer(chars), + ) + if not res: + return [], [] + texts = [] + pss = [] + for i in range((num.value)): + texts.append(chars[i].decode("utf8")) + pss.append( + ( + ps[i].x1, + ps[i].y1, + ps[i].x2, + ps[i].y2, + ps[i].x3, + ps[i].y3, + ps[i].x4, + ps[i].y4, + ) + ) + _OcrFreeptr(num, ps, chars) + return pss, texts def _OcrDestroy(self): _OcrDestroy = self.dll.OcrDestroy @@ -46,15 +103,12 @@ class ocrwrapper: def init(self, det, rec, key): self._OcrInit(det, rec, key) - def ocr(self, path, name, angle=0): + def ocr(self, data, angle=0): try: - if self._OcrDetect(path, name, angle): - return self._OcrGet().decode("utf8") - else: - return "" + return self._OcrDetect(data, angle) except: - - return "" + print_exc() + return [], [] def trydestroy(self): try: @@ -93,20 +147,12 @@ class OCR(baseocr): self._ocr.init(path + "/det.onnx", path + "/rec.onnx", path + "/dict.txt") self._savelang = self.srclang - def ocr(self, imgfile): + def ocr(self, imagebinary): self.checkchange() - s = self._ocr.ocr( - os.path.dirname(imgfile) + "/", - os.path.basename(imgfile), + pss, texts = self._ocr.ocr( + imagebinary, globalconfig["verticalocr"], ) - ls = s.split("\n") - box = [] - text = [] - for i in range(len(ls) // 2): - box.append([int(_) for _ in ls[i * 2].split(",")]) - text.append(ls[i * 2 + 1]) - - return self.common_solve_text_orientation(box, text) + return self.common_solve_text_orientation(pss, texts) diff --git a/LunaTranslator/LunaTranslator/ocrengines/mangaocr.py b/LunaTranslator/LunaTranslator/ocrengines/mangaocr.py index 6c9f0bec..ffc3bc08 100644 --- a/LunaTranslator/LunaTranslator/ocrengines/mangaocr.py +++ b/LunaTranslator/LunaTranslator/ocrengines/mangaocr.py @@ -1,20 +1,24 @@ import requests from ocrengines.baseocrclass import baseocr -import os +import os, uuid +from myutils.ocrutil import binary2qimage class OCR(baseocr): - def ocr(self, img_path): - + def ocr(self, imagebinary): + qimage = binary2qimage(imagebinary) + os.makedirs("./cache/ocr", exist_ok=True) + fname = "./cache/ocr/" + str(uuid.uuid4()) + ".png" + qimage.save(fname) self.checkempty(["Port"]) self.port = self.config["Port"] - absolute_img_path = os.path.abspath(img_path) + absolute_img_path = os.path.abspath(fname) params = {"image_path": absolute_img_path} response = requests.get(f"http://127.0.0.1:{self.port}/image", params=params) - + os.remove(absolute_img_path) try: return response.json()["text"] except Exception as e: diff --git a/LunaTranslator/LunaTranslator/ocrengines/ocrspace.py b/LunaTranslator/LunaTranslator/ocrengines/ocrspace.py index 9c0248e3..c64b2a71 100644 --- a/LunaTranslator/LunaTranslator/ocrengines/ocrspace.py +++ b/LunaTranslator/LunaTranslator/ocrengines/ocrspace.py @@ -15,7 +15,7 @@ class OCR(baseocr): "cht": "cht", } - def ocr(self, imgfile): + def ocr(self, imagebinary): self.checkempty(["apikey"]) apikey = self.config["apikey"] if self.config["interface"] == 1: @@ -39,9 +39,7 @@ class OCR(baseocr): "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36 Edg/105.0.1343.53", } - with open(imgfile, "rb") as ff: - f = ff.read() - b64 = base64.b64encode(f) + b64 = base64.b64encode(imagebinary) data = { "language": self.srclang, "base64Image": "data:image/jpeg;base64," + str(b64, encoding="utf8"), diff --git a/LunaTranslator/LunaTranslator/ocrengines/tesseract5.py b/LunaTranslator/LunaTranslator/ocrengines/tesseract5.py index 09ad03b2..63966c0a 100644 --- a/LunaTranslator/LunaTranslator/ocrengines/tesseract5.py +++ b/LunaTranslator/LunaTranslator/ocrengines/tesseract5.py @@ -1,6 +1,6 @@ -import os +import os, uuid from myutils.config import _TR, ocrsetting - +from myutils.ocrutil import binary2qimage from ocrengines.baseocrclass import baseocr from myutils.subproc import subproc_w @@ -19,11 +19,16 @@ class OCR(baseocr): def initocr(self): self.langs = list_langs() - def ocr(self, imgfile): + def ocr(self, imagebinary): self.checkempty(["路径"]) path = self.config["路径"] if os.path.exists(path) == False: raise Exception(_TR("路径不存在")) + qimage = binary2qimage(imagebinary) + os.makedirs("./cache/ocr", exist_ok=True) + fname = "./cache/ocr/" + str(uuid.uuid4()) + ".png" + qimage.save(fname) + imgfile = os.path.abspath(fname) _ = subproc_w( '"{}" "{}" - -l {} {}'.format( path, imgfile, self.langs[self.config["语言"]], self.config["附加参数"] @@ -32,6 +37,7 @@ class OCR(baseocr): encoding="utf8", run=True, ) + os.remove(imgfile) res = _.stdout err = _.stderr if len(err): diff --git a/LunaTranslator/LunaTranslator/ocrengines/txocr.py b/LunaTranslator/LunaTranslator/ocrengines/txocr.py index cf466d64..b73c8a29 100644 --- a/LunaTranslator/LunaTranslator/ocrengines/txocr.py +++ b/LunaTranslator/LunaTranslator/ocrengines/txocr.py @@ -19,12 +19,10 @@ class OCR(baseocr): "th": "tha", } - def ocr(self, imgfile): + def ocr(self, imagebinary): self.checkempty(["SecretId", "SecretKey"]) - with open(imgfile, "rb") as f: - data = f.read() - encodestr = str(base64.b64encode(data), "utf-8") + encodestr = str(base64.b64encode(imagebinary), "utf-8") req_para = { "LanguageType": self.srclang, "Action": "GeneralBasicOCR", diff --git a/LunaTranslator/LunaTranslator/ocrengines/txocrtrans.py b/LunaTranslator/LunaTranslator/ocrengines/txocrtrans.py index bbcd47f9..b3f699b8 100644 --- a/LunaTranslator/LunaTranslator/ocrengines/txocrtrans.py +++ b/LunaTranslator/LunaTranslator/ocrengines/txocrtrans.py @@ -8,12 +8,10 @@ class OCR(baseocr): # https://cloud.tencent.com/document/product/551/17232 return {"cht": "zh-TW"} - def ocr(self, imgfile): + def ocr(self, imagebinary): self.checkempty(["SecretId", "SecretKey"]) - with open(imgfile, "rb") as f: - data = f.read() - encodestr = str(base64.b64encode(data), "utf-8") + encodestr = str(base64.b64encode(imagebinary), "utf-8") req_para = { "Source": self.srclang, "Target": self.tgtlang, diff --git a/LunaTranslator/LunaTranslator/ocrengines/volcengine.py b/LunaTranslator/LunaTranslator/ocrengines/volcengine.py index 423dd914..f0f48dd5 100644 --- a/LunaTranslator/LunaTranslator/ocrengines/volcengine.py +++ b/LunaTranslator/LunaTranslator/ocrengines/volcengine.py @@ -1740,7 +1740,7 @@ from ocrengines.baseocrclass import baseocr class OCR(baseocr): - def ocr(self, imgfile): + def ocr(self, imagebinary): visual_service = VisualService() self.checkempty(["Access Key ID", "Secret Access Key"]) # call below method if you dont set ak and sk in $HOME/.volc/config @@ -1753,11 +1753,8 @@ class OCR(baseocr): # if you cannot find the needed one, please check other example files in the same dir # or contact us for further help form = dict() - import base64 - with open(imgfile, "rb") as ff: - f = ff.read() - b64 = base64.b64encode(f) + b64 = base64.b64encode(imagebinary) form["image_base64"] = b64 resp = visual_service.ocr_api("MultiLanguageOCR", form, self.proxy) try: diff --git a/LunaTranslator/LunaTranslator/ocrengines/windowsocr.py b/LunaTranslator/LunaTranslator/ocrengines/windowsocr.py index 96f8d846..d429461f 100644 --- a/LunaTranslator/LunaTranslator/ocrengines/windowsocr.py +++ b/LunaTranslator/LunaTranslator/ocrengines/windowsocr.py @@ -25,7 +25,7 @@ class OCR(baseocr): v = self.supportmap.pop("zh-Hant") self.supportmap["cht"] = v - def ocr(self, imgfile): + def ocr(self, imagebinary): if self.srclang not in self.supportmap: idx = static_data["language_list_translator_inner"].index(self.srclang) raise Exception( @@ -39,9 +39,7 @@ class OCR(baseocr): else: space = " " - ret = winrtutils.OCR_f( - os.path.abspath(imgfile), self.supportmap[self.srclang], space - ) + ret = winrtutils.OCR_f(imagebinary, self.supportmap[self.srclang], space) boxs = [_[1:] for _ in ret] texts = [_[0] for _ in ret] diff --git a/LunaTranslator/LunaTranslator/ocrengines/xunfei.py b/LunaTranslator/LunaTranslator/ocrengines/xunfei.py index 63e265ae..18464c85 100644 --- a/LunaTranslator/LunaTranslator/ocrengines/xunfei.py +++ b/LunaTranslator/LunaTranslator/ocrengines/xunfei.py @@ -11,7 +11,7 @@ import json class OCR(baseocr): - def ocr(self, imgfile): + def ocr(self, imagebinary): self.checkempty(["APPId", "APISecret", "APIKey"]) APPId = self.config["APPId"] @@ -50,9 +50,8 @@ class OCR(baseocr): u = Url(host, path, schema) return u - def get_body(self, file_path): - file = open(file_path, "rb") - buf = file.read() + def get_body(self, imagebinary): + buf = imagebinary body = { "header": {"app_id": self.appid, "status": 3}, "parameter": { @@ -120,7 +119,7 @@ class OCR(baseocr): } # print("request_url:", request_url) - body = printed_word_recognition.get_body(file_path=imgfile) + body = printed_word_recognition.get_body(file_path=imagebinary) response = self.session.post( request_url, data=json.dumps(body), headers=headers ) diff --git a/LunaTranslator/LunaTranslator/ocrengines/youdaocr.py b/LunaTranslator/LunaTranslator/ocrengines/youdaocr.py index da3b4c32..9884f06d 100644 --- a/LunaTranslator/LunaTranslator/ocrengines/youdaocr.py +++ b/LunaTranslator/LunaTranslator/ocrengines/youdaocr.py @@ -10,7 +10,7 @@ class OCR(baseocr): def langmap(self): return {"zh": "zh-CHS", "cht": "zh-CHT"} - def freetest(self, imgfile): + def freetest(self, imagebinary): headers = { "authority": "aidemo.youdao.com", "accept": "*/*", @@ -26,9 +26,7 @@ class OCR(baseocr): "sec-fetch-site": "same-site", "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/106.0.0.0 Safari/537.36", } - with open(imgfile, "rb") as ff: - f = ff.read() - b64 = base64.b64encode(f) + b64 = base64.b64encode(imagebinary) data = { "imgBase": "data:image/jpeg;base64," + str(b64, encoding="utf8"), "lang": "", @@ -50,7 +48,7 @@ class OCR(baseocr): except: raise Exception(response.text) - def ocrapi(self, imgfile): + def ocrapi(self, imagebinary): def truncate(q): if q is None: return None @@ -65,9 +63,7 @@ class OCR(baseocr): self.checkempty(["APP_KEY", "APP_SECRET"]) APP_KEY, APP_SECRET = self.config["APP_KEY"], self.config["APP_SECRET"] YOUDAO_URL = "https://openapi.youdao.com/ocrapi" - file = open(imgfile, "rb") - content = base64.b64encode(file.read()).decode("utf-8") - file.close() + content = base64.b64encode(imagebinary).decode("utf-8") data = {} data["img"] = content @@ -99,10 +95,10 @@ class OCR(baseocr): except: raise Exception(response.text) - def ocr(self, imgfile): + def ocr(self, imagebinary): interfacetype = self.config["接口"] if interfacetype == 0: - return self.freetest(imgfile) + return self.freetest(imagebinary) elif interfacetype == 1: - return self.ocrapi(imgfile) + return self.ocrapi(imagebinary) raise Exception("unknown") diff --git a/LunaTranslator/LunaTranslator/ocrengines/youdaocrtrans.py b/LunaTranslator/LunaTranslator/ocrengines/youdaocrtrans.py index 833e1b83..feba260b 100644 --- a/LunaTranslator/LunaTranslator/ocrengines/youdaocrtrans.py +++ b/LunaTranslator/LunaTranslator/ocrengines/youdaocrtrans.py @@ -9,7 +9,7 @@ class OCR(baseocr): def langmap(self): return {"zh": "zh-CHS", "cht": "zh-CHT"} - def freetest(self, imgfile): + def freetest(self, imagebinary): headers = { "authority": "aidemo.youdao.com", @@ -26,9 +26,7 @@ class OCR(baseocr): "sec-fetch-site": "same-site", "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/106.0.0.0 Safari/537.36", } - with open(imgfile, "rb") as ff: - f = ff.read() - b64 = base64.b64encode(f) + b64 = base64.b64encode(imagebinary) data = { "imgBase": "data:image/jpeg;base64," + str(b64, encoding="utf8"), "lang": "", @@ -46,13 +44,11 @@ class OCR(baseocr): except: raise Exception(response.text) - def ocrapi(self, imgfile): + def ocrapi(self, imagebinary): self.checkempty(["APP_KEY", "APP_SECRET"]) APP_KEY, APP_SECRET = self.config["APP_KEY"], self.config["APP_SECRET"] - - # 待翻译图片路径, 例windows路径:PATH = "C:\\youdao\\media.jpg" - PATH = imgfile + """ 添加鉴权相关参数 - @@ -121,7 +117,7 @@ class OCR(baseocr): type = "1" # 数据的base64编码 - q = readFileAsBase64(PATH) + q = readFileAsBase64(imagebinary) data = { "q": q, "from": lang_from, @@ -142,10 +138,8 @@ class OCR(baseocr): elif "post" == method: return self.session.post(url, params, header) - def readFileAsBase64(path): - f = open(path, "rb") - data = f.read() - return str(base64.b64encode(data), "utf-8") + def readFileAsBase64(imagebinary): + return str(base64.b64encode(imagebinary), "utf-8") self.countnum() @@ -161,10 +155,10 @@ class OCR(baseocr): except: raise Exception(response.text) - def ocr(self, imgfile): + def ocr(self, imagebinary): interfacetype = self.config["接口"] if interfacetype == 0: - return self.freetest(imgfile) + return self.freetest(imagebinary) elif interfacetype == 1: - return self.ocrapi(imgfile) + return self.ocrapi(imagebinary) raise Exception("unknown") diff --git a/LunaTranslator/LunaTranslator/textsource/ocrtext.py b/LunaTranslator/LunaTranslator/textsource/ocrtext.py index 6686a1bf..73a867fd 100644 --- a/LunaTranslator/LunaTranslator/textsource/ocrtext.py +++ b/LunaTranslator/LunaTranslator/textsource/ocrtext.py @@ -2,7 +2,7 @@ import time from myutils.config import globalconfig import winsharedutils from gui.rangeselect import rangeadjust -from myutils.ocrutil import imageCut, ocr_run, ocr_end +from myutils.ocrutil import imageCut, ocr_run, ocr_end,qimage2binary import time, gobject, os from PyQt5.QtWidgets import QApplication from PyQt5.QtGui import QImage @@ -131,7 +131,7 @@ class ocrtext(basetext): ok = False if ok == False: continue - text = self.ocrtest(imgr) + text = ocr_run(imgr) self.lastocrtime[i] = time.time() if self.savelasttext[i] is not None: @@ -154,7 +154,7 @@ class ocrtext(basetext): return img = imageCut(self.hwnd, rect[0][0], rect[0][1], rect[1][0], rect[1][1]) - text = self.ocrtest(img) + text = ocr_run(img) imgr1 = qimge2np(img) self.savelastimg[i] = imgr1 self.savelastrecimg[i] = imgr1 @@ -163,14 +163,6 @@ class ocrtext(basetext): __text.append(text) return "\n".join(__text) - def ocrtest(self, img): - os.makedirs("./cache/ocr", exist_ok=True) - fname = "./cache/ocr/{}.png".format(self.timestamp) - img.save(fname) - # print(fname) - text = ocr_run(fname) - # print(text) - return text def end(self): globalconfig["ocrregions"] = [_.getrect() for _ in self.range_ui] diff --git a/LunaTranslator/LunaTranslator/winrtutils.py b/LunaTranslator/LunaTranslator/winrtutils.py index 9543f3fb..4a38875b 100644 --- a/LunaTranslator/LunaTranslator/winrtutils.py +++ b/LunaTranslator/LunaTranslator/winrtutils.py @@ -5,6 +5,7 @@ from ctypes import ( c_wchar_p, pointer, CDLL, + c_size_t, Structure, c_void_p, ) @@ -30,7 +31,7 @@ if winrtutilsdll: ] _OCR_f = winrtutilsdll.OCR - _OCR_f.argtypes = c_wchar_p, c_wchar_p, c_wchar_p, POINTER(c_uint) + _OCR_f.argtypes = c_void_p, c_size_t, c_wchar_p, c_wchar_p, POINTER(c_uint) _OCR_f.restype = ocrres _freeocrres = winrtutilsdll.freeocrres _freeocrres.argtypes = ocrres, c_uint @@ -54,9 +55,9 @@ if winrtutilsdll: _freewstringlist(ret, num.value) return _allsupport - def OCR_f(imgpath, lang, space): + def OCR_f(data, lang, space): num = c_uint() - ret = _OCR_f(imgpath, lang, space, pointer(num)) + ret = _OCR_f(data, len(data), lang, space, pointer(num)) res = [] for i in range(num.value): res.append((ret.lines[i], ret.xs[i], ret.ys[i], ret.xs2[i], ret.ys2[i])) diff --git a/plugins/CMakeLists.txt b/plugins/CMakeLists.txt index 54b4134d..faff41fe 100644 --- a/plugins/CMakeLists.txt +++ b/plugins/CMakeLists.txt @@ -29,7 +29,7 @@ include(generate_product_version) set(VERSION_MAJOR 2) set(VERSION_MINOR 51) -set(VERSION_PATCH 1) +set(VERSION_PATCH 2) add_library(pch pch.cpp) target_precompile_headers(pch PUBLIC pch.h) diff --git a/plugins/winrtutils/define.h b/plugins/winrtutils/define.h index 33888eaf..55d97efe 100644 --- a/plugins/winrtutils/define.h +++ b/plugins/winrtutils/define.h @@ -13,7 +13,7 @@ extern "C" __declspec(dllexport) bool check_language_valid(wchar_t *); __declspec(dllexport) wchar_t **getlanguagelist(int *); - __declspec(dllexport) ocrres OCR(wchar_t *fname, wchar_t *lang, wchar_t *, int *); + __declspec(dllexport) ocrres OCR(void* ptr, size_t size, wchar_t *lang, wchar_t *, int *); __declspec(dllexport) void freewstringlist(wchar_t **, int); __declspec(dllexport) void freeocrres(ocrres, int); diff --git a/plugins/winrtutils/winrtocr.cpp b/plugins/winrtutils/winrtocr.cpp index 4e5d903a..88a0d6e9 100644 --- a/plugins/winrtutils/winrtocr.cpp +++ b/plugins/winrtutils/winrtocr.cpp @@ -57,13 +57,13 @@ wchar_t **getlanguagelist(int *num) *num = languages.Size(); return ret; } -ocrres OCR(wchar_t *fname, wchar_t *lang, wchar_t *space, int *num) +ocrres OCR(void *ptr, size_t size, wchar_t *lang, wchar_t *space, int *num) { - std::wstring imagePath = fname; - - StorageFile imageFile = StorageFile::GetFileFromPathAsync(imagePath).get(); - IRandomAccessStream imageStream = imageFile.OpenAsync(FileAccessMode::Read).get(); - BitmapDecoder decoder = BitmapDecoder::CreateAsync(imageStream).get(); + IBuffer buffer = CryptographicBuffer::CreateFromByteArray( + winrt::array_view(static_cast(ptr), size)); + InMemoryRandomAccessStream memoryStream; + memoryStream.WriteAsync(buffer).get(); + BitmapDecoder decoder = BitmapDecoder::CreateAsync(memoryStream).get(); SoftwareBitmap softwareBitmap = decoder.GetSoftwareBitmapAsync().get(); std::wstring l = lang;