This commit is contained in:
恍兮惚兮 2024-05-16 19:55:36 +08:00
parent 3ebd59ac0e
commit 955f313fe4
24 changed files with 188 additions and 156 deletions

View File

@ -55,19 +55,21 @@ class AnkiWindow(QWidget):
) )
@threader @threader
def asyncocr(self, fname): def asyncocr(self, img):
self.__ocrsettext.emit(ocr_run(fname)) self.__ocrsettext.emit(ocr_run(img))
def crop(self): def crop(self):
def ocroncefunction(rect): def ocroncefunction(rect):
img = imageCut(0, rect[0][0], rect[0][1], rect[1][0], rect[1][1]) img = imageCut(
0, rect[0][0], rect[0][1], rect[1][0], rect[1][1], False, True
)
fname = "./cache/ocr/cropforanki.png" fname = "./cache/ocr/cropforanki.png"
os.makedirs("./cache/ocr", exist_ok=True) os.makedirs("./cache/ocr", exist_ok=True)
img.save(fname) img.save(fname)
self.editpath.setText("") self.editpath.setText("")
self.editpath.setText(os.path.abspath(fname)) self.editpath.setText(os.path.abspath(fname))
if globalconfig["ankiconnect"]["ocrcroped"]: if globalconfig["ankiconnect"]["ocrcroped"]:
self.asyncocr(fname) self.asyncocr(img)
rangeselct_function(self, ocroncefunction, False, False) rangeselct_function(self, ocroncefunction, False, False)
@ -378,6 +380,8 @@ class AnkiWindow(QWidget):
return wid return wid
def wrappedpixmap(self, src): def wrappedpixmap(self, src):
if not src:
return
pix = QPixmap.fromImage(QImage(src)) pix = QPixmap.fromImage(QImage(src))
rate = self.devicePixelRatioF() rate = self.devicePixelRatioF()
pix.setDevicePixelRatio(rate) pix.setDevicePixelRatio(rate)

View File

@ -356,10 +356,7 @@ class QUnFrameWindow(resizableframeless):
@threader @threader
def ocroncefunction(rect): def ocroncefunction(rect):
img = imageCut(0, rect[0][0], rect[0][1], rect[1][0], rect[1][1]) img = imageCut(0, rect[0][0], rect[0][1], rect[1][0], rect[1][1])
fname = "./cache/ocr/once.png" text = ocr_run(img)
os.makedirs("./cache/ocr", exist_ok=True)
img.save(fname)
text = ocr_run(fname)
gobject.baseobject.textgetmethod(text, False) gobject.baseobject.textgetmethod(text, False)
rangeselct_function(self, ocroncefunction, False, False) rangeselct_function(self, ocroncefunction, False, False)

View File

@ -11,24 +11,31 @@ from traceback import print_exc
import gobject, winsharedutils import gobject, winsharedutils
def qimage2binary(qimage: QImage):
    """Serialize a QImage into BMP-encoded data held in memory.

    The image is written through a QBuffer backed by a QByteArray, so no
    temporary file is created.

    :param qimage: image to encode.
    :return: the contents of the backing QByteArray after the save
        (``QByteArray.data()``). The result of ``qimage.save`` is not
        checked, so a failed save presumably yields empty data --
        TODO confirm against callers.
    """
    byte_array = QByteArray()
    buffer = QBuffer(byte_array)
    buffer.open(QBuffer.WriteOnly)
    # BMP is the format handed on to winsharedutils.otsu_binary and to the
    # OCR engines via ocr_run.
    qimage.save(buffer, "BMP")
    buffer.close()
    image_data = byte_array.data()
    return image_data
def binary2qimage(binary):
    """Decode encoded image data into a new QImage.

    :param binary: encoded image data accepted by ``QImage.loadFromData``
        (for example the output of ``qimage2binary``).
    :return: the QImage that ``loadFromData`` was called on. The boolean
        result of ``loadFromData`` is not inspected, so undecodable input
        presumably comes back as a null image -- TODO confirm.
    """
    image = QImage()
    image.loadFromData(binary)
    return image
def togray(image): def togray(image):
gray_image = image.convertToFormat(QImage.Format_Grayscale8) gray_image = image.convertToFormat(QImage.Format_Grayscale8)
return gray_image return gray_image
def otsu_threshold_fast(image: QImage, thresh): def otsu_threshold_fast(image: QImage, thresh):
image_data = qimage2binary(image)
byte_array = QByteArray()
buffer = QBuffer(byte_array)
buffer.open(QBuffer.WriteOnly)
image.save(buffer, "BMP")
buffer.close()
image_data = byte_array.data()
solved = winsharedutils.otsu_binary(image_data, thresh) solved = winsharedutils.otsu_binary(image_data, thresh)
image = QImage() return binary2qimage(solved)
image.loadFromData(solved)
return image
def imagesolve(image): def imagesolve(image):
@ -43,7 +50,7 @@ def imagesolve(image):
return image2 return image2
def imageCut(hwnd, x1, y1, x2, y2, viscompare=True): def imageCut(hwnd, x1, y1, x2, y2, viscompare=True, rawimage=False) -> QImage:
screen = QApplication.primaryScreen() screen = QApplication.primaryScreen()
for _ in range(2): for _ in range(2):
@ -81,6 +88,8 @@ def imageCut(hwnd, x1, y1, x2, y2, viscompare=True):
) )
image = pix.toImage() image = pix.toImage()
if rawimage:
return image
image2 = imagesolve(image) image2 = imagesolve(image)
if viscompare: if viscompare:
gobject.baseobject.showocrimage.setimage.emit([image, image2]) gobject.baseobject.showocrimage.setimage.emit([image, image2])
@ -101,7 +110,7 @@ def ocr_end():
_ocrengine = None _ocrengine = None
def ocr_run(img): def ocr_run(qimage: QImage):
global _nowuseocr, _ocrengine global _nowuseocr, _ocrengine
use = None use = None
@ -123,7 +132,7 @@ def ocr_run(img):
aclass = importlib.import_module("ocrengines." + use).OCR aclass = importlib.import_module("ocrengines." + use).OCR
_ocrengine = aclass(use) _ocrengine = aclass(use)
_nowuseocr = use _nowuseocr = use
text = _ocrengine._private_ocr(img) text = _ocrengine._private_ocr(qimage2binary(qimage))
except Exception as e: except Exception as e:
if isinstance(e, ArgsEmptyExc): if isinstance(e, ArgsEmptyExc):
msg = str(e) msg = str(e)

View File

@ -36,7 +36,7 @@ class OCR(baseocr):
+ self.secretKey + self.secretKey
).json()["access_token"] ).json()["access_token"]
def ocr(self, imgfile): def ocr(self, imagebinary):
self.checkchange() self.checkchange()
if self.accstoken == "": if self.accstoken == "":
return "" return ""
@ -57,9 +57,8 @@ class OCR(baseocr):
} }
params = {"access_token": self.accstoken} # '', params = {"access_token": self.accstoken} # '',
with open(imgfile, "rb") as ff:
f = ff.read() b64 = base64.b64encode(imagebinary)
b64 = base64.b64encode(f)
data = { data = {
"image": b64, "image": b64,

View File

@ -9,7 +9,7 @@ class baseocr(commonbase):
def initocr(self): def initocr(self):
pass pass
def ocr(self, imgpath): def ocr(self, imagebinary):
raise Exception raise Exception
def end(self): def end(self):
@ -101,11 +101,11 @@ class baseocr(commonbase):
raise e raise e
self.needinit = False self.needinit = False
def _private_ocr(self, imgpath): def _private_ocr(self, imagebinary):
if self.needinit: if self.needinit:
self.level2init() self.level2init()
try: try:
text = self.ocr(imgpath) text = self.ocr(imagebinary)
except Exception as e: except Exception as e:
self.needinit = True self.needinit = True
raise e raise e

View File

@ -3,7 +3,7 @@ from ocrengines.baseocrclass import baseocr
class OCR(baseocr): class OCR(baseocr):
def ocr(self, imgfile): def ocr(self, imagebinary):
self.checkempty(["token"]) self.checkempty(["token"])
headers = { headers = {
@ -29,7 +29,7 @@ class OCR(baseocr):
'------WebKitFormBoundaryUjYOv45hug6CFh3t\r\nContent-Disposition: form-data; name="file"; filename="screenshot.png"\r\nContent-Type: application/octet-stream\r\n\r\n'.encode( '------WebKitFormBoundaryUjYOv45hug6CFh3t\r\nContent-Disposition: form-data; name="file"; filename="screenshot.png"\r\nContent-Type: application/octet-stream\r\n\r\n'.encode(
"latin-1" "latin-1"
) )
+ open(imgfile, "rb").read() + imagebinary
+ "\r\n------WebKitFormBoundaryUjYOv45hug6CFh3t--\r\n".encode("latin-1") + "\r\n------WebKitFormBoundaryUjYOv45hug6CFh3t--\r\n".encode("latin-1")
) )

View File

@ -24,11 +24,9 @@ class OCR(baseocr):
self.tokens[(app_id, app_secret)] = token self.tokens[(app_id, app_secret)] = token
return self.tokens[(app_id, app_secret)] return self.tokens[(app_id, app_secret)]
def ocr(self, imgfile): def ocr(self, imagebinary):
token = self.check() token = self.check()
with open(imgfile, "rb") as ff: b64 = base64.b64encode(imagebinary)
f = ff.read()
b64 = base64.b64encode(f)
res = self.session.post( res = self.session.post(
"https://open.feishu.cn/open-apis/optical_char_recognition/v1/image/basic_recognize", "https://open.feishu.cn/open-apis/optical_char_recognition/v1/image/basic_recognize",
headers={ headers={

View File

@ -4,7 +4,7 @@ import base64
class OCR(baseocr): class OCR(baseocr):
def ocr(self, imgfile): def ocr(self, imagebinary):
# https://github.com/dmotz/thing-translator/blob/d1fec3f38d24e973af49766669f9ee00bd9e98a8/src/effects/snap.js # https://github.com/dmotz/thing-translator/blob/d1fec3f38d24e973af49766669f9ee00bd9e98a8/src/effects/snap.js
# https://cloud.google.com/vision/docs/ocr?hl=zh-cn # https://cloud.google.com/vision/docs/ocr?hl=zh-cn
# https://cloud.google.com/vision/docs/reference/rest/v1/AnnotateImageResponse#EntityAnnotation # https://cloud.google.com/vision/docs/reference/rest/v1/AnnotateImageResponse#EntityAnnotation
@ -12,9 +12,7 @@ class OCR(baseocr):
ocr_url = ( ocr_url = (
"https://vision.googleapis.com/v1/images:annotate?key=" + self.config["key"] "https://vision.googleapis.com/v1/images:annotate?key=" + self.config["key"]
) )
with open(imgfile, "rb") as f: encodestr = str(base64.b64encode(imagebinary), "utf-8")
data = f.read()
encodestr = str(base64.b64encode(data), "utf-8")
data = { data = {
"requests": [ "requests": [
{ {

View File

@ -1,9 +1,35 @@
import os import os
from myutils.config import globalconfig, _TR, static_data from myutils.config import globalconfig, _TR, static_data
from ocrengines.baseocrclass import baseocr from ocrengines.baseocrclass import baseocr
from ctypes import CDLL, c_char_p, create_string_buffer, c_uint32, POINTER, c_int32 from ctypes import (
CDLL,
c_char_p,
create_string_buffer,
c_size_t,
c_void_p,
c_int32,
POINTER,
Structure,
pointer,
cast,
c_char_p,
)
import os import os
import gobject import gobject
from traceback import print_exc
class ocrpoints(Structure):
    """ctypes layout of one entry in the point array filled in by OcrDetect.

    Eight consecutive 32-bit signed integers: four (x, y) pairs named
    x1/y1 .. x4/y4. Presumably these are the corners of one detected text
    box -- confirm against the native OcrDetect implementation.
    """

    # Field order and types define the in-memory layout shared with the DLL;
    # do not reorder.
    _fields_ = [
        ("x1", c_int32),
        ("y1", c_int32),
        ("x2", c_int32),
        ("y2", c_int32),
        ("x3", c_int32),
        ("y3", c_int32),
        ("x4", c_int32),
        ("y4", c_int32),
    ]
class ocrwrapper: class ocrwrapper:
@ -13,7 +39,7 @@ class ocrwrapper:
def _OcrInit(self, szDetModel, szRecModel, szKeyPath, szClsModel="", nThreads=4): def _OcrInit(self, szDetModel, szRecModel, szKeyPath, szClsModel="", nThreads=4):
_OcrInit = self.dll.OcrInit _OcrInit = self.dll.OcrInit
_OcrInit.restype = POINTER(c_uint32) _OcrInit.restype = c_void_p
self.pOcrObj = _OcrInit( self.pOcrObj = _OcrInit(
c_char_p(szDetModel.encode("utf8")), c_char_p(szDetModel.encode("utf8")),
c_char_p(szClsModel.encode("utf8")), c_char_p(szClsModel.encode("utf8")),
@ -22,22 +48,53 @@ class ocrwrapper:
nThreads, nThreads,
) )
def _OcrDetect(self, imgPath, imgName, angle): def _OcrDetect(self, data: bytes, angle):
_OcrDetect = self.dll.OcrDetect _OcrDetect = self.dll.OcrDetect
return _OcrDetect( _OcrDetect.argtypes = (
self.pOcrObj, c_void_p,
c_char_p(imgPath.encode("utf8")), c_void_p,
c_char_p(imgName.encode("utf8")), c_size_t,
c_int32(angle), c_int32,
POINTER(c_int32),
POINTER(POINTER(ocrpoints)),
POINTER(POINTER(c_char_p)),
) )
def _OcrGet(self): _OcrFreeptr = self.dll.OcrFreeptr
_OcrGetLen = self.dll.OcrGetLen _OcrFreeptr.argtypes = c_int32, c_void_p, c_void_p
_OcrGetResult = self.dll.OcrGetResult
length = _OcrGetLen(self.pOcrObj) num = c_int32()
buff = create_string_buffer(length) ps = POINTER(ocrpoints)()
_OcrGetResult(self.pOcrObj, buff, length) chars = POINTER(c_char_p)()
return buff.value res = _OcrDetect(
self.pOcrObj,
data,
len(data),
c_int32(angle),
pointer(num),
pointer(ps),
pointer(chars),
)
if not res:
return [], []
texts = []
pss = []
for i in range((num.value)):
texts.append(chars[i].decode("utf8"))
pss.append(
(
ps[i].x1,
ps[i].y1,
ps[i].x2,
ps[i].y2,
ps[i].x3,
ps[i].y3,
ps[i].x4,
ps[i].y4,
)
)
_OcrFreeptr(num, ps, chars)
return pss, texts
def _OcrDestroy(self): def _OcrDestroy(self):
_OcrDestroy = self.dll.OcrDestroy _OcrDestroy = self.dll.OcrDestroy
@ -46,15 +103,12 @@ class ocrwrapper:
def init(self, det, rec, key): def init(self, det, rec, key):
self._OcrInit(det, rec, key) self._OcrInit(det, rec, key)
def ocr(self, path, name, angle=0): def ocr(self, data, angle=0):
try: try:
if self._OcrDetect(path, name, angle): return self._OcrDetect(data, angle)
return self._OcrGet().decode("utf8")
else:
return ""
except: except:
print_exc()
return "" return [], []
def trydestroy(self): def trydestroy(self):
try: try:
@ -93,20 +147,12 @@ class OCR(baseocr):
self._ocr.init(path + "/det.onnx", path + "/rec.onnx", path + "/dict.txt") self._ocr.init(path + "/det.onnx", path + "/rec.onnx", path + "/dict.txt")
self._savelang = self.srclang self._savelang = self.srclang
def ocr(self, imgfile): def ocr(self, imagebinary):
self.checkchange() self.checkchange()
s = self._ocr.ocr( pss, texts = self._ocr.ocr(
os.path.dirname(imgfile) + "/", imagebinary,
os.path.basename(imgfile),
globalconfig["verticalocr"], globalconfig["verticalocr"],
) )
ls = s.split("\n") return self.common_solve_text_orientation(pss, texts)
box = []
text = []
for i in range(len(ls) // 2):
box.append([int(_) for _ in ls[i * 2].split(",")])
text.append(ls[i * 2 + 1])
return self.common_solve_text_orientation(box, text)

View File

@ -1,20 +1,24 @@
import requests import requests
from ocrengines.baseocrclass import baseocr from ocrengines.baseocrclass import baseocr
import os import os, uuid
from myutils.ocrutil import binary2qimage
class OCR(baseocr): class OCR(baseocr):
def ocr(self, img_path): def ocr(self, imagebinary):
qimage = binary2qimage(imagebinary)
os.makedirs("./cache/ocr", exist_ok=True)
fname = "./cache/ocr/" + str(uuid.uuid4()) + ".png"
qimage.save(fname)
self.checkempty(["Port"]) self.checkempty(["Port"])
self.port = self.config["Port"] self.port = self.config["Port"]
absolute_img_path = os.path.abspath(img_path) absolute_img_path = os.path.abspath(fname)
params = {"image_path": absolute_img_path} params = {"image_path": absolute_img_path}
response = requests.get(f"http://127.0.0.1:{self.port}/image", params=params) response = requests.get(f"http://127.0.0.1:{self.port}/image", params=params)
os.remove(absolute_img_path)
try: try:
return response.json()["text"] return response.json()["text"]
except Exception as e: except Exception as e:

View File

@ -15,7 +15,7 @@ class OCR(baseocr):
"cht": "cht", "cht": "cht",
} }
def ocr(self, imgfile): def ocr(self, imagebinary):
self.checkempty(["apikey"]) self.checkempty(["apikey"])
apikey = self.config["apikey"] apikey = self.config["apikey"]
if self.config["interface"] == 1: if self.config["interface"] == 1:
@ -39,9 +39,7 @@ class OCR(baseocr):
"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36 Edg/105.0.1343.53", "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36 Edg/105.0.1343.53",
} }
with open(imgfile, "rb") as ff: b64 = base64.b64encode(imagebinary)
f = ff.read()
b64 = base64.b64encode(f)
data = { data = {
"language": self.srclang, "language": self.srclang,
"base64Image": "data:image/jpeg;base64," + str(b64, encoding="utf8"), "base64Image": "data:image/jpeg;base64," + str(b64, encoding="utf8"),

View File

@ -1,6 +1,6 @@
import os import os, uuid
from myutils.config import _TR, ocrsetting from myutils.config import _TR, ocrsetting
from myutils.ocrutil import binary2qimage
from ocrengines.baseocrclass import baseocr from ocrengines.baseocrclass import baseocr
from myutils.subproc import subproc_w from myutils.subproc import subproc_w
@ -19,11 +19,16 @@ class OCR(baseocr):
def initocr(self): def initocr(self):
self.langs = list_langs() self.langs = list_langs()
def ocr(self, imgfile): def ocr(self, imagebinary):
self.checkempty(["路径"]) self.checkempty(["路径"])
path = self.config["路径"] path = self.config["路径"]
if os.path.exists(path) == False: if os.path.exists(path) == False:
raise Exception(_TR("路径不存在")) raise Exception(_TR("路径不存在"))
qimage = binary2qimage(imagebinary)
os.makedirs("./cache/ocr", exist_ok=True)
fname = "./cache/ocr/" + str(uuid.uuid4()) + ".png"
qimage.save(fname)
imgfile = os.path.abspath(fname)
_ = subproc_w( _ = subproc_w(
'"{}" "{}" - -l {} {}'.format( '"{}" "{}" - -l {} {}'.format(
path, imgfile, self.langs[self.config["语言"]], self.config["附加参数"] path, imgfile, self.langs[self.config["语言"]], self.config["附加参数"]
@ -32,6 +37,7 @@ class OCR(baseocr):
encoding="utf8", encoding="utf8",
run=True, run=True,
) )
os.remove(imgfile)
res = _.stdout res = _.stdout
err = _.stderr err = _.stderr
if len(err): if len(err):

View File

@ -19,12 +19,10 @@ class OCR(baseocr):
"th": "tha", "th": "tha",
} }
def ocr(self, imgfile): def ocr(self, imagebinary):
self.checkempty(["SecretId", "SecretKey"]) self.checkempty(["SecretId", "SecretKey"])
with open(imgfile, "rb") as f: encodestr = str(base64.b64encode(imagebinary), "utf-8")
data = f.read()
encodestr = str(base64.b64encode(data), "utf-8")
req_para = { req_para = {
"LanguageType": self.srclang, "LanguageType": self.srclang,
"Action": "GeneralBasicOCR", "Action": "GeneralBasicOCR",

View File

@ -8,12 +8,10 @@ class OCR(baseocr):
# https://cloud.tencent.com/document/product/551/17232 # https://cloud.tencent.com/document/product/551/17232
return {"cht": "zh-TW"} return {"cht": "zh-TW"}
def ocr(self, imgfile): def ocr(self, imagebinary):
self.checkempty(["SecretId", "SecretKey"]) self.checkempty(["SecretId", "SecretKey"])
with open(imgfile, "rb") as f: encodestr = str(base64.b64encode(imagebinary), "utf-8")
data = f.read()
encodestr = str(base64.b64encode(data), "utf-8")
req_para = { req_para = {
"Source": self.srclang, "Source": self.srclang,
"Target": self.tgtlang, "Target": self.tgtlang,

View File

@ -1740,7 +1740,7 @@ from ocrengines.baseocrclass import baseocr
class OCR(baseocr): class OCR(baseocr):
def ocr(self, imgfile): def ocr(self, imagebinary):
visual_service = VisualService() visual_service = VisualService()
self.checkempty(["Access Key ID", "Secret Access Key"]) self.checkempty(["Access Key ID", "Secret Access Key"])
# call below method if you dont set ak and sk in $HOME/.volc/config # call below method if you dont set ak and sk in $HOME/.volc/config
@ -1753,11 +1753,8 @@ class OCR(baseocr):
# if you cannot find the needed one, please check other example files in the same dir # if you cannot find the needed one, please check other example files in the same dir
# or contact us for further help # or contact us for further help
form = dict() form = dict()
import base64
with open(imgfile, "rb") as ff: b64 = base64.b64encode(imagebinary)
f = ff.read()
b64 = base64.b64encode(f)
form["image_base64"] = b64 form["image_base64"] = b64
resp = visual_service.ocr_api("MultiLanguageOCR", form, self.proxy) resp = visual_service.ocr_api("MultiLanguageOCR", form, self.proxy)
try: try:

View File

@ -25,7 +25,7 @@ class OCR(baseocr):
v = self.supportmap.pop("zh-Hant") v = self.supportmap.pop("zh-Hant")
self.supportmap["cht"] = v self.supportmap["cht"] = v
def ocr(self, imgfile): def ocr(self, imagebinary):
if self.srclang not in self.supportmap: if self.srclang not in self.supportmap:
idx = static_data["language_list_translator_inner"].index(self.srclang) idx = static_data["language_list_translator_inner"].index(self.srclang)
raise Exception( raise Exception(
@ -39,9 +39,7 @@ class OCR(baseocr):
else: else:
space = " " space = " "
ret = winrtutils.OCR_f( ret = winrtutils.OCR_f(imagebinary, self.supportmap[self.srclang], space)
os.path.abspath(imgfile), self.supportmap[self.srclang], space
)
boxs = [_[1:] for _ in ret] boxs = [_[1:] for _ in ret]
texts = [_[0] for _ in ret] texts = [_[0] for _ in ret]

View File

@ -11,7 +11,7 @@ import json
class OCR(baseocr): class OCR(baseocr):
def ocr(self, imgfile): def ocr(self, imagebinary):
self.checkempty(["APPId", "APISecret", "APIKey"]) self.checkempty(["APPId", "APISecret", "APIKey"])
APPId = self.config["APPId"] APPId = self.config["APPId"]
@ -50,9 +50,8 @@ class OCR(baseocr):
u = Url(host, path, schema) u = Url(host, path, schema)
return u return u
def get_body(self, file_path): def get_body(self, imagebinary):
file = open(file_path, "rb") buf = imagebinary
buf = file.read()
body = { body = {
"header": {"app_id": self.appid, "status": 3}, "header": {"app_id": self.appid, "status": 3},
"parameter": { "parameter": {
@ -120,7 +119,7 @@ class OCR(baseocr):
} }
# print("request_url:", request_url) # print("request_url:", request_url)
body = printed_word_recognition.get_body(file_path=imgfile) body = printed_word_recognition.get_body(file_path=imagebinary)
response = self.session.post( response = self.session.post(
request_url, data=json.dumps(body), headers=headers request_url, data=json.dumps(body), headers=headers
) )

View File

@ -10,7 +10,7 @@ class OCR(baseocr):
def langmap(self): def langmap(self):
return {"zh": "zh-CHS", "cht": "zh-CHT"} return {"zh": "zh-CHS", "cht": "zh-CHT"}
def freetest(self, imgfile): def freetest(self, imagebinary):
headers = { headers = {
"authority": "aidemo.youdao.com", "authority": "aidemo.youdao.com",
"accept": "*/*", "accept": "*/*",
@ -26,9 +26,7 @@ class OCR(baseocr):
"sec-fetch-site": "same-site", "sec-fetch-site": "same-site",
"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/106.0.0.0 Safari/537.36", "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/106.0.0.0 Safari/537.36",
} }
with open(imgfile, "rb") as ff: b64 = base64.b64encode(imagebinary)
f = ff.read()
b64 = base64.b64encode(f)
data = { data = {
"imgBase": "data:image/jpeg;base64," + str(b64, encoding="utf8"), "imgBase": "data:image/jpeg;base64," + str(b64, encoding="utf8"),
"lang": "", "lang": "",
@ -50,7 +48,7 @@ class OCR(baseocr):
except: except:
raise Exception(response.text) raise Exception(response.text)
def ocrapi(self, imgfile): def ocrapi(self, imagebinary):
def truncate(q): def truncate(q):
if q is None: if q is None:
return None return None
@ -65,9 +63,7 @@ class OCR(baseocr):
self.checkempty(["APP_KEY", "APP_SECRET"]) self.checkempty(["APP_KEY", "APP_SECRET"])
APP_KEY, APP_SECRET = self.config["APP_KEY"], self.config["APP_SECRET"] APP_KEY, APP_SECRET = self.config["APP_KEY"], self.config["APP_SECRET"]
YOUDAO_URL = "https://openapi.youdao.com/ocrapi" YOUDAO_URL = "https://openapi.youdao.com/ocrapi"
file = open(imgfile, "rb") content = base64.b64encode(imagebinary).decode("utf-8")
content = base64.b64encode(file.read()).decode("utf-8")
file.close()
data = {} data = {}
data["img"] = content data["img"] = content
@ -99,10 +95,10 @@ class OCR(baseocr):
except: except:
raise Exception(response.text) raise Exception(response.text)
def ocr(self, imgfile): def ocr(self, imagebinary):
interfacetype = self.config["接口"] interfacetype = self.config["接口"]
if interfacetype == 0: if interfacetype == 0:
return self.freetest(imgfile) return self.freetest(imagebinary)
elif interfacetype == 1: elif interfacetype == 1:
return self.ocrapi(imgfile) return self.ocrapi(imagebinary)
raise Exception("unknown") raise Exception("unknown")

View File

@ -9,7 +9,7 @@ class OCR(baseocr):
def langmap(self): def langmap(self):
return {"zh": "zh-CHS", "cht": "zh-CHT"} return {"zh": "zh-CHS", "cht": "zh-CHT"}
def freetest(self, imgfile): def freetest(self, imagebinary):
headers = { headers = {
"authority": "aidemo.youdao.com", "authority": "aidemo.youdao.com",
@ -26,9 +26,7 @@ class OCR(baseocr):
"sec-fetch-site": "same-site", "sec-fetch-site": "same-site",
"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/106.0.0.0 Safari/537.36", "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/106.0.0.0 Safari/537.36",
} }
with open(imgfile, "rb") as ff: b64 = base64.b64encode(imagebinary)
f = ff.read()
b64 = base64.b64encode(f)
data = { data = {
"imgBase": "data:image/jpeg;base64," + str(b64, encoding="utf8"), "imgBase": "data:image/jpeg;base64," + str(b64, encoding="utf8"),
"lang": "", "lang": "",
@ -46,13 +44,11 @@ class OCR(baseocr):
except: except:
raise Exception(response.text) raise Exception(response.text)
def ocrapi(self, imgfile): def ocrapi(self, imagebinary):
self.checkempty(["APP_KEY", "APP_SECRET"]) self.checkempty(["APP_KEY", "APP_SECRET"])
APP_KEY, APP_SECRET = self.config["APP_KEY"], self.config["APP_SECRET"] APP_KEY, APP_SECRET = self.config["APP_KEY"], self.config["APP_SECRET"]
# 待翻译图片路径, 例windows路径PATH = "C:\\youdao\\media.jpg"
PATH = imgfile
""" """
添加鉴权相关参数 - 添加鉴权相关参数 -
@ -121,7 +117,7 @@ class OCR(baseocr):
type = "1" type = "1"
# 数据的base64编码 # 数据的base64编码
q = readFileAsBase64(PATH) q = readFileAsBase64(imagebinary)
data = { data = {
"q": q, "q": q,
"from": lang_from, "from": lang_from,
@ -142,10 +138,8 @@ class OCR(baseocr):
elif "post" == method: elif "post" == method:
return self.session.post(url, params, header) return self.session.post(url, params, header)
def readFileAsBase64(path): def readFileAsBase64(imagebinary):
f = open(path, "rb") return str(base64.b64encode(imagebinary), "utf-8")
data = f.read()
return str(base64.b64encode(data), "utf-8")
self.countnum() self.countnum()
@ -161,10 +155,10 @@ class OCR(baseocr):
except: except:
raise Exception(response.text) raise Exception(response.text)
def ocr(self, imgfile): def ocr(self, imagebinary):
interfacetype = self.config["接口"] interfacetype = self.config["接口"]
if interfacetype == 0: if interfacetype == 0:
return self.freetest(imgfile) return self.freetest(imagebinary)
elif interfacetype == 1: elif interfacetype == 1:
return self.ocrapi(imgfile) return self.ocrapi(imagebinary)
raise Exception("unknown") raise Exception("unknown")

View File

@ -2,7 +2,7 @@ import time
from myutils.config import globalconfig from myutils.config import globalconfig
import winsharedutils import winsharedutils
from gui.rangeselect import rangeadjust from gui.rangeselect import rangeadjust
from myutils.ocrutil import imageCut, ocr_run, ocr_end from myutils.ocrutil import imageCut, ocr_run, ocr_end,qimage2binary
import time, gobject, os import time, gobject, os
from PyQt5.QtWidgets import QApplication from PyQt5.QtWidgets import QApplication
from PyQt5.QtGui import QImage from PyQt5.QtGui import QImage
@ -131,7 +131,7 @@ class ocrtext(basetext):
ok = False ok = False
if ok == False: if ok == False:
continue continue
text = self.ocrtest(imgr) text = ocr_run(imgr)
self.lastocrtime[i] = time.time() self.lastocrtime[i] = time.time()
if self.savelasttext[i] is not None: if self.savelasttext[i] is not None:
@ -154,7 +154,7 @@ class ocrtext(basetext):
return return
img = imageCut(self.hwnd, rect[0][0], rect[0][1], rect[1][0], rect[1][1]) img = imageCut(self.hwnd, rect[0][0], rect[0][1], rect[1][0], rect[1][1])
text = self.ocrtest(img) text = ocr_run(img)
imgr1 = qimge2np(img) imgr1 = qimge2np(img)
self.savelastimg[i] = imgr1 self.savelastimg[i] = imgr1
self.savelastrecimg[i] = imgr1 self.savelastrecimg[i] = imgr1
@ -163,14 +163,6 @@ class ocrtext(basetext):
__text.append(text) __text.append(text)
return "\n".join(__text) return "\n".join(__text)
def ocrtest(self, img):
os.makedirs("./cache/ocr", exist_ok=True)
fname = "./cache/ocr/{}.png".format(self.timestamp)
img.save(fname)
# print(fname)
text = ocr_run(fname)
# print(text)
return text
def end(self): def end(self):
globalconfig["ocrregions"] = [_.getrect() for _ in self.range_ui] globalconfig["ocrregions"] = [_.getrect() for _ in self.range_ui]

View File

@ -5,6 +5,7 @@ from ctypes import (
c_wchar_p, c_wchar_p,
pointer, pointer,
CDLL, CDLL,
c_size_t,
Structure, Structure,
c_void_p, c_void_p,
) )
@ -30,7 +31,7 @@ if winrtutilsdll:
] ]
_OCR_f = winrtutilsdll.OCR _OCR_f = winrtutilsdll.OCR
_OCR_f.argtypes = c_wchar_p, c_wchar_p, c_wchar_p, POINTER(c_uint) _OCR_f.argtypes = c_void_p, c_size_t, c_wchar_p, c_wchar_p, POINTER(c_uint)
_OCR_f.restype = ocrres _OCR_f.restype = ocrres
_freeocrres = winrtutilsdll.freeocrres _freeocrres = winrtutilsdll.freeocrres
_freeocrres.argtypes = ocrres, c_uint _freeocrres.argtypes = ocrres, c_uint
@ -54,9 +55,9 @@ if winrtutilsdll:
_freewstringlist(ret, num.value) _freewstringlist(ret, num.value)
return _allsupport return _allsupport
def OCR_f(imgpath, lang, space): def OCR_f(data, lang, space):
num = c_uint() num = c_uint()
ret = _OCR_f(imgpath, lang, space, pointer(num)) ret = _OCR_f(data, len(data), lang, space, pointer(num))
res = [] res = []
for i in range(num.value): for i in range(num.value):
res.append((ret.lines[i], ret.xs[i], ret.ys[i], ret.xs2[i], ret.ys2[i])) res.append((ret.lines[i], ret.xs[i], ret.ys[i], ret.xs2[i], ret.ys2[i]))

View File

@ -29,7 +29,7 @@ include(generate_product_version)
set(VERSION_MAJOR 2) set(VERSION_MAJOR 2)
set(VERSION_MINOR 51) set(VERSION_MINOR 51)
set(VERSION_PATCH 1) set(VERSION_PATCH 2)
add_library(pch pch.cpp) add_library(pch pch.cpp)
target_precompile_headers(pch PUBLIC pch.h) target_precompile_headers(pch PUBLIC pch.h)

View File

@ -13,7 +13,7 @@ extern "C"
__declspec(dllexport) bool check_language_valid(wchar_t *); __declspec(dllexport) bool check_language_valid(wchar_t *);
__declspec(dllexport) wchar_t **getlanguagelist(int *); __declspec(dllexport) wchar_t **getlanguagelist(int *);
__declspec(dllexport) ocrres OCR(wchar_t *fname, wchar_t *lang, wchar_t *, int *); __declspec(dllexport) ocrres OCR(void* ptr, size_t size, wchar_t *lang, wchar_t *, int *);
__declspec(dllexport) void freewstringlist(wchar_t **, int); __declspec(dllexport) void freewstringlist(wchar_t **, int);
__declspec(dllexport) void freeocrres(ocrres, int); __declspec(dllexport) void freeocrres(ocrres, int);

View File

@ -57,13 +57,13 @@ wchar_t **getlanguagelist(int *num)
*num = languages.Size(); *num = languages.Size();
return ret; return ret;
} }
ocrres OCR(wchar_t *fname, wchar_t *lang, wchar_t *space, int *num) ocrres OCR(void *ptr, size_t size, wchar_t *lang, wchar_t *space, int *num)
{ {
std::wstring imagePath = fname; IBuffer buffer = CryptographicBuffer::CreateFromByteArray(
winrt::array_view<uint8_t>(static_cast<uint8_t *>(ptr), size));
StorageFile imageFile = StorageFile::GetFileFromPathAsync(imagePath).get(); InMemoryRandomAccessStream memoryStream;
IRandomAccessStream imageStream = imageFile.OpenAsync(FileAccessMode::Read).get(); memoryStream.WriteAsync(buffer).get();
BitmapDecoder decoder = BitmapDecoder::CreateAsync(imageStream).get(); BitmapDecoder decoder = BitmapDecoder::CreateAsync(memoryStream).get();
SoftwareBitmap softwareBitmap = decoder.GetSoftwareBitmapAsync().get(); SoftwareBitmap softwareBitmap = decoder.GetSoftwareBitmapAsync().get();
std::wstring l = lang; std::wstring l = lang;