mirror of
https://github.com/HIllya51/LunaTranslator.git
synced 2025-01-15 08:53:53 +08:00
268 lines
8.2 KiB
Python
268 lines
8.2 KiB
Python
import base64
|
|
from myutils.config import globalconfig
|
|
from ocrengines.baseocrclass import baseocr
|
|
import random, zhconv
|
|
from hashlib import md5
|
|
|
|
|
|
class OCR(baseocr):
    """Baidu OCR / picture-translation engine.

    The ``"接口"`` config value selects the backend used by :meth:`ocr`:

    * ``0``-``3``: Baidu AI Cloud OCR endpoints (``general_basic``,
      ``general``, ``accurate_basic``, ``accurate``) via :meth:`ocr_x`.
    * ``4``: Baidu AI Cloud picture translation via :meth:`ocr_ts1`.
    * ``5``: Baidu Fanyi picture translation via :meth:`ocr_ts2`.
    """

    # Language ids that must be rewritten before being sent as the
    # "from"/"to" parameters of the picture-translation endpoints. Ids
    # missing from this table are sent unchanged. Shared by srclangx and
    # tgtlangx so the two directions cannot drift apart.
    _TRANS_LANG_CODES = {
        "cht": "zh",
        "es": "spa",
        "ko": "kor",
        "fr": "fra",
        "ja": "jp",
        "vi": "vie",
        "uk": "ukr",
        "ar": "ara",
        "sv": "swe",
    }

    # OCR endpoints, indexed by the "接口" config value (0-3).
    _OCR_URLS = (
        "https://aip.baidubce.com/rest/2.0/ocr/v1/general_basic",
        "https://aip.baidubce.com/rest/2.0/ocr/v1/general",
        "https://aip.baidubce.com/rest/2.0/ocr/v1/accurate_basic",
        "https://aip.baidubce.com/rest/2.0/ocr/v1/accurate",
    )

    def _parse_pictrans(self, response):
        """Convert a picture-translation response into the OCR result dict.

        Reads ``data.content`` from the JSON body; every entry contributes
        its ``dst`` text and the x/y coordinates of its first four
        ``points``. When the target language is ``"cht"`` the text is
        converted with ``zhconv.convert(..., "zh-tw")``.

        Returns:
            ``{"box": [...], "text": [...], "isocrtranslate": True}`` where
            each box is ``(x0, y0, x1, y1, x2, y2, x3, y3)``.

        Raises:
            Exception: carrying ``response`` when the body cannot be parsed
                or lacks the expected fields.
        """
        try:
            content = response.json()["data"]["content"]
            to_traditional = "cht" == self.tgtlang_1
            text = []
            box = []
            for item in content:
                translated = item["dst"]
                if to_traditional:
                    translated = zhconv.convert(translated, "zh-tw")
                text.append(translated)
                points = item["points"]
                box.append(
                    tuple(
                        points[index][axis]
                        for index in range(4)
                        for axis in ("x", "y")
                    )
                )
            return {"box": box, "text": text, "isocrtranslate": True}
        except Exception as err:
            # Surface the raw response so the caller can see what came back.
            raise Exception(response) from err

    def ocr_ts1(self, imagebinary):
        """Translate the text in an image via the AI Cloud pictrans endpoint.

        Args:
            imagebinary: raw image bytes, uploaded as a multipart file.

        Returns:
            The dict produced by :meth:`_parse_pictrans`.

        Raises:
            Exception: carrying the response when it cannot be parsed.
        """
        accstoken = self.getaccess()
        params = {
            "access_token": accstoken,
            "from": self.srclangx,
            "to": self.tgtlangx,
            "v": "3",
            "paste": "1",
        }
        image = {"image": ("shit.png", imagebinary, "multipart/form-data")}
        response = self.proxysession.post(
            "https://aip.baidubce.com/file/2.0/mt/pictrans/v1",
            params=params,
            files=image,
        )
        return self._parse_pictrans(response)

    def ocr_ts2(self, imagebinary):
        """Translate the text in an image via the Baidu Fanyi picture API.

        Requires the ``app_id`` and ``app_key`` config entries. The request
        is signed with ``md5(app_id + md5(image) + salt + cuid + mac +
        app_key)``.

        Args:
            imagebinary: raw image bytes, uploaded as a multipart file.

        Returns:
            The dict produced by :meth:`_parse_pictrans`.

        Raises:
            Exception: carrying the response when it cannot be parsed.
        """
        self.checkempty(["app_id", "app_key"])
        url = "http://api.fanyi.baidu.com" + "/api/trans/sdk/picture"

        from_lang = self.srclangx
        to_lang = self.tgtlangx

        app_id = self.config["app_id"]
        app_key = self.config["app_key"]

        # Fixed placeholder device identifiers; they are part of both the
        # signature and the request parameters.
        cuid = "APICUID"
        mac = "mac"

        salt = random.randint(32768, 65536)
        plain = (
            app_id + md5(imagebinary).hexdigest() + str(salt) + cuid + mac + app_key
        )
        sign = md5(plain.encode("utf-8")).hexdigest()

        payload = {
            "from": from_lang,
            "to": to_lang,
            "appid": app_id,
            "salt": salt,
            "sign": sign,
            "cuid": cuid,
            "mac": mac,
        }
        files = {"image": ("image.png", imagebinary, "multipart/form-data")}
        response = self.proxysession.post(url, params=payload, files=files)
        return self._parse_pictrans(response)

    @property
    def srclangx(self):
        """Source language code to send to the picture-translation APIs."""
        lang = self.srclang_1
        return self._TRANS_LANG_CODES.get(lang, lang)

    @property
    def tgtlangx(self):
        """Target language code to send to the picture-translation APIs."""
        lang = self.tgtlang_1
        return self._TRANS_LANG_CODES.get(lang, lang)

    def langmap(self):
        """Return the language-id to OCR ``language_type`` mapping."""
        return {
            "auto": "auto_detect",
            "cht": "CHN_ENG",
            "zh": "CHN_ENG",
            "en": "ENG",
            "ja": "JAP",
            "ko": "KOR",
            "fr": "FRE",
            "es": "SPA",
            "pt": "POR",
            "de": "GER",
            "it": "ITA",
            "ru": "RUS",
            "nl": "DUT",
            "sv": "SWE",
            "pl": "POL",
            "tr": "TUR",
            "th": "THA",
            "vi": "VIE",
            "ar": "ARA",
        }

    def initocr(self):
        """Reset the token cache and pre-fetch a token when one is needed.

        Interface ``5`` signs requests with app_id/app_key instead of an
        access token, so no token is fetched for it.
        """
        self.access = {}
        if self.config["接口"] != 5:
            self.getaccess()

    def get_access_token(self, API_KEY, SECRET_KEY):
        """Request an OAuth access token with the client-credentials grant.

        Args:
            API_KEY: sent as ``client_id``.
            SECRET_KEY: sent as ``client_secret``.

        Returns:
            The ``access_token`` field of the JSON response.

        Raises:
            Exception: carrying the response when it has no usable token.
        """
        url = "https://aip.baidubce.com/oauth/2.0/token"
        params = {
            "grant_type": "client_credentials",
            "client_id": API_KEY,
            "client_secret": SECRET_KEY,
        }
        resp = self.proxysession.post(url, params=params)
        try:
            return resp.json()["access_token"]
        except Exception as err:
            raise Exception(resp) from err

    def getaccess(self):
        """Return the access token for the configured key pair.

        Tokens are cached in ``self.access`` keyed by
        ``(API Key, Secret Key)``; a new one is requested only when the
        cache has no truthy entry for the current pair.
        """
        self.checkempty(["API Key", "Secret Key"])
        API_KEY = self.config["API Key"]
        SECRET_KEY = self.config["Secret Key"]
        cache_key = (API_KEY, SECRET_KEY)
        if not self.access.get(cache_key):
            self.access[cache_key] = self.get_access_token(API_KEY, SECRET_KEY)
        return self.access[cache_key]

    def ocr(self, imagebinary):
        """Dispatch to the backend selected by the ``"接口"`` config value.

        Raises:
            Exception: ``"unknown"`` when the value is not in 0-5.
        """
        interface = self.config["接口"]
        if interface in [0, 1, 2, 3]:
            return self.ocr_x(imagebinary)
        elif interface == 4:
            return self.ocr_ts1(imagebinary)
        elif interface == 5:
            return self.ocr_ts2(imagebinary)
        raise Exception("unknown")

    def ocr_x(self, imagebinary):
        """Recognise text with one of the four Baidu OCR endpoints.

        Args:
            imagebinary: raw image bytes; sent base64-encoded in the form
                body.

        Returns:
            ``{"text": [...]}`` for interfaces 0 and 2. For the other
            interfaces ``{"box": [...], "text": [...]}`` where each box is
            ``(left, top, left + width, top + height)``.

        Raises:
            Exception: carrying the response when it cannot be parsed.
        """
        accstoken = self.getaccess()
        headers = {
            "authority": "aip.baidubce.com",
            "accept": "*/*",
            "accept-language": "zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6",
            "cache-control": "no-cache",
            "origin": "chrome-extension://hmpjibmn1ncjokocepchnea",
            "pragma": "no-cache",
            "sec-ch-ua": '"Microsoft Edge";v="105", "Not)A;Brand";v="8", "Chromium";v="105"',
            "sec-ch-ua-mobile": "?0",
            "sec-ch-ua-platform": '"Windows"',
            "sec-fetch-dest": "empty",
            "sec-fetch-mode": "cors",
            "sec-fetch-site": "none",
            "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36 Edg/105.0.1343.53",
        }
        params = {"access_token": accstoken}
        data = {
            "image": base64.b64encode(imagebinary),
            "detect_direction": int(globalconfig["verticalocr"]) != 0,
            "language_type": self.srclang,
        }
        interfacetype = self.config["接口"]
        url = self._OCR_URLS[interfacetype]

        response = self.proxysession.post(
            url, params=params, headers=headers, data=data
        )
        try:
            # Parse the body once and reuse it for both texts and boxes.
            words_result = response.json()["words_result"]
            texts = [entry["words"] for entry in words_result]
            if interfacetype in (0, 2):
                # These endpoints are read without location data.
                return {"text": texts}
            boxs = []
            for entry in words_result:
                location = entry["location"]
                boxs.append(
                    (
                        location["left"],
                        location["top"],
                        location["left"] + location["width"],
                        location["top"] + location["height"],
                    )
                )
            return {"box": boxs, "text": texts}
        except Exception as err:
            raise Exception(response) from err
|