mirror of
https://github.com/HIllya51/LunaTranslator.git
synced 2025-01-15 08:53:53 +08:00
119 lines
3.6 KiB
Python
119 lines
3.6 KiB
Python
import os, uuid, gobject, winreg
|
|
from myutils.config import _TR, globalconfig
|
|
from ocrengines.baseocrclass import baseocr
|
|
from myutils.subproc import subproc_w
|
|
from language import Languages
|
|
|
|
|
|
class OCR(baseocr):
|
|
|
|
def findts__(self):
|
|
k = winreg.OpenKeyEx(
|
|
winreg.HKEY_LOCAL_MACHINE,
|
|
r"SOFTWARE\Tesseract-OCR",
|
|
0,
|
|
winreg.KEY_QUERY_VALUE,
|
|
)
|
|
base = winreg.QueryValueEx(k, "Path")[0]
|
|
winreg.CloseKey(k)
|
|
return base
|
|
|
|
def findts(self):
|
|
try:
|
|
_ = self.findts__()
|
|
_ = os.path.join(_, "tesseract.exe")
|
|
return _
|
|
except:
|
|
return
|
|
|
|
def list_langs(self):
|
|
if not (self.path and os.path.exists(self.path)):
|
|
raise Exception(_TR("not installed"))
|
|
res = subproc_w(
|
|
'"{}" --list-langs'.format(self.path),
|
|
needstdio=True,
|
|
run=True,
|
|
encoding="utf8",
|
|
).stdout
|
|
return res.split("\n")[1:-1]
|
|
|
|
def langmap(self):
|
|
# https://github.com/tesseract-ocr/tessdoc/blob/main/tess3/Data-Files.md
|
|
return {
|
|
Languages.Chinese: "chi_sim",
|
|
Languages.TradChinese: "chi_tra",
|
|
Languages.Japanese: "jpn",
|
|
Languages.English: "eng",
|
|
Languages.Russian: "rus",
|
|
Languages.Korean: "kor",
|
|
Languages.Arabic: "ara",
|
|
Languages.Italian: "ita",
|
|
Languages.Polish: "pol",
|
|
Languages.Spanish: "spa",
|
|
Languages.Swedish: "swe",
|
|
Languages.Ukrainian: "ukr",
|
|
Languages.Vietnamese: "vie",
|
|
Languages.French: "fra",
|
|
Languages.Turkish: "tur",
|
|
Languages.German: "deu",
|
|
Languages.Dutch: "nld",
|
|
Languages.Portuguese: "por",
|
|
Languages.Czech: "ces",
|
|
Languages.Hungarian: "hun",
|
|
Languages.Thai: "tha",
|
|
Languages.Latin: "lat",
|
|
}
|
|
|
|
def initocr(self):
|
|
self.path = self.findts()
|
|
self.langs = self.list_langs()
|
|
print(self.langs)
|
|
|
|
def ocr(self, imagebinary):
|
|
if not (self.path and os.path.exists(self.path)):
|
|
raise Exception(_TR("not installed"))
|
|
self.raise_cant_be_auto_lang()
|
|
lang = self.srclang
|
|
psm = 6
|
|
imgfile = None
|
|
if globalconfig["verticalocr"] == 0:
|
|
pass
|
|
elif globalconfig["verticalocr"] == 1:
|
|
lang += "_vert"
|
|
psm = 5
|
|
elif globalconfig["verticalocr"] == 2:
|
|
fname = gobject.gettempdir(str(uuid.uuid4()) + ".png")
|
|
with open(fname, "wb") as ff:
|
|
ff.write(imagebinary)
|
|
imgfile = os.path.abspath(fname)
|
|
_ = subproc_w(
|
|
'"{}" "{}" stdout -l osd --psm 0'.format(self.path, imgfile),
|
|
needstdio=True,
|
|
encoding="utf8",
|
|
run=True,
|
|
)
|
|
err = _.stderr
|
|
if len(err):
|
|
pass
|
|
elif "Orientation in degrees: 0" not in _.stdout:
|
|
lang += "_vert"
|
|
psm = 5
|
|
if not imgfile:
|
|
fname = gobject.gettempdir(str(uuid.uuid4()) + ".png")
|
|
with open(fname, "wb") as ff:
|
|
ff.write(imagebinary)
|
|
imgfile = os.path.abspath(fname)
|
|
_ = subproc_w(
|
|
'"{}" "{}" - -l {} --psm {}'.format(self.path, imgfile, lang, psm),
|
|
needstdio=True,
|
|
encoding="utf8",
|
|
run=True,
|
|
)
|
|
os.remove(imgfile)
|
|
res = _.stdout
|
|
err = _.stderr
|
|
if len(err):
|
|
raise Exception(err)
|
|
|
|
return res.split("\n")
|