This commit is contained in:
恍兮惚兮 2025-01-01 15:45:36 +08:00
parent 6647f1f8e4
commit ee6945d46a
24 changed files with 264 additions and 153 deletions

View File

@ -1,7 +1,7 @@
# Version number components used to build LUNA_VERSION below.
set(VERSION_MAJOR 6)
set(VERSION_MINOR 16)
set(VERSION_PATCH 9)
# NOTE(review): VERSION_PATCH is assigned twice in a row; the later assignment (10)
# is the value in effect from here on. Presumably the earlier line is a leftover — confirm.
set(VERSION_PATCH 10)
set(VERSION_REVISION 0)
# Brace-enclosed, comma-separated list of the four components above.
set(LUNA_VERSION "{${VERSION_MAJOR},${VERSION_MINOR},${VERSION_PATCH},${VERSION_REVISION}}")
# Library target built from version_def.cpp in the directory of this list file.
add_library(VERSION_DEF ${CMAKE_CURRENT_LIST_DIR}/version_def.cpp)

View File

@ -549,6 +549,19 @@ class autoinitdialog(LDialog):
else:
items = line["list"]
lineW.addItems(items)
if "internal" in line:
lineW.setCurrentIndex(
line["internal"].index(dd.get(key))
if dd.get(key) in line["internal"]
else 0
)
def __(lineW, line):
return line["internal"][lineW.currentIndex()]
regist[key] = functools.partial(__, lineW, line)
else:
lineW.setCurrentIndex(dd.get(key, 0))
regist[key] = lineW.currentIndex
cachecombo[key] = lineW

View File

@ -10,35 +10,20 @@ from urllib.parse import urlencode
import json
class OCR(baseocr):
def ocr(self, imagebinary):
self.checkempty(["APPId", "APISecret", "APIKey"])
self.raise_cant_be_auto_lang()
APPId = self.config["APPId"]
APISecret = self.config["APISecret"]
APIKey = self.config["APIKey"]
SRCLANG = self.srclang
class AssembleHeaderException(Exception):
    """Exception that keeps its message in the ``message`` attribute.

    Args:
        msg: Description of the failure; stored as ``self.message`` and also
            passed to ``Exception`` so ``str(exc)`` and ``exc.args`` carry it.
    """

    def __init__(self, msg):
        # Without this call str(exc) would be empty and tracebacks would
        # show the exception with no text.
        super().__init__(msg)
        self.message = msg
class Url:
    """Plain holder for the three parts of a split URL.

    Attributes are set verbatim from the constructor arguments:
    ``host``, ``path`` and ``schema``.
    """

    def __init__(self, host, path, schema):
        self.host = host
        self.path = path
        self.schema = schema
class printed_word_recognition(object):
def __init__(self):
self.appid = APPId
self.apikey = APIKey
self.apisecret = APISecret
self.url = "https://cn-east-1.api.xf-yun.com/v1/ocr"
def parse_url(self, requset_url):
def parse_url(requset_url):
stidx = requset_url.index("://")
host = requset_url[stidx + 3 :]
schema = requset_url[: stidx + 3]
@ -50,43 +35,18 @@ class OCR(baseocr):
u = Url(host, path, schema)
return u
def get_body(self, imagebinary):
buf = imagebinary
body = {
"header": {"app_id": self.appid, "status": 3},
"parameter": {
"ocr": {
"language": SRCLANG,
"ocr_output_text": {
"encoding": "utf8",
"compress": "raw",
"format": "json",
},
}
},
"payload": {
"image": {
"encoding": "jpg",
"image": str(base64.b64encode(buf), "utf-8"),
"status": 3,
}
},
}
return body
# build websocket auth request url
def assemble_ws_auth_url(requset_url, method="POST", api_key="", api_secret=""):
u = printed_word_recognition.parse_url(requset_url)
# build websocket auth request url
def assemble_ws_auth_url(requset_url, method="GET", api_key="", api_secret=""):
u = parse_url(requset_url)
host = u.host
path = u.path
now = datetime.now()
date = format_date_time(mktime(now.timetuple()))
# print(date)
# date = "Thu, 12 Dec 2019 01:57:27 GMT"
# date = "Mon, 22 Aug 2022 03:26:45 GMT"
signature_origin = "host: {}\ndate: {}\n{} {} HTTP/1.1".format(
host, date, method, path
)
# print(signature_origin)
signature_sha = hmac.new(
api_secret.encode("utf-8"),
signature_origin.encode("utf-8"),
@ -97,59 +57,141 @@ class OCR(baseocr):
'api_key="%s", algorithm="%s", headers="%s", signature="%s"'
% (api_key, "hmac-sha256", "host date request-line", signature_sha)
)
authorization = base64.b64encode(
authorization_origin.encode("utf-8")
).decode(encoding="utf-8")
# print(authorization_origin)
authorization = base64.b64encode(authorization_origin.encode("utf-8")).decode(
encoding="utf-8"
)
values = {"host": host, "date": date, "authorization": authorization}
return requset_url + "?" + urlencode(values)
printed_word_recognition = printed_word_recognition()
request_url = assemble_ws_auth_url(
printed_word_recognition.url,
"POST",
printed_word_recognition.apikey,
printed_word_recognition.apisecret,
)
def get_result(url, sess, bina, appid, apisecret, apikey):
    """POST an image to the ``hh_ocr_recognize_doc`` interface and parse the reply.

    Args:
        url: Endpoint URL; it is signed with ``assemble_ws_auth_url`` before use.
        sess: Object exposing ``post(url, data=..., headers=...)``.
        bina: Raw image bytes; sent base64-encoded and declared as "jpg".
        appid: Application id placed in the request body header.
        apisecret: Secret passed to the URL signer.
        apikey: Key passed to the URL signer.

    Returns:
        Tuple ``(boxs, texts)``: the ``position`` and ``text`` value of every
        entry in the decoded result's ``lines`` list, in order.

    Raises:
        Exception: carrying the response object (chained to the original
            error) when the reply cannot be decoded or lacks expected fields.
    """
    request_url = assemble_ws_auth_url(url, "POST", apikey, apisecret)
    headers = {
        "content-type": "application/json",
        "host": "api.xf-yun.com",
        # NOTE(review): the literal string "APPID" is sent here, not the appid
        # argument — confirm with the service documentation whether that is intended.
        "appid": "APPID",
    }
    body = {
        "header": {"app_id": appid, "status": 3},
        "parameter": {
            "hh_ocr_recognize_doc": {
                "recognizeDocumentRes": {
                    "encoding": "utf8",
                    "compress": "raw",
                    "format": "json",
                }
            }
        },
        "payload": {
            "image": {
                "encoding": "jpg",
                "image": str(base64.b64encode(bina), "utf-8"),
                "status": 3,
            }
        },
    }
    response = sess.post(request_url, data=json.dumps(body), headers=headers)
    try:
        reply = json.loads(response.content.decode("utf8"))
        # The recognized document arrives as base64-encoded JSON text.
        renew_text = reply["payload"]["recognizeDocumentRes"]["text"]
        lines = json.loads(str(base64.b64decode(renew_text), "utf-8"))["lines"]
        boxs = [line["position"] for line in lines]
        texts = [line["text"] for line in lines]
    except Exception as err:
        # Keep the response as the error payload (as before) but preserve the
        # real cause and let KeyboardInterrupt/SystemExit propagate.
        raise Exception(response) from err
    return boxs, texts
def get_result2(url, appid, apisecret, apikey, sess, bina, lang):
    """POST an image to the ``ocr`` interface and parse the reply.

    Args:
        url: Endpoint URL; it is signed with ``assemble_ws_auth_url`` before use.
        appid: Application id sent in the ``app_id`` header and the body header.
        apisecret: Secret passed to the URL signer.
        apikey: Key passed to the URL signer.
        sess: Object exposing ``post(url, data=..., headers=...)``.
        bina: Raw image bytes; sent base64-encoded and declared as "jpg".
        lang: Value sent as the request's ``language`` parameter.

    Returns:
        Tuple ``(boxs, texts)``. For every line of every page, ``texts`` gets
        the line's ``content`` and ``boxs`` gets
        ``[x0, y0, x1, y1, x2, y2, x3, y3]`` taken from the first four
        ``coord`` points. Pages without a ``lines`` key contribute nothing.

    Raises:
        Exception: carrying the response object (chained to the original
            error) when the reply cannot be decoded or lacks expected fields.
    """
    request_url = assemble_ws_auth_url(url, "POST", apikey, apisecret)
    headers = {
        "content-type": "application/json",
        "host": "cn-east-1.api.xf-yun.com",
        "app_id": appid,
    }
    body = {
        "header": {"app_id": appid, "status": 3},
        "parameter": {
            "ocr": {
                "language": lang,
                "ocr_output_text": {
                    "encoding": "utf8",
                    "compress": "raw",
                    "format": "json",
                },
            }
        },
        "payload": {
            "image": {
                "encoding": "jpg",
                "image": str(base64.b64encode(bina), "utf-8"),
                "status": 3,
            }
        },
    }
    response = sess.post(request_url, data=json.dumps(body), headers=headers)
    try:
        reply = json.loads(response.content.decode("utf8"))
        # The OCR output arrives as base64-encoded JSON text.
        renew_text = reply["payload"]["ocr_output_text"]["text"]
        pages = json.loads(str(base64.b64decode(renew_text), "utf-8"))["pages"]
        boxs = []
        texts = []
        for page in pages:
            for line in page.get("lines", []):
                texts.append(line["content"])
                coord = line["coord"]
                # Flatten the four corner points into x0, y0, ..., x3, y3.
                boxs.append(
                    [coord[i][axis] for i in range(4) for axis in ("x", "y")]
                )
    except Exception as err:
        # Keep the response as the error payload (as before) but preserve the
        # real cause and let KeyboardInterrupt/SystemExit propagate.
        raise Exception(response) from err
    return boxs, texts
class OCR(baseocr):
    """OCR backend that dispatches to one of two xf-yun.com HTTP interfaces.

    The interface is chosen by ``self.config["interface"]``:
    ``"hh_ocr_recognize_doc"`` or ``"ocr"``.
    """

    def langmap(self):
        """Return the language-code mapping: zh, en and cht all map to ``ch_en``."""
        return {"zh": "ch_en", "en": "ch_en", "cht": "ch_en"}

    def ocr(self, imagebinary):
        """Recognize text in ``imagebinary`` through the configured interface.

        Args:
            imagebinary: Raw image bytes forwarded to the request helper.

        Returns:
            Dict ``{"box": boxs, "text": texts}`` built from the helper's result.

        Raises:
            ValueError: if ``config["interface"]`` is not a known interface.
        """
        self.checkempty(["APPId", "APISecret", "APIKey"])
        appid = self.config["APPId"]
        apisecret = self.config["APISecret"]
        apikey = self.config["APIKey"]
        interface = self.config["interface"]
        if interface == "hh_ocr_recognize_doc":
            boxs, texts = get_result(
                "http://api.xf-yun.com/v1/private/hh_ocr_recognize_doc",
                self.proxysession,
                imagebinary,
                appid,
                apisecret,
                apikey,
            )
        elif interface == "ocr":
            # This interface is sent an explicit language, so "auto" is rejected.
            if self.srclang_1 == "auto":
                self.raise_cant_be_auto_lang()
            boxs, texts = get_result2(
                "https://cn-east-1.api.xf-yun.com/v1/ocr",
                appid,
                apisecret,
                apikey,
                self.proxysession,
                imagebinary,
                self.srclang,
            )
        else:
            # Previously this fell through to an UnboundLocalError on boxs/texts.
            raise ValueError("unknown interface: {}".format(interface))
        return {"box": boxs, "text": texts}

View File

@ -289,7 +289,23 @@
"args": {
"APPId": "",
"APISecret": "",
"APIKey": ""
"APIKey": "",
"interface": "hh_ocr_recognize_doc"
},
"argstype": {
"interface": {
"rank": 0,
"name": "接口",
"type": "combo",
"list": [
"通用文字识别 intsig",
"印刷文字识别(多语种)"
],
"internal": [
"hh_ocr_recognize_doc",
"ocr"
]
}
}
},
"mangaocr": {

View File

@ -767,5 +767,7 @@
"取词查词": "كلمة البحث",
"音频编码": "ترميز الصوت",
"系统未安装当前语言的OCR模型": "التعرف الضوئي على الحروف نموذج اللغة الحالية غير مثبتة على النظام",
"最小帧率": "الحد الأدنى من معدل الإطار"
"最小帧率": "الحد الأدنى من معدل الإطار",
"印刷文字识别(多语种)": "التعرف على الحروف المطبوعة ( متعدد اللغات )",
"通用文字识别 intsig": "التعرف على الحروف العالمية"
}

View File

@ -767,5 +767,7 @@
"取词查词": "取詞查詞",
"音频编码": "音訊編碼",
"系统未安装当前语言的OCR模型": "系統未安裝目前語言的 OCR 模型",
"最小帧率": "最小幀率"
"最小帧率": "最小幀率",
"印刷文字识别(多语种)": "印刷文字識別(多語種)",
"通用文字识别 intsig": "通用文字識別intsig"
}

View File

@ -767,5 +767,7 @@
"取词查词": "Načíst a vyhledávat slova",
"音频编码": "Kódování zvuku",
"系统未安装当前语言的OCR模型": "OCR model pro aktuální jazyk není nainstalován v systému",
"最小帧率": "Minimální snímková frekvence"
"最小帧率": "Minimální snímková frekvence",
"印刷文字识别(多语种)": "Rozpoznávání tištěného textu (vícejazyčné)",
"通用文字识别 intsig": "Univerzální rozpoznávání textu Intsig"
}

View File

@ -767,5 +767,7 @@
"取词查词": "Wörter abrufen und suchen",
"音频编码": "Audio-Codierung",
"系统未安装当前语言的OCR模型": "Das OCR-Modell für die aktuelle Sprache ist nicht im System installiert",
"最小帧率": "Minimale Bildrate"
"最小帧率": "Minimale Bildrate",
"印刷文字识别(多语种)": "Drucktexterkennung (mehrsprachig)",
"通用文字识别 intsig": "Universelle Texterkennung Intsig"
}

View File

@ -767,5 +767,7 @@
"取词查词": "Retrieve and search for words",
"音频编码": "audio coding",
"系统未安装当前语言的OCR模型": "The OCR model for the current language is not installed in the system",
"最小帧率": "Minimum frame rate"
"最小帧率": "Minimum frame rate",
"印刷文字识别(多语种)": "Printed text recognition (multilingual)",
"通用文字识别 intsig": "Universal Text Recognition Intsig"
}

View File

@ -767,5 +767,7 @@
"取词查词": "Buscar palabras",
"音频编码": "Codificación de audio",
"系统未安装当前语言的OCR模型": "El sistema no instala el modelo OCR del idioma actual",
"最小帧率": "Tasa mínima de fotogramas"
"最小帧率": "Tasa mínima de fotogramas",
"印刷文字识别(多语种)": "Reconocimiento de texto impreso (multilingüe)",
"通用文字识别 intsig": "Reconocimiento de texto universal intsig"
}

View File

@ -767,5 +767,7 @@
"取词查词": "Recherche de mots",
"音频编码": "Codage audio",
"系统未安装当前语言的OCR模型": "Le système n'a pas installé le modèle OCR pour la langue actuelle",
"最小帧率": "Framerate minimum"
"最小帧率": "Framerate minimum",
"印刷文字识别(多语种)": "Reconnaissance de texte imprimé (multilingue)",
"通用文字识别 intsig": "Reconnaissance de texte générique intsig"
}

View File

@ -767,5 +767,7 @@
"取词查词": "Recupera e cerca parole",
"音频编码": "Codificazione audio",
"系统未安装当前语言的OCR模型": "Il modello OCR per la lingua corrente non è installato nel sistema",
"最小帧率": "Tasso minimo di inquadratura"
"最小帧率": "Tasso minimo di inquadratura",
"印刷文字识别(多语种)": "Riconoscimento del testo stampato (multilingue)",
"通用文字识别 intsig": "Riconoscimento universale del testo Intsig"
}

View File

@ -767,5 +767,7 @@
"取词查词": "単語を取って単語を調べる",
"音频编码": "オーディオコーディング",
"系统未安装当前语言的OCR模型": "現在の言語のOCRモデルがシステムにインストールされていません",
"最小帧率": "最小フレームレート"
"最小帧率": "最小フレームレート",
"印刷文字识别(多语种)": "印刷文字認識(多言語)",
"通用文字识别 intsig": "共通文字認識intsig"
}

View File

@ -767,5 +767,7 @@
"取词查词": "취사 조사",
"音频编码": "오디오 인코딩",
"系统未安装当前语言的OCR模型": "현재 언어의 OCR 모델이 시스템에 설치되지 않았습니다.",
"最小帧率": "최소 프레임 속도"
"最小帧率": "최소 프레임 속도",
"印刷文字识别(多语种)": "인쇄 문자 인식 (다국어)",
"通用文字识别 intsig": "일반 문자 인식 intsig"
}

View File

@ -767,5 +767,7 @@
"取词查词": "Woorden ophalen en zoeken",
"音频编码": "Audiocodering",
"系统未安装当前语言的OCR模型": "Het OCR-model voor de huidige taal is niet geïnstalleerd in het systeem",
"最小帧率": "Minimumframesnelheid"
"最小帧率": "Minimumframesnelheid",
"印刷文字识别(多语种)": "Herkenning van gedrukte tekst (meertalig)",
"通用文字识别 intsig": "Universele tekstherkenning Intsig"
}

View File

@ -767,5 +767,7 @@
"取词查词": "Pobieranie i wyszukiwanie słów",
"音频编码": "Kodowanie dźwięku",
"系统未安装当前语言的OCR模型": "Model OCR dla bieżącego języka nie jest zainstalowany w systemie",
"最小帧率": "Minimalna częstotliwość klatek"
"最小帧率": "Minimalna częstotliwość klatek",
"印刷文字识别(多语种)": "Rozpoznawanie tekstu drukowanego (wielojęzyczne)",
"通用文字识别 intsig": "Uniwersalne rozpoznawanie tekstu Intsig"
}

View File

@ -767,5 +767,7 @@
"取词查词": "Obter e procurar palavras",
"音频编码": "Codificação de áudio",
"系统未安装当前语言的OCR模型": "O modelo OCR para o idioma atual não está instalado no sistema",
"最小帧率": "Taxa mínima de quadros"
"最小帧率": "Taxa mínima de quadros",
"印刷文字识别(多语种)": "Reconhecimento de texto impresso (multilingue)",
"通用文字识别 intsig": "Intsig de Reconhecimento Universal de Texto"
}

View File

@ -767,5 +767,7 @@
"取词查词": "Поиск слов",
"音频编码": "Звуковое кодирование",
"系统未安装当前语言的OCR模型": "Система не устанавливает модель OCR для текущего языка",
"最小帧率": "Минимальная частота кадров"
"最小帧率": "Минимальная частота кадров",
"印刷文字识别(多语种)": "Распознавание печатного текста (многоязычие)",
"通用文字识别 intsig": "Универсальное распознавание текста intsig"
}

View File

@ -767,5 +767,7 @@
"取词查词": "Hämta och sök efter ord",
"音频编码": "Ljudkodning",
"系统未安装当前语言的OCR模型": "OCR-modellen för det aktuella språket är inte installerad i systemet",
"最小帧率": "Minsta ramfrekvens"
"最小帧率": "Minsta ramfrekvens",
"印刷文字识别(多语种)": "Igenkänning av tryckt text (flerspråkig)",
"通用文字识别 intsig": "Universal textigenkänning Intsig"
}

View File

@ -767,5 +767,7 @@
"取词查词": "ค้นหาคำ",
"音频编码": "การเข้ารหัสเสียง",
"系统未安装当前语言的OCR模型": "ระบบไม่ได้ติดตั้งรุ่น OCR สำหรับภาษาปัจจุบัน",
"最小帧率": "อัตราเฟรมขั้นต่ำ"
"最小帧率": "อัตราเฟรมขั้นต่ำ",
"印刷文字识别(多语种)": "การจดจำคำพิมพ์ (หลายภาษา)",
"通用文字识别 intsig": "การจดจำคำทั่วไป intsig"
}

View File

@ -767,5 +767,7 @@
"取词查词": "Kelimeleri alın ve arayın",
"音频编码": "Ses Kodlama",
"系统未安装当前语言的OCR模型": "Ağımdaki dilin OCR modeli sistemde kurulmadı.",
"最小帧率": "En az fotoğraf hızı"
"最小帧率": "En az fotoğraf hızı",
"印刷文字识别(多语种)": "Metin tanımlaması (çoklu dil)",
"通用文字识别 intsig": "Universal Text Recognition Intsig"
}

View File

@ -767,5 +767,7 @@
"取词查词": "Отримати і шукати слова",
"音频编码": "Аудіокодування",
"系统未安装当前语言的OCR模型": "Модель OCR для поточної мови не встановлена у системі",
"最小帧率": "Мінімальна швидкість рамок"
"最小帧率": "Мінімальна швидкість рамок",
"印刷文字识别(多语种)": "Розпізнавання друкованого тексту (багатомовне)",
"通用文字识别 intsig": "Універсальне розпізнавання тексту Intsig"
}

View File

@ -767,5 +767,7 @@
"取词查词": "Tìm kiếm từ",
"音频编码": "Mã hóa âm thanh",
"系统未安装当前语言的OCR模型": "Hệ thống không cài đặt mô hình OCR cho ngôn ngữ hiện tại",
"最小帧率": "Tỷ lệ khung hình tối thiểu"
"最小帧率": "Tỷ lệ khung hình tối thiểu",
"印刷文字识别(多语种)": "Nhận dạng văn bản in (đa ngôn ngữ)",
"通用文字识别 intsig": "Nhận dạng văn bản chung intsig"
}

View File

@ -767,5 +767,7 @@
"取词翻译": "",
"取词查词": "",
"音频编码": "",
"最小帧率": ""
"最小帧率": "",
"印刷文字识别(多语种)": "",
"通用文字识别 intsig": ""
}