From d84757673cac4ae89c4e9639c776031f731ded17 Mon Sep 17 00:00:00 2001 From: test123456654321 <16307130148@fudan.edu.cn> Date: Sat, 26 Oct 2024 14:47:00 +0800 Subject: [PATCH] dict --- src/LunaTranslator/cishu/japandict.py | 37 +++++++++++++ src/LunaTranslator/cishu/jisho.py | 80 ++------------------------- src/LunaTranslator/cishu/youdao.py | 16 ++++-- src/LunaTranslator/myutils/utils.py | 79 +++++++++++++++++++++++++- src/files/defaultconfig/config.json | 4 ++ src/plugins/CMakeLists.txt | 2 +- 6 files changed, 136 insertions(+), 82 deletions(-) create mode 100644 src/LunaTranslator/cishu/japandict.py diff --git a/src/LunaTranslator/cishu/japandict.py b/src/LunaTranslator/cishu/japandict.py new file mode 100644 index 00000000..b87b7f26 --- /dev/null +++ b/src/LunaTranslator/cishu/japandict.py @@ -0,0 +1,37 @@ +import requests +from urllib.parse import quote +from cishu.cishubase import cishubase +from myutils.utils import get_element_by +import threading, base64, re + + +class japandict(cishubase): + def makelinkbase64(self, link, saver): + html = requests.get( + link, + proxies=self.proxy, + ).content + base64_content = base64.b64encode(html).decode("utf-8") + saver[link] = f"data:application/octet-stream;base64,{base64_content}" + + def search(self, word): + url = "https://www.japandict.com/?s={}&lang=eng&list=1".format(quote(word)) + html = requests.get( + url, + proxies=self.proxy, + ).text + + res = get_element_by("class", "list-group list-group-flush", html) + if res is None: + return + ts = [] + saver = {} + styles = '' + for link in re.findall('href="(.*?)"', styles): + ts.append(threading.Thread(target=self.makelinkbase64, args=(link, saver))) + ts[-1].start() + for t in ts: + t.join() + for link in saver: + styles = styles.replace(link, saver[link]) + return res + styles diff --git a/src/LunaTranslator/cishu/jisho.py b/src/LunaTranslator/cishu/jisho.py index 7ff9c66a..322ddbe1 100644 --- a/src/LunaTranslator/cishu/jisho.py +++ b/src/LunaTranslator/cishu/jisho.py @@ -2,90 +2,21 @@ import requests from urllib.parse import quote import re from cishu.cishubase import cishubase - -from html.parser import HTMLParser - - -class IDParser(HTMLParser): - """Modified HTMLParser that isolates a tag with the specified id""" - - def __init__(self, id): - self.id = id - self.result = None - self.started = False - self.depth = {} - self.html = None - self.watch_startpos = False - HTMLParser.__init__(self) - - def loads(self, html): - self.html = html - self.feed(html) - self.close() - - def handle_starttag(self, tag, attrs): - attrs = dict(attrs) - if self.started: - self.find_startpos(None) - if "id" in attrs and attrs["id"] == self.id: - self.result = [tag] - self.started = True - self.watch_startpos = True - if self.started: - if not tag in self.depth: - self.depth[tag] = 0 - self.depth[tag] += 1 - - def handle_endtag(self, tag): - if self.started: - if tag in self.depth: - self.depth[tag] -= 1 - if self.depth[self.result[0]] == 0: - self.started = False - self.result.append(self.getpos()) - - def find_startpos(self, x): - """Needed to put the start position of the result (self.result[1]) - after the opening tag with the requested id""" - if self.watch_startpos: - self.watch_startpos = False - self.result.append(self.getpos()) - - handle_entityref = handle_charref = handle_data = handle_comment = handle_decl = ( - handle_pi - ) = unknown_decl = find_startpos - - def get_result(self): - if self.result == None: - return None - if len(self.result) != 3: - return None - lines = self.html.split("\n") - lines = lines[self.result[1][0] - 1 : self.result[2][0]] - lines[0] = lines[0][self.result[1][1] :] - if len(lines) == 1: - lines[-1] = lines[-1][: self.result[2][1] - self.result[1][1]] - lines[-1] = lines[-1][: self.result[2][1]] - return "\n".join(lines).strip() - - -def get_element_by_id(id, html): - """Return the content of the tag with the specified id in the passed HTML document""" - parser = IDParser(id) - parser.loads(html) - return parser.get_result() +from myutils.utils import get_element_by class jisho(cishubase): def search(self, word): - url = "https://jisho.org/word/{}".format(quote(word)) + url = "https://jisho.org/search/{}".format(quote(word)) html = requests.get( url, proxies=self.proxy, ).text - res = get_element_by_id("page_container", html) + if get_element_by("id", "no-matches", html): + return + res = get_element_by("id", "page_container", html) if res is None: return res = ( @@ -94,6 +25,7 @@ class jisho(cishubase): .replace( 'Log in to talk about this word.', "" ) + .replace(get_element_by("id", "other_dictionaries", html), "") ) ss = re.search('href="https://assets.jisho.org/assets/application(.*)"', html) diff --git a/src/LunaTranslator/cishu/youdao.py b/src/LunaTranslator/cishu/youdao.py index 624a01c7..bf1dd163 100644 --- a/src/LunaTranslator/cishu/youdao.py +++ b/src/LunaTranslator/cishu/youdao.py @@ -4,16 +4,22 @@ from urllib.parse import quote import re, os from cishu.cishubase import cishubase from myutils.utils import simplehtmlparser +from myutils.utils import get_element_by class youdao(cishubase): - def search(self, word): - url = "https://dict.youdao.com/result?word={}&lang={}".format( - quote(word), getlangsrc() - ) + def search(self, word: str): + lang = getlangsrc() + if lang == "auto": + if word.isascii(): + lang = "en" + else: + lang = "ja" + url = "https://dict.youdao.com/result?word={}&lang={}".format(quote(word), lang) text = requests.get(url, proxies=self.proxy).text - + if not get_element_by("class", "word-head", text): + return text = re.sub("", "", text) text = re.sub("", "", text) diff --git a/src/LunaTranslator/myutils/utils.py b/src/LunaTranslator/myutils/utils.py index 5f9d7405..8f76a8ac 100644 --- a/src/LunaTranslator/myutils/utils.py +++ b/src/LunaTranslator/myutils/utils.py @@ -21,6 +21,7 @@ from myutils.config import ( import threading, winreg import re, heapq, winsharedutils from myutils.wrapper import tryprint, threader +from html.parser import HTMLParser def qimage2binary(qimage: QImage, fmt="BMP"): @@ -95,8 +96,12 @@ def getlanguagespace(lang=None): def findenclose(text, tag): i = 0 - tags = f"<{tag}" - tage = f"" + if tag == "link": + tags = "" collect = "" __ = 0 while True: @@ -947,3 +952,73 @@ def createenglishlangmap(): ) mp.update({"auto": ""}) return mp + + +class IDParser(HTMLParser): + """Modified HTMLParser that isolates a tag with the specified id""" + + def __init__(self, attr, attrv): + self.id = attr, attrv + self.result = None + self.started = False + self.depth = {} + self.html = None + self.watch_startpos = False + HTMLParser.__init__(self) + + def loads(self, html): + self.html = html + self.feed(html) + self.close() + + def handle_starttag(self, tag, attrs): + attrs = dict(attrs) + if self.started: + self.find_startpos(None) + if self.id[0] in attrs and attrs[self.id[0]] == self.id[1]: + self.result = [tag] + self.started = True + self.watch_startpos = True + if self.started: + if not tag in self.depth: + self.depth[tag] = 0 + self.depth[tag] += 1 + + def handle_endtag(self, tag): + if self.started: + if tag in self.depth: + self.depth[tag] -= 1 + if self.depth[self.result[0]] == 0: + self.started = False + self.result.append(self.getpos()) + + def find_startpos(self, x): + """Needed to put the start position of the result (self.result[1]) + after the opening tag with the requested id""" + if self.watch_startpos: + self.watch_startpos = False + self.result.append(self.getpos()) + + handle_entityref = handle_charref = handle_data = handle_comment = handle_decl = ( + handle_pi + ) = unknown_decl = find_startpos + + def get_result(self): + if self.result == None: + return None + if len(self.result) != 3: + return None + lines = self.html.split("\n") + lines = lines[self.result[1][0] - 1 : self.result[2][0]] + lines[0] = lines[0][self.result[1][1] :] + if len(lines) == 1: + lines[-1] = lines[-1][: self.result[2][1] - self.result[1][1]] + lines[-1] = lines[-1][: self.result[2][1]] + return "\n".join(lines).strip() + + +def get_element_by(attr, attrv, html): + """Return the content of the tag with the specified id in the passed HTML document""" + parser = IDParser(attr, attrv) + parser.loads(html) + return parser.get_result() diff --git a/src/files/defaultconfig/config.json b/src/files/defaultconfig/config.json index 1c967a17..445537ea 100644 --- a/src/files/defaultconfig/config.json +++ b/src/files/defaultconfig/config.json @@ -1378,6 +1378,10 @@ "use": false, "name": "jisho" }, + "japandict": { + "use": false, + "name": "JapanDict" + }, "weblio": { "use": false, "name": "weblio" diff --git a/src/plugins/CMakeLists.txt b/src/plugins/CMakeLists.txt index 419a5516..a0e9ad8c 100644 --- a/src/plugins/CMakeLists.txt +++ b/src/plugins/CMakeLists.txt @@ -29,7 +29,7 @@ include(generate_product_version) set(VERSION_MAJOR 5) set(VERSION_MINOR 50) -set(VERSION_PATCH 1) +set(VERSION_PATCH 2) add_library(pch pch.cpp) target_precompile_headers(pch PUBLIC pch.h)