dict

2024-12-29 00:24:13 +08:00 · 2024-10-26 14:47:00 +08:00 · 2024-10-26 14:47:00 +08:00 · d84757673c
commit d84757673c
parent 74b0f246cc
6 changed files with 136 additions and 82 deletions
--- a/src/LunaTranslator/cishu/japandict.py
+++ b/src/LunaTranslator/cishu/japandict.py
@ -0,0 +1,37 @@
+import requests
+from urllib.parse import quote
+from cishu.cishubase import cishubase
+from myutils.utils import get_element_by
+import threading, base64, re
+
+
+class japandict(cishubase):
+    """Dictionary backend that looks words up on www.japandict.com."""
+
+    def makelinkbase64(self, link, saver):
+        """Download ``link`` and store it in ``saver`` as a base64 data URI.
+
+        The response body is fetched through ``self.proxy``, base64-encoded and
+        written to ``saver[link]`` with the ``application/octet-stream`` media
+        type. Nothing is returned; the result is delivered through ``saver``.
+        """
+        html = requests.get(
+            link,
+            proxies=self.proxy,
+        ).content
+        base64_content = base64.b64encode(html).decode("utf-8")
+        saver[link] = f"data:application/octet-stream;base64,{base64_content}"
+
+    def search(self, word):
+        """Return the JapanDict result list for ``word`` as an HTML fragment.
+
+        The fragment is the content of the element whose ``class`` attribute is
+        ``list-group list-group-flush``, followed by the hard-coded ``<link>``
+        tags below with their ``href`` targets replaced by data URIs.
+        Returns ``None`` when that element is not found in the response.
+        """
+        url = "https://www.japandict.com/?s={}&lang=eng&list=1".format(quote(word))
+        html = requests.get(
+            url,
+            proxies=self.proxy,
+        ).text
+
+        res = get_element_by("class", "list-group list-group-flush", html)
+        if res is None:
+            return
+        ts = []
+        # Maps each original href to the data URI produced by makelinkbase64.
+        saver = {}
+        # Hard-coded stylesheet/font <link> tags that are appended to the result.
+        styles = '<link rel="stylesheet" href="https://www.japandict.com/static/css/japandict.ac087f3ecbc8.css" type="text/css"><link rel="preload" href="https://www.japandict.com/static/JapaneseRadicals-Regular.woff2" as="font"><link rel="preload" href="https://www.japandict.com/static/radicals_font.woff" as="font">'
+        # Fetch every href concurrently, one thread per link, then wait for all.
+        for link in re.findall('href="(.*?)"', styles):
+            ts.append(threading.Thread(target=self.makelinkbase64, args=(link, saver)))
+            ts[-1].start()
+        for t in ts:
+            t.join()
+        # Only links present in saver are rewritten; a link whose download did
+        # not complete is absent from saver and keeps its remote URL.
+        for link in saver:
+            styles = styles.replace(link, saver[link])
+        return res + styles
--- a/src/LunaTranslator/cishu/jisho.py
+++ b/src/LunaTranslator/cishu/jisho.py
@ -2,90 +2,21 @@ import requests
 from urllib.parse import quote
 import re
 from cishu.cishubase import cishubase
-
-from html.parser import HTMLParser
-
-
-class IDParser(HTMLParser):
-    """Modified HTMLParser that isolates a tag with the specified id"""
-
-    def __init__(self, id):
-        self.id = id
-        self.result = None
-        self.started = False
-        self.depth = {}
-        self.html = None
-        self.watch_startpos = False
-        HTMLParser.__init__(self)
-
-    def loads(self, html):
-        self.html = html
-        self.feed(html)
-        self.close()
-
-    def handle_starttag(self, tag, attrs):
-        attrs = dict(attrs)
-        if self.started:
-            self.find_startpos(None)
-        if "id" in attrs and attrs["id"] == self.id:
-            self.result = [tag]
-            self.started = True
-            self.watch_startpos = True
-        if self.started:
-            if not tag in self.depth:
-                self.depth[tag] = 0
-            self.depth[tag] += 1
-
-    def handle_endtag(self, tag):
-        if self.started:
-            if tag in self.depth:
-                self.depth[tag] -= 1
-            if self.depth[self.result[0]] == 0:
-                self.started = False
-                self.result.append(self.getpos())
-
-    def find_startpos(self, x):
-        """Needed to put the start position of the result (self.result[1])
-        after the opening tag with the requested id"""
-        if self.watch_startpos:
-            self.watch_startpos = False
-            self.result.append(self.getpos())
-
-    handle_entityref = handle_charref = handle_data = handle_comment = handle_decl = (
-        handle_pi
-    ) = unknown_decl = find_startpos
-
-    def get_result(self):
-        if self.result == None:
-            return None
-        if len(self.result) != 3:
-            return None
-        lines = self.html.split("\n")
-        lines = lines[self.result[1][0] - 1 : self.result[2][0]]
-        lines[0] = lines[0][self.result[1][1] :]
-        if len(lines) == 1:
-            lines[-1] = lines[-1][: self.result[2][1] - self.result[1][1]]
-        lines[-1] = lines[-1][: self.result[2][1]]
-        return "\n".join(lines).strip()
-
-
-def get_element_by_id(id, html):
-    """Return the content of the tag with the specified id in the passed HTML document"""
-    parser = IDParser(id)
-    parser.loads(html)
-    return parser.get_result()
+from myutils.utils import get_element_by


 class jisho(cishubase):

    def search(self, word):
-        url = "https://jisho.org/word/{}".format(quote(word))
+        url = "https://jisho.org/search/{}".format(quote(word))
        html = requests.get(
            url,
            proxies=self.proxy,
        ).text

-        res = get_element_by_id("page_container", html)
+        if get_element_by("id", "no-matches", html):
+            return
+        res = get_element_by("id", "page_container", html)
        if res is None:
            return
        res = (
@ -94,6 +25,7 @@ class jisho(cishubase):
            .replace(
                '<a href="#" class="signin">Log in</a> to talk about this word.', ""
            )
+            .replace(get_element_by("id", "other_dictionaries", html), "")
        )

        ss = re.search('href="https://assets.jisho.org/assets/application(.*)"', html)
--- a/src/LunaTranslator/cishu/youdao.py
+++ b/src/LunaTranslator/cishu/youdao.py
@ -4,16 +4,22 @@ from urllib.parse import quote
 import re, os
 from cishu.cishubase import cishubase
 from myutils.utils import simplehtmlparser
+from myutils.utils import get_element_by


 class youdao(cishubase):

-    def search(self, word):
-        url = "https://dict.youdao.com/result?word={}&lang={}".format(
-            quote(word), getlangsrc()
-        )
+    def search(self, word: str):
+        lang = getlangsrc()
+        if lang == "auto":
+            if word.isascii():
+                lang = "en"
+            else:
+                lang = "ja"
+        url = "https://dict.youdao.com/result?word={}&lang={}".format(quote(word), lang)
        text = requests.get(url, proxies=self.proxy).text
-
+        if not get_element_by("class", "word-head", text):
+            return
        text = re.sub("<header([\\s\\S]*?)></header>", "", text)
        text = re.sub("<aside([\\s\\S]*?)></aside>", "", text)

--- a/src/LunaTranslator/myutils/utils.py
+++ b/src/LunaTranslator/myutils/utils.py
@ -21,6 +21,7 @@ from myutils.config import (
 import threading, winreg
 import re, heapq, winsharedutils
 from myutils.wrapper import tryprint, threader
+from html.parser import HTMLParser


 def qimage2binary(qimage: QImage, fmt="BMP"):
@ -95,6 +96,10 @@ def getlanguagespace(lang=None):

 def findenclose(text, tag):
    i = 0
+    if tag == "link":
+        tags = "<link"
+        tage = ">"
+    else:
        tags = f"<{tag}"
        tage = f"</{tag}>"
    collect = ""
@ -947,3 +952,73 @@ def createenglishlangmap():
    )
    mp.update({"auto": ""})
    return mp
+
+
+class IDParser(HTMLParser):
+    """Modified HTMLParser that isolates the content of a tag selected by attribute.
+
+    The target is the tag whose attribute ``attr`` has exactly the value
+    ``attrv``, for example ``("id", "page_container")``.
+    """
+
+    def __init__(self, attr, attrv):
+        # (attribute name, expected value) pair identifying the target tag.
+        self.id = attr, attrv
+        # Grows to [tag name, start position, end position] while parsing.
+        self.result = None
+        # True while the parser is inside the matched tag.
+        self.started = False
+        # Start tags minus end tags per tag name, counted while inside a match.
+        self.depth = {}
+        # The full document, kept so get_result() can slice it.
+        self.html = None
+        # Set on a match and cleared once the content start position is recorded.
+        self.watch_startpos = False
+        HTMLParser.__init__(self)
+
+    def loads(self, html):
+        """Parse ``html`` completely, keeping a reference to it for get_result()."""
+        self.html = html
+        self.feed(html)
+        self.close()
+
+    def handle_starttag(self, tag, attrs):
+        """Start capturing at the matching tag and count start tags inside it."""
+        attrs = dict(attrs)
+        if self.started:
+            # A child tag right after the opening tag also marks the content start.
+            self.find_startpos(None)
+        if self.id[0] in attrs and attrs[self.id[0]] == self.id[1]:
+            # A new match restarts the capture and discards any earlier result.
+            self.result = [tag]
+            self.started = True
+            self.watch_startpos = True
+        if self.started:
+            if not tag in self.depth:
+                self.depth[tag] = 0
+            self.depth[tag] += 1
+
+    def handle_endtag(self, tag):
+        """Finish the capture once the matched tag name is balanced again."""
+        if self.started:
+            if tag in self.depth:
+                self.depth[tag] -= 1
+            if self.depth[self.result[0]] == 0:
+                # Record the current parser position as the end of the content.
+                self.started = False
+                self.result.append(self.getpos())
+
+    def find_startpos(self, x):
+        """Needed to put the start position of the result (self.result[1])
+        after the opening tag with the requested attribute value"""
+        if self.watch_startpos:
+            self.watch_startpos = False
+            self.result.append(self.getpos())
+
+    # Every other parser event (text, comments, declarations, ...) is routed to
+    # find_startpos so the first one after the opening tag fixes the content start.
+    handle_entityref = handle_charref = handle_data = handle_comment = handle_decl = (
+        handle_pi
+    ) = unknown_decl = find_startpos
+
+    def get_result(self):
+        """Return the captured content as a stripped string, or None.
+
+        None is returned when no tag matched or when the capture is incomplete
+        (start or end position missing). That includes a matched tag that is
+        closed before any other parser event occurs, because no start position
+        is recorded in that case.
+        """
+        if self.result == None:
+            return None
+        if len(self.result) != 3:
+            return None
+        # Positions are (line, column) pairs from getpos(); line numbers are 1-based.
+        lines = self.html.split("\n")
+        # Keep only the lines spanned by the content.
+        lines = lines[self.result[1][0] - 1 : self.result[2][0]]
+        # Drop everything before the content on the first line.
+        lines[0] = lines[0][self.result[1][1] :]
+        if len(lines) == 1:
+            # Start and end share a line: shift the end column by the columns
+            # already removed from the front.
+            lines[-1] = lines[-1][: self.result[2][1] - self.result[1][1]]
+        # Cut the last line at the end column.
+        lines[-1] = lines[-1][: self.result[2][1]]
+        return "\n".join(lines).strip()
+
+
+def get_element_by(attr, attrv, html):
+    """Return the content of the tag whose attribute ``attr`` equals ``attrv`` in the passed HTML document.
+
+    Returns None when no such tag is found or its content could not be isolated.
+    """
+    parser = IDParser(attr, attrv)
+    parser.loads(html)
+    return parser.get_result()
--- a/src/files/defaultconfig/config.json
+++ b/src/files/defaultconfig/config.json
@ -1378,6 +1378,10 @@
            "use": false,
            "name": "jisho"
        },
+        "japandict": {
+            "use": false,
+            "name": "JapanDict"
+        },
        "weblio": {
            "use": false,
            "name": "weblio"
--- a/src/plugins/CMakeLists.txt
+++ b/src/plugins/CMakeLists.txt
@ -29,7 +29,7 @@ include(generate_product_version)

 set(VERSION_MAJOR 5)
 set(VERSION_MINOR 50)
-set(VERSION_PATCH 1)
+set(VERSION_PATCH 2)

 add_library(pch pch.cpp)
 target_precompile_headers(pch PUBLIC pch.h)