dict

2024-12-29 16:44:13 +08:00 · 2024-10-26 14:47:00 +08:00 · 2024-10-26 14:47:00 +08:00 · d84757673c
commit d84757673c
parent 74b0f246cc
6 changed files with 136 additions and 82 deletions
--- a/src/LunaTranslator/cishu/japandict.py
+++ b/src/LunaTranslator/cishu/japandict.py
@ -0,0 +1,37 @@
 import requests
 from urllib.parse import quote
 from cishu.cishubase import cishubase
 from myutils.utils import get_element_by
 import threading, base64, re
 class japandict(cishubase):
    """Dictionary backend that looks a word up on www.japandict.com."""
    def makelinkbase64(self, link, saver):
        """Download ``link`` and record it in ``saver`` as a base64 data URI.

        ``saver`` is the dict shared by the worker threads started in
        ``search``; the entry is stored under the original ``link``.
        """
        response = requests.get(link, proxies=self.proxy)
        encoded = base64.b64encode(response.content).decode("utf-8")
        saver[link] = "data:application/octet-stream;base64," + encoded
    def search(self, word):
        """Return the result-list HTML for ``word`` followed by inlined assets.

        Returns None when the result list element is not found in the page.
        """
        url = "https://www.japandict.com/?s={}&lang=eng&list=1".format(quote(word))
        page = requests.get(url, proxies=self.proxy).text
        res = get_element_by("class", "list-group list-group-flush", page)
        if res is None:
            return None
        styles = '<link rel="stylesheet" href="https://www.japandict.com/static/css/japandict.ac087f3ecbc8.css" type="text/css"><link rel="preload" href="https://www.japandict.com/static/JapaneseRadicals-Regular.woff2" as="font"><link rel="preload" href="https://www.japandict.com/static/radicals_font.woff" as="font">'
        # One download thread per referenced asset; each fills in its own
        # entry of ``saver``.
        saver = {}
        workers = [
            threading.Thread(target=self.makelinkbase64, args=(asset, saver))
            for asset in re.findall('href="(.*?)"', styles)
        ]
        for worker in workers:
            worker.start()
        for worker in workers:
            worker.join()
        # Swap every asset that was downloaded for its data URI; links that
        # never made it into ``saver`` are left untouched.
        for asset, data_uri in saver.items():
            styles = styles.replace(asset, data_uri)
        return res + styles
--- a/src/LunaTranslator/cishu/jisho.py
+++ b/src/LunaTranslator/cishu/jisho.py
@ -2,90 +2,21 @@ import requests
 from urllib.parse import quote
 import re
 from cishu.cishubase import cishubase
-
+from myutils.utils import get_element_by
 from html.parser import HTMLParser
 class IDParser(HTMLParser):
    """Modified HTMLParser that isolates a tag with the specified id.

    Feed a document with ``loads`` and read the inner HTML of the matching
    element with ``get_result``. If several separate elements match, the
    last one that was parsed wins.
    """
    def __init__(self, id):
        self.id = id
        # Grows from [tag] to [tag, start_pos] to [tag, start_pos, end_pos];
        # positions are the (line, column) pairs returned by getpos().
        self.result = None
        # True while the parser is inside the matching element.
        self.started = False
        # Number of currently open tags, per tag name, inside the match.
        self.depth = {}
        self.html = None
        # True until the first token after the matching opening tag is seen.
        self.watch_startpos = False
        HTMLParser.__init__(self)
    def loads(self, html):
        """Parse ``html`` completely, remembering it for ``get_result``."""
        self.html = html
        self.feed(html)
        self.close()
    def handle_starttag(self, tag, attrs):
        attrs = dict(attrs)
        if self.started:
            self.find_startpos(None)
        elif "id" in attrs and attrs["id"] == self.id:
            # Only begin a capture when none is running: restarting on a
            # nested match would move the start position inside the outer
            # element and yield an unbalanced fragment.
            self.result = [tag]
            self.started = True
            self.watch_startpos = True
            # Drop counters left over from an earlier capture (e.g. tags
            # that were never closed) so this element can reach depth 0.
            self.depth = {}
        if self.started:
            self.depth[tag] = self.depth.get(tag, 0) + 1
    def handle_endtag(self, tag):
        if self.started:
            if tag in self.depth:
                self.depth[tag] -= 1
            if self.depth[self.result[0]] == 0:
                self.started = False
                # An element with no content has not seen a token yet; pin
                # the start here so the end is never recorded before it.
                self.find_startpos(None)
                self.result.append(self.getpos())
    def find_startpos(self, x):
        """Needed to put the start position of the result (self.result[1])
        after the opening tag with the requested id"""
        if self.watch_startpos:
            self.watch_startpos = False
            self.result.append(self.getpos())
    handle_entityref = handle_charref = handle_data = handle_comment = handle_decl = (
        handle_pi
    ) = unknown_decl = find_startpos
    def get_result(self):
        """Return the stripped inner HTML of the match, or None if there is none."""
        if self.result is None or len(self.result) != 3:
            return None
        (first_line, first_col), (last_line, last_col) = self.result[1:]
        lines = self.html.split("\n")[first_line - 1 : last_line]
        if len(lines) == 1:
            # Both columns refer to the same line, so cut it in one slice.
            lines[0] = lines[0][first_col:last_col]
        else:
            lines[0] = lines[0][first_col:]
            lines[-1] = lines[-1][:last_col]
        return "\n".join(lines).strip()
 def get_element_by_id(id, html):
    """Return the inner HTML of the tag in ``html`` whose id equals ``id``.

    The value is whatever ``IDParser.get_result`` reports, which is None
    when no matching element was captured.
    """
    finder = IDParser(id)
    finder.loads(html)
    return finder.get_result()
 class jisho(cishubase):
    def search(self, word):
-        url = "https://jisho.org/word/{}".format(quote(word))
+        url = "https://jisho.org/search/{}".format(quote(word))
        html = requests.get(
            url,
            proxies=self.proxy,
        ).text
-        res = get_element_by_id("page_container", html)
+        if get_element_by("id", "no-matches", html):
            return
        res = get_element_by("id", "page_container", html)
        if res is None:
            return
        res = (
@ -94,6 +25,7 @@ class jisho(cishubase):
            .replace(
                '<a href="#" class="signin">Log in</a> to talk about this word.', ""
            )
            .replace(get_element_by("id", "other_dictionaries", html), "")
        )
        ss = re.search('href="https://assets.jisho.org/assets/application(.*)"', html)
--- a/src/LunaTranslator/cishu/youdao.py
+++ b/src/LunaTranslator/cishu/youdao.py
@ -4,16 +4,22 @@ from urllib.parse import quote
 import re, os
 from cishu.cishubase import cishubase
 from myutils.utils import simplehtmlparser
 from myutils.utils import get_element_by
 class youdao(cishubase):
-    def search(self, word):
+    def search(self, word: str):
-        url = "https://dict.youdao.com/result?word={}&lang={}".format(
+        lang = getlangsrc()
-            quote(word), getlangsrc()
+        if lang == "auto":
-        )
+            if word.isascii():
                lang = "en"
            else:
                lang = "ja"
        url = "https://dict.youdao.com/result?word={}&lang={}".format(quote(word), lang)
        text = requests.get(url, proxies=self.proxy).text
-
+        if not get_element_by("class", "word-head", text):
            return
        text = re.sub("<header([\\s\\S]*?)></header>", "", text)
        text = re.sub("<aside([\\s\\S]*?)></aside>", "", text)
--- a/src/LunaTranslator/myutils/utils.py
+++ b/src/LunaTranslator/myutils/utils.py
@ -21,6 +21,7 @@ from myutils.config import (
 import threading, winreg
 import re, heapq, winsharedutils
 from myutils.wrapper import tryprint, threader
 from html.parser import HTMLParser
 def qimage2binary(qimage: QImage, fmt="BMP"):
@ -95,6 +96,10 @@ def getlanguagespace(lang=None):
 def findenclose(text, tag):
    i = 0
    if tag == "link":
        tags = "<link"
        tage = ">"
    else:
        tags = f"<{tag}"
        tage = f"</{tag}>"
    collect = ""
@ -947,3 +952,73 @@ def createenglishlangmap():
    )
    mp.update({"auto": ""})
    return mp
 class IDParser(HTMLParser):
    """Modified HTMLParser that isolates a tag by one attribute's value.

    ``attr`` is the attribute name (for example "id" or "class") and
    ``attrv`` the exact value it must have. Feed a document with ``loads``
    and read the inner HTML of the matching element with ``get_result``.
    If several separate elements match, the last one that was parsed wins.
    """
    def __init__(self, attr, attrv):
        # (attribute name, attribute value) pair identifying the wanted tag.
        self.id = attr, attrv
        # Grows from [tag] to [tag, start_pos] to [tag, start_pos, end_pos];
        # positions are the (line, column) pairs returned by getpos().
        self.result = None
        # True while the parser is inside the matching element.
        self.started = False
        # Number of currently open tags, per tag name, inside the match.
        self.depth = {}
        self.html = None
        # True until the first token after the matching opening tag is seen.
        self.watch_startpos = False
        HTMLParser.__init__(self)
    def loads(self, html):
        """Parse ``html`` completely, remembering it for ``get_result``."""
        self.html = html
        self.feed(html)
        self.close()
    def handle_starttag(self, tag, attrs):
        attrs = dict(attrs)
        name, value = self.id
        if self.started:
            self.find_startpos(None)
        elif name in attrs and attrs[name] == value:
            # Only begin a capture when none is running: restarting on a
            # nested match (common when matching by class) would move the
            # start position inside the outer element and yield an
            # unbalanced fragment.
            self.result = [tag]
            self.started = True
            self.watch_startpos = True
            # Drop counters left over from an earlier capture (e.g. tags
            # that were never closed) so this element can reach depth 0.
            self.depth = {}
        if self.started:
            self.depth[tag] = self.depth.get(tag, 0) + 1
    def handle_endtag(self, tag):
        if self.started:
            if tag in self.depth:
                self.depth[tag] -= 1
            if self.depth[self.result[0]] == 0:
                self.started = False
                # An element with no content has not seen a token yet; pin
                # the start here so the end is never recorded before it.
                self.find_startpos(None)
                self.result.append(self.getpos())
    def find_startpos(self, x):
        """Needed to put the start position of the result (self.result[1])
        after the opening tag with the requested attribute value"""
        if self.watch_startpos:
            self.watch_startpos = False
            self.result.append(self.getpos())
    handle_entityref = handle_charref = handle_data = handle_comment = handle_decl = (
        handle_pi
    ) = unknown_decl = find_startpos
    def get_result(self):
        """Return the stripped inner HTML of the match, or None if there is none."""
        if self.result is None or len(self.result) != 3:
            return None
        (first_line, first_col), (last_line, last_col) = self.result[1:]
        lines = self.html.split("\n")[first_line - 1 : last_line]
        if len(lines) == 1:
            # Both columns refer to the same line, so cut it in one slice.
            lines[0] = lines[0][first_col:last_col]
        else:
            lines[0] = lines[0][first_col:]
            lines[-1] = lines[-1][:last_col]
        return "\n".join(lines).strip()
 def get_element_by(attr, attrv, html):
    """Return the inner HTML of the tag in ``html`` whose ``attr`` equals ``attrv``.

    The value is whatever ``IDParser.get_result`` reports, which is None
    when no matching element was captured.
    """
    finder = IDParser(attr, attrv)
    finder.loads(html)
    return finder.get_result()
--- a/src/files/defaultconfig/config.json
+++ b/src/files/defaultconfig/config.json
@ -1378,6 +1378,10 @@
            "use": false,
            "name": "jisho"
        },
        "japandict": {
            "use": false,
            "name": "JapanDict"
        },
        "weblio": {
            "use": false,
            "name": "weblio"
--- a/src/plugins/CMakeLists.txt
+++ b/src/plugins/CMakeLists.txt
@ -29,7 +29,7 @@ include(generate_product_version)
 set(VERSION_MAJOR 5)
 set(VERSION_MINOR 50)
-set(VERSION_PATCH 1)
+set(VERSION_PATCH 2)
 add_library(pch pch.cpp)
 target_precompile_headers(pch PUBLIC pch.h)