From d84757673cac4ae89c4e9639c776031f731ded17 Mon Sep 17 00:00:00 2001
From: test123456654321 <16307130148@fudan.edu.cn>
Date: Sat, 26 Oct 2024 14:47:00 +0800
Subject: [PATCH] dict

---
 src/LunaTranslator/cishu/japandict.py | 37 +++++++++++++
 src/LunaTranslator/cishu/jisho.py     | 80 ++-------------------------
 src/LunaTranslator/cishu/youdao.py    | 16 ++++--
 src/LunaTranslator/myutils/utils.py   | 79 +++++++++++++++++++++++++-
 src/files/defaultconfig/config.json   |  4 ++
 src/plugins/CMakeLists.txt            |  2 +-
 6 files changed, 136 insertions(+), 82 deletions(-)
 create mode 100644 src/LunaTranslator/cishu/japandict.py
diff --git a/src/LunaTranslator/cishu/japandict.py b/src/LunaTranslator/cishu/japandict.py
new file mode 100644
index 00000000..b87b7f26
--- /dev/null
+++ b/src/LunaTranslator/cishu/japandict.py
@@ -0,0 +1,37 @@
+import requests
+from urllib.parse import quote
+from cishu.cishubase import cishubase
+from myutils.utils import get_element_by
+import threading, base64, re
+
+
+class japandict(cishubase):
+    """Dictionary backend that looks a word up on www.japandict.com."""
+
+    def makelinkbase64(self, link, saver):
+        """Download `link` and store its bytes in `saver[link]` as a base64 data URI."""
+        payload = requests.get(link, proxies=self.proxy).content
+        encoded = base64.b64encode(payload).decode("utf-8")
+        saver[link] = f"data:application/octet-stream;base64,{encoded}"
+
+    def search(self, word):
+        """Return the result-list HTML for `word` followed by inlined <link> tags, or None."""
+        url = "https://www.japandict.com/?s={}&lang=eng&list=1".format(quote(word))
+        page = requests.get(url, proxies=self.proxy).text
+        found = get_element_by("class", "list-group list-group-flush", page)
+        if found is None:
+            return None
+        styles = '<link rel="stylesheet" href="https://www.japandict.com/static/css/japandict.ac087f3ecbc8.css" type="text/css"><link rel="preload" href="https://www.japandict.com/static/JapaneseRadicals-Regular.woff2" as="font"><link rel="preload" href="https://www.japandict.com/static/radicals_font.woff" as="font">'
+        inlined = {}
+        # One downloader thread per href found in `styles`; each writes saver[link] once.
+        workers = [
+            threading.Thread(target=self.makelinkbase64, args=(href, inlined))
+            for href in re.findall('href="(.*?)"', styles)
+        ]
+        for worker in workers:
+            worker.start()
+        for worker in workers:
+            worker.join()
+        for href, data_uri in inlined.items():
+            styles = styles.replace(href, data_uri)
+        return found + styles
diff --git a/src/LunaTranslator/cishu/jisho.py b/src/LunaTranslator/cishu/jisho.py
index 7ff9c66a..322ddbe1 100644
--- a/src/LunaTranslator/cishu/jisho.py
+++ b/src/LunaTranslator/cishu/jisho.py
@@ -2,90 +2,21 @@ import requests
 from urllib.parse import quote
 import re
 from cishu.cishubase import cishubase
-
-from html.parser import HTMLParser
-
-
-class IDParser(HTMLParser):
-    """Modified HTMLParser that isolates a tag with the specified id"""
-
-    def __init__(self, id):
-        self.id = id
-        self.result = None
-        self.started = False
-        self.depth = {}
-        self.html = None
-        self.watch_startpos = False
-        HTMLParser.__init__(self)
-
-    def loads(self, html):
-        self.html = html
-        self.feed(html)
-        self.close()
-
-    def handle_starttag(self, tag, attrs):
-        attrs = dict(attrs)
-        if self.started:
-            self.find_startpos(None)
-        if "id" in attrs and attrs["id"] == self.id:
-            self.result = [tag]
-            self.started = True
-            self.watch_startpos = True
-        if self.started:
-            if not tag in self.depth:
-                self.depth[tag] = 0
-            self.depth[tag] += 1
-
-    def handle_endtag(self, tag):
-        if self.started:
-            if tag in self.depth:
-                self.depth[tag] -= 1
-            if self.depth[self.result[0]] == 0:
-                self.started = False
-                self.result.append(self.getpos())
-
-    def find_startpos(self, x):
-        """Needed to put the start position of the result (self.result[1])
-        after the opening tag with the requested id"""
-        if self.watch_startpos:
-            self.watch_startpos = False
-            self.result.append(self.getpos())
-
-    handle_entityref = handle_charref = handle_data = handle_comment = handle_decl = (
-        handle_pi
-    ) = unknown_decl = find_startpos
-
-    def get_result(self):
-        if self.result == None:
-            return None
-        if len(self.result) != 3:
-            return None
-        lines = self.html.split("\n")
-        lines = lines[self.result[1][0] - 1 : self.result[2][0]]
-        lines[0] = lines[0][self.result[1][1] :]
-        if len(lines) == 1:
-            lines[-1] = lines[-1][: self.result[2][1] - self.result[1][1]]
-        lines[-1] = lines[-1][: self.result[2][1]]
-        return "\n".join(lines).strip()
-
-
-def get_element_by_id(id, html):
-    """Return the content of the tag with the specified id in the passed HTML document"""
-    parser = IDParser(id)
-    parser.loads(html)
-    return parser.get_result()
+from myutils.utils import get_element_by
 
 
 class jisho(cishubase):
 
     def search(self, word):
-        url = "https://jisho.org/word/{}".format(quote(word))
+        url = "https://jisho.org/search/{}".format(quote(word))
         html = requests.get(
             url,
             proxies=self.proxy,
         ).text
 
-        res = get_element_by_id("page_container", html)
+        if get_element_by("id", "no-matches", html):
+            return
+        res = get_element_by("id", "page_container", html)
         if res is None:
             return
         res = (
@@ -94,6 +25,7 @@ class jisho(cishubase):
             .replace(
                 '<a href="#" class="signin">Log in</a> to talk about this word.', ""
             )
+            .replace(get_element_by("id", "other_dictionaries", html), "")
         )
 
         ss = re.search('href="https://assets.jisho.org/assets/application(.*)"', html)
diff --git a/src/LunaTranslator/cishu/youdao.py b/src/LunaTranslator/cishu/youdao.py
index 624a01c7..bf1dd163 100644
--- a/src/LunaTranslator/cishu/youdao.py
+++ b/src/LunaTranslator/cishu/youdao.py
@@ -4,16 +4,22 @@ from urllib.parse import quote
 import re, os
 from cishu.cishubase import cishubase
 from myutils.utils import simplehtmlparser
+from myutils.utils import get_element_by
 
 
 class youdao(cishubase):
 
-    def search(self, word):
-        url = "https://dict.youdao.com/result?word={}&lang={}".format(
-            quote(word), getlangsrc()
-        )
+    def search(self, word: str):
+        lang = getlangsrc()
+        if lang == "auto":
+            if word.isascii():
+                lang = "en"
+            else:
+                lang = "ja"
+        url = "https://dict.youdao.com/result?word={}&lang={}".format(quote(word), lang)
         text = requests.get(url, proxies=self.proxy).text
-
+        if not get_element_by("class", "word-head", text):
+            return
         text = re.sub("<header([\\s\\S]*?)></header>", "", text)
         text = re.sub("<aside([\\s\\S]*?)></aside>", "", text)
 
diff --git a/src/LunaTranslator/myutils/utils.py b/src/LunaTranslator/myutils/utils.py
index 5f9d7405..8f76a8ac 100644
--- a/src/LunaTranslator/myutils/utils.py
+++ b/src/LunaTranslator/myutils/utils.py
@@ -21,6 +21,7 @@ from myutils.config import (
 import threading, winreg
 import re, heapq, winsharedutils
 from myutils.wrapper import tryprint, threader
+from html.parser import HTMLParser
 
 
 def qimage2binary(qimage: QImage, fmt="BMP"):
@@ -95,8 +96,12 @@ def getlanguagespace(lang=None):
 
 def findenclose(text, tag):
     i = 0
-    tags = f"<{tag}"
-    tage = f"</{tag}>"
+    if tag == "link":
+        tags = "<link"
+        tage = ">"
+    else:
+        tags = f"<{tag}"
+        tage = f"</{tag}>"
     collect = ""
     __ = 0
     while True:
@@ -947,3 +952,73 @@ def createenglishlangmap():
     )
     mp.update({"auto": ""})
     return mp
+
+
+class IDParser(HTMLParser):
+    """Modified HTMLParser that isolates the tag whose attribute `attr` equals `attrv`"""
+
+    VOID_TAGS = frozenset("area base br col embed hr img input link meta source track wbr".split())
+
+    def __init__(self, attr, attrv):
+        self.id = attr, attrv  # (attribute name, required value) identifying the target tag
+        self.result = None  # grows to [tag, start_pos, end_pos] once a capture completes
+        self.started = False  # True while inside the captured element
+        self.depth = {}  # open-tag counter per tag name for the current capture
+        self.html = None
+        self.watch_startpos = False  # True until the content start position is recorded
+        HTMLParser.__init__(self)
+
+    def loads(self, html):
+        self.html = html  # kept so get_result can slice the source text by position
+        self.feed(html)
+        self.close()
+
+    def handle_starttag(self, tag, attrs):
+        attrs = dict(attrs)
+        if self.started:
+            self.find_startpos(None)
+        # No restart inside an open capture (a nested match would clobber it); void tags never close.
+        elif tag not in self.VOID_TAGS and self.id[0] in attrs and attrs[self.id[0]] == self.id[1]:
+            self.result = [tag]
+            self.started = True
+            self.watch_startpos = True
+            self.depth = {}  # counters left over from an earlier capture must not leak in
+        if self.started:
+            self.depth[tag] = self.depth.get(tag, 0) + 1
+
+    def handle_endtag(self, tag):
+        if self.started:
+            if tag in self.depth:
+                self.depth[tag] -= 1
+            if self.depth[self.result[0]] == 0:
+                self.started = False
+                self.result.append(self.getpos())
+
+    def find_startpos(self, x):
+        """Needed to put the start position of the result (self.result[1])
+        after the opening tag with the requested attribute value"""
+        if self.watch_startpos:
+            self.watch_startpos = False
+            self.result.append(self.getpos())
+
+    handle_entityref = handle_charref = handle_data = handle_comment = handle_decl = (
+        handle_pi
+    ) = unknown_decl = find_startpos
+
+    def get_result(self):  # inner HTML of the captured tag, or None without a complete capture
+        if self.result is None or len(self.result) != 3:
+            return None
+        lines = self.html.split("\n")
+        lines = lines[self.result[1][0] - 1 : self.result[2][0]]
+        lines[0] = lines[0][self.result[1][1] :]
+        if len(lines) == 1:
+            lines[-1] = lines[-1][: self.result[2][1] - self.result[1][1]]
+        lines[-1] = lines[-1][: self.result[2][1]]
+        return "\n".join(lines).strip()
+
+
+def get_element_by(attr, attrv, html):
+    """Return the content of the tag whose attribute `attr` equals `attrv` in the passed HTML document"""
+    finder = IDParser(attr, attrv)
+    finder.loads(html)
+    return finder.get_result()
diff --git a/src/files/defaultconfig/config.json b/src/files/defaultconfig/config.json
index 1c967a17..445537ea 100644
--- a/src/files/defaultconfig/config.json
+++ b/src/files/defaultconfig/config.json
@@ -1378,6 +1378,10 @@
             "use": false,
             "name": "jisho"
         },
+        "japandict": {
+            "use": false,
+            "name": "JapanDict"
+        },
         "weblio": {
             "use": false,
             "name": "weblio"
diff --git a/src/plugins/CMakeLists.txt b/src/plugins/CMakeLists.txt
index 419a5516..a0e9ad8c 100644
--- a/src/plugins/CMakeLists.txt
+++ b/src/plugins/CMakeLists.txt
@@ -29,7 +29,7 @@ include(generate_product_version)
 
 set(VERSION_MAJOR 5)
 set(VERSION_MINOR 50)
-set(VERSION_PATCH 1)
+set(VERSION_PATCH 2)
 
 add_library(pch pch.cpp)
 target_precompile_headers(pch PUBLIC pch.h)