mirror of
https://github.com/HIllya51/LunaTranslator.git
synced 2024-12-29 16:44:13 +08:00
dict
This commit is contained in:
parent
74b0f246cc
commit
d84757673c
37
src/LunaTranslator/cishu/japandict.py
Normal file
37
src/LunaTranslator/cishu/japandict.py
Normal file
@ -0,0 +1,37 @@
|
|||||||
|
import requests
|
||||||
|
from urllib.parse import quote
|
||||||
|
from cishu.cishubase import cishubase
|
||||||
|
from myutils.utils import get_element_by
|
||||||
|
import threading, base64, re
|
||||||
|
|
||||||
|
|
||||||
|
class japandict(cishubase):
    """Dictionary backend that looks a word up on japandict.com."""

    def makelinkbase64(self, link, saver):
        """Download ``link`` and store it in ``saver[link]`` as a base64 data URI.

        Meant to be used as a thread target; the result is handed back through
        the shared ``saver`` mapping rather than a return value.
        """
        payload = requests.get(link, proxies=self.proxy).content
        encoded = base64.b64encode(payload).decode("utf-8")
        saver[link] = f"data:application/octet-stream;base64,{encoded}"

    def search(self, word):
        """Return the result-list HTML for ``word`` followed by inlined assets.

        Returns ``None`` when the page has no element whose class attribute is
        exactly ``list-group list-group-flush``.
        """
        page = requests.get(
            "https://www.japandict.com/?s={}&lang=eng&list=1".format(quote(word)),
            proxies=self.proxy,
        ).text

        listing = get_element_by("class", "list-group list-group-flush", page)
        if listing is None:
            return None

        styles = '<link rel="stylesheet" href="https://www.japandict.com/static/css/japandict.ac087f3ecbc8.css" type="text/css"><link rel="preload" href="https://www.japandict.com/static/JapaneseRadicals-Regular.woff2" as="font"><link rel="preload" href="https://www.japandict.com/static/radicals_font.woff" as="font">'

        # Fetch every referenced asset concurrently; each worker fills in
        # its own key of the shared mapping.
        inlined = {}
        workers = []
        for href in re.findall('href="(.*?)"', styles):
            worker = threading.Thread(target=self.makelinkbase64, args=(href, inlined))
            workers.append(worker)
            worker.start()
        for worker in workers:
            worker.join()

        # Swap each remote URL that was fetched for its data URI so the
        # snippet no longer refers to the remote asset.
        for href, data_uri in inlined.items():
            styles = styles.replace(href, data_uri)
        return listing + styles
|
@ -2,90 +2,21 @@ import requests
|
|||||||
from urllib.parse import quote
|
from urllib.parse import quote
|
||||||
import re
|
import re
|
||||||
from cishu.cishubase import cishubase
|
from cishu.cishubase import cishubase
|
||||||
|
from myutils.utils import get_element_by
|
||||||
from html.parser import HTMLParser
|
|
||||||
|
|
||||||
|
|
||||||
class IDParser(HTMLParser):
    """HTMLParser subclass that locates the element carrying a given id.

    While parsing it records the tag name, the position just after the
    opening tag and the position of the matching closing tag, so that
    ``get_result`` can slice the element's inner HTML out of the source.
    """

    def __init__(self, id):
        self.id = id
        # Becomes [tag, (line, col) start, (line, col) end] once fully captured.
        self.result = None
        self.started = False
        # Open-tag counters, keyed by tag name, maintained while capturing.
        self.depth = {}
        self.html = None
        # True between the matching start tag and the next parser event.
        self.watch_startpos = False
        HTMLParser.__init__(self)

    def loads(self, html):
        """Parse ``html`` completely, keeping a copy for later slicing."""
        self.html = html
        self.feed(html)
        self.close()

    def handle_starttag(self, tag, attrs):
        attributes = dict(attrs)
        if self.started:
            self.find_startpos(None)
        if ("id", self.id) in attributes.items():
            self.result = [tag]
            self.started = True
            self.watch_startpos = True
        if self.started:
            self.depth[tag] = self.depth.get(tag, 0) + 1

    def handle_endtag(self, tag):
        if not self.started:
            return
        if tag in self.depth:
            self.depth[tag] -= 1
        # The capture ends when every opened tag of the target's kind is closed.
        if self.depth[self.result[0]] == 0:
            self.started = False
            self.result.append(self.getpos())

    def find_startpos(self, x):
        """Needed to put the start position of the result (self.result[1])
        after the opening tag with the requested id"""
        if not self.watch_startpos:
            return
        self.watch_startpos = False
        self.result.append(self.getpos())

    # Any parser event that follows the opening tag marks the content start.
    handle_entityref = handle_charref = handle_data = find_startpos
    handle_comment = handle_decl = handle_pi = unknown_decl = find_startpos

    def get_result(self):
        """Return the captured inner HTML, or None if nothing complete was found."""
        if self.result is None or len(self.result) != 3:
            return None
        _, (start_line, start_col), (end_line, end_col) = self.result
        # Line numbers from getpos() are 1-based, columns are 0-based.
        rows = self.html.split("\n")[start_line - 1 : end_line]
        rows[0] = rows[0][start_col:]
        if len(rows) == 1:
            # Start and end share a line: the end column must be shifted by
            # the prefix that was just cut off.
            rows[-1] = rows[-1][: end_col - start_col]
        rows[-1] = rows[-1][:end_col]
        return "\n".join(rows).strip()
|
|
||||||
|
|
||||||
|
|
||||||
def get_element_by_id(id, html):
    """Return the inner HTML of the element whose id attribute equals ``id``.

    The result is whatever ``IDParser.get_result`` yields after parsing
    ``html`` (``None`` when no complete element was captured).
    """
    finder = IDParser(id)
    finder.loads(html)
    return finder.get_result()
|
|
||||||
|
|
||||||
|
|
||||||
class jisho(cishubase):
|
class jisho(cishubase):
|
||||||
|
|
||||||
def search(self, word):
|
def search(self, word):
|
||||||
url = "https://jisho.org/word/{}".format(quote(word))
|
url = "https://jisho.org/search/{}".format(quote(word))
|
||||||
html = requests.get(
|
html = requests.get(
|
||||||
url,
|
url,
|
||||||
proxies=self.proxy,
|
proxies=self.proxy,
|
||||||
).text
|
).text
|
||||||
|
|
||||||
res = get_element_by_id("page_container", html)
|
if get_element_by("id", "no-matches", html):
|
||||||
|
return
|
||||||
|
res = get_element_by("id", "page_container", html)
|
||||||
if res is None:
|
if res is None:
|
||||||
return
|
return
|
||||||
res = (
|
res = (
|
||||||
@ -94,6 +25,7 @@ class jisho(cishubase):
|
|||||||
.replace(
|
.replace(
|
||||||
'<a href="#" class="signin">Log in</a> to talk about this word.', ""
|
'<a href="#" class="signin">Log in</a> to talk about this word.', ""
|
||||||
)
|
)
|
||||||
|
.replace(get_element_by("id", "other_dictionaries", html), "")
|
||||||
)
|
)
|
||||||
|
|
||||||
ss = re.search('href="https://assets.jisho.org/assets/application(.*)"', html)
|
ss = re.search('href="https://assets.jisho.org/assets/application(.*)"', html)
|
||||||
|
@ -4,16 +4,22 @@ from urllib.parse import quote
|
|||||||
import re, os
|
import re, os
|
||||||
from cishu.cishubase import cishubase
|
from cishu.cishubase import cishubase
|
||||||
from myutils.utils import simplehtmlparser
|
from myutils.utils import simplehtmlparser
|
||||||
|
from myutils.utils import get_element_by
|
||||||
|
|
||||||
|
|
||||||
class youdao(cishubase):
|
class youdao(cishubase):
|
||||||
|
|
||||||
def search(self, word):
|
def search(self, word: str):
|
||||||
url = "https://dict.youdao.com/result?word={}&lang={}".format(
|
lang = getlangsrc()
|
||||||
quote(word), getlangsrc()
|
if lang == "auto":
|
||||||
)
|
if word.isascii():
|
||||||
|
lang = "en"
|
||||||
|
else:
|
||||||
|
lang = "ja"
|
||||||
|
url = "https://dict.youdao.com/result?word={}&lang={}".format(quote(word), lang)
|
||||||
text = requests.get(url, proxies=self.proxy).text
|
text = requests.get(url, proxies=self.proxy).text
|
||||||
|
if not get_element_by("class", "word-head", text):
|
||||||
|
return
|
||||||
text = re.sub("<header([\\s\\S]*?)></header>", "", text)
|
text = re.sub("<header([\\s\\S]*?)></header>", "", text)
|
||||||
text = re.sub("<aside([\\s\\S]*?)></aside>", "", text)
|
text = re.sub("<aside([\\s\\S]*?)></aside>", "", text)
|
||||||
|
|
||||||
|
@ -21,6 +21,7 @@ from myutils.config import (
|
|||||||
import threading, winreg
|
import threading, winreg
|
||||||
import re, heapq, winsharedutils
|
import re, heapq, winsharedutils
|
||||||
from myutils.wrapper import tryprint, threader
|
from myutils.wrapper import tryprint, threader
|
||||||
|
from html.parser import HTMLParser
|
||||||
|
|
||||||
|
|
||||||
def qimage2binary(qimage: QImage, fmt="BMP"):
|
def qimage2binary(qimage: QImage, fmt="BMP"):
|
||||||
@ -95,6 +96,10 @@ def getlanguagespace(lang=None):
|
|||||||
|
|
||||||
def findenclose(text, tag):
|
def findenclose(text, tag):
|
||||||
i = 0
|
i = 0
|
||||||
|
if tag == "link":
|
||||||
|
tags = "<link"
|
||||||
|
tage = ">"
|
||||||
|
else:
|
||||||
tags = f"<{tag}"
|
tags = f"<{tag}"
|
||||||
tage = f"</{tag}>"
|
tage = f"</{tag}>"
|
||||||
collect = ""
|
collect = ""
|
||||||
@ -947,3 +952,73 @@ def createenglishlangmap():
|
|||||||
)
|
)
|
||||||
mp.update({"auto": ""})
|
mp.update({"auto": ""})
|
||||||
return mp
|
return mp
|
||||||
|
|
||||||
|
|
||||||
|
class IDParser(HTMLParser):
    """HTMLParser subclass that isolates the inner HTML of one element.

    The element is selected by an exact attribute match: attribute ``attr``
    must be present and its value must equal ``attrv`` (for ``class`` this
    means the whole attribute string, not a single class token).

    Matching rules:
    * start tags seen while an element is already being captured are treated
      as content, even if they carry the same attribute value;
    * if several non-nested elements match, the last one wins;
    * an element with no content yields an empty string.
    """

    def __init__(self, attr, attrv):
        # (attribute name, required value); the attribute is still called
        # ``id`` from the time when only id lookups were supported.
        self.id = attr, attrv
        # Becomes [tag, (line, col) start, (line, col) end] once fully captured.
        self.result = None
        self.started = False
        # Open-tag counters, keyed by tag name, for the current capture.
        self.depth = {}
        self.html = None
        # True between the matching start tag and the next parser event.
        self.watch_startpos = False
        super().__init__()

    def loads(self, html):
        """Parse ``html`` completely, keeping a copy for later slicing."""
        self.html = html
        self.feed(html)
        self.close()

    def handle_starttag(self, tag, attrs):
        if self.started:
            # Inside the captured element every start tag is plain content.
            # Re-matching here would discard the recorded start position and
            # corrupt the range when a descendant has the same attribute.
            self.find_startpos(None)
        elif self.id in dict(attrs).items():
            self.result = [tag]
            self.started = True
            self.watch_startpos = True
            # Start from clean counters: leftovers from an earlier capture
            # (unclosed or void tags) would keep this one from ever closing.
            self.depth = {}
        if self.started:
            self.depth[tag] = self.depth.get(tag, 0) + 1

    def handle_endtag(self, tag):
        if not self.started:
            return
        # If the element is closed right after its opening tag no other event
        # has recorded the start position yet, so record it here.
        self.find_startpos(None)
        if tag in self.depth:
            self.depth[tag] -= 1
        # The capture ends when every opened tag of the target's kind is closed.
        if self.depth[self.result[0]] == 0:
            self.started = False
            self.result.append(self.getpos())

    def find_startpos(self, x):
        """Record the content start position (self.result[1]).

        Called for the first parser event after the matching opening tag, so
        the recorded position lies just behind that tag. ``x`` is ignored; it
        only exists so the method can double as a generic handler.
        """
        if self.watch_startpos:
            self.watch_startpos = False
            self.result.append(self.getpos())

    # Any parser event that follows the opening tag marks the content start.
    handle_entityref = handle_charref = handle_data = find_startpos
    handle_comment = handle_decl = handle_pi = unknown_decl = find_startpos

    def get_result(self):
        """Return the captured inner HTML, stripped of surrounding whitespace.

        Returns ``None`` when no matching element was found or when the
        matching element was never closed.
        """
        if self.result is None or len(self.result) != 3:
            return None
        _, (start_line, start_col), (end_line, end_col) = self.result
        # Line numbers from getpos() are 1-based, columns are 0-based.
        lines = self.html.split("\n")[start_line - 1 : end_line]
        lines[0] = lines[0][start_col:]
        if len(lines) == 1:
            # Start and end share a line: the end column must be shifted by
            # the prefix that was just cut off.
            lines[-1] = lines[-1][: end_col - start_col]
        else:
            lines[-1] = lines[-1][:end_col]
        return "\n".join(lines).strip()
|
||||||
|
|
||||||
|
|
||||||
|
def get_element_by(attr, attrv, html):
    """Return the inner HTML of the element whose ``attr`` attribute equals ``attrv``.

    The result is whatever ``IDParser.get_result`` yields after parsing
    ``html`` (``None`` when no complete element was captured).
    """
    finder = IDParser(attr, attrv)
    finder.loads(html)
    return finder.get_result()
|
||||||
|
@ -1378,6 +1378,10 @@
|
|||||||
"use": false,
|
"use": false,
|
||||||
"name": "jisho"
|
"name": "jisho"
|
||||||
},
|
},
|
||||||
|
"japandict": {
|
||||||
|
"use": false,
|
||||||
|
"name": "JapanDict"
|
||||||
|
},
|
||||||
"weblio": {
|
"weblio": {
|
||||||
"use": false,
|
"use": false,
|
||||||
"name": "weblio"
|
"name": "weblio"
|
||||||
|
@ -29,7 +29,7 @@ include(generate_product_version)
|
|||||||
|
|
||||||
set(VERSION_MAJOR 5)
|
set(VERSION_MAJOR 5)
|
||||||
set(VERSION_MINOR 50)
|
set(VERSION_MINOR 50)
|
||||||
set(VERSION_PATCH 1)
|
set(VERSION_PATCH 2)
|
||||||
|
|
||||||
add_library(pch pch.cpp)
|
add_library(pch pch.cpp)
|
||||||
target_precompile_headers(pch PUBLIC pch.h)
|
target_precompile_headers(pch PUBLIC pch.h)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user