mirror of https://github.com/HIllya51/LunaTranslator.git
synced 2024-12-28 08:04:13 +08:00

commit 220e6e3051
parent 6f0132a166

    update
@@ -12,14 +12,14 @@ from PyQt5.QtWidgets import (
    QTabWidget,
    QFileDialog,
    QTabBar,
    QLabel,
    QLabel
)
from PyQt5.QtGui import QPixmap, QImage
from traceback import print_exc
import requests, json
import requests, json, subprocess, time
from PyQt5.QtCore import pyqtSignal, Qt
import qtawesome, functools, os, base64
import gobject, uuid
import gobject, uuid, signal
from myutils.config import globalconfig, _TR, static_data
import myutils.ankiconnect as anki
from gui.usefulwidget import (
@@ -31,13 +31,65 @@ from gui.usefulwidget import (
    getlineedit,
    getsimpleswitch,
    getcolorbutton,
    tabadd_lazy,
    tabadd_lazy
)
from myutils.wrapper import threader
from myutils.ocrutil import imageCut, ocr_run
from gui.rangeselect import rangeselct_function


class ffmpeg_virtual_audio_capturer:
    def __init__(self):
        os.makedirs("./cache/tts", exist_ok=True)
        self.file = os.path.abspath(
            os.path.join("./cache/tts", str(time.time()) + ".mp3")
        )
        try:
            self.engine = subprocess.Popen(
                os.path.join(
                    globalconfig["ffmpeg"],
                    f'ffmpeg.exe -f dshow -i audio="virtual-audio-capturer" "{self.file}"',
                ),
                stdin=subprocess.PIPE,
            )
        except:
            print_exc()

    def end(self):
        try:
            self.engine.stdin.write(b"q")
            self.engine.stdin.flush()
        except:
            pass
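Note: a minimal standalone sketch (not part of the commit) of the start/stop pattern ffmpeg_virtual_audio_capturer uses above. ffmpeg finalizes its output when it reads "q" on stdin, which is why end() writes b"q". The ffmpeg.exe name and the "virtual-audio-capturer" DirectShow device are assumptions carried over from the hunk above and must be installed separately:

import subprocess, time

# start capturing the virtual audio device into an mp3 (Windows / dshow assumed)
proc = subprocess.Popen(
    'ffmpeg.exe -f dshow -i audio="virtual-audio-capturer" capture.mp3',
    stdin=subprocess.PIPE,
)
time.sleep(3)            # record for a few seconds
proc.stdin.write(b"q")   # "q" on stdin asks ffmpeg to stop and close the file cleanly
proc.stdin.flush()
proc.wait()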

class statusbutton(QPushButton):
    statuschanged1 = pyqtSignal(int)
    statuschanged2 = pyqtSignal(int)

    def __init__(self, icons, colors):
        super().__init__()
        self.idx = 0
        self.icons = icons
        self.colors = colors
        self.clicked.connect(self.setChecked)
        self.seticon()

    def seticon(self):
        self.setIcon(
            qtawesome.icon(
                self.icons[(self.idx) % len(self.icons)],
                color=self.colors[(self.idx) % len(self.colors)],
            )
        )

    def setChecked(self, a0):
        super().setChecked(a0)
        self.idx += 1
        self.statuschanged1.emit((self.idx) % len(self.icons))
        self.statuschanged2.emit((self.idx) % len(self.colors))
        self.seticon()
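Note: a short usage sketch (not part of the commit; assumes a running QApplication and that the statusbutton class above is in scope). Every click increments idx, so statuschanged1 emits idx % len(icons) and statuschanged2 emits idx % len(colors); functools.partial pre-binds the target widget so the emitted index arrives as the last argument, which is exactly how the recordbtn wiring further down uses it:

import functools
from PyQt5.QtWidgets import QApplication, QLineEdit

app = QApplication([])
target = QLineEdit()

def startorendrecord(target, idx):
    # with icons=["fa.microphone", "fa.stop"], idx alternates 1, 0, 1, 0, ...
    print("start recording" if idx == 1 else "stop recording", target)

btn = statusbutton(icons=["fa.microphone", "fa.stop"], colors=[""])
btn.statuschanged1.connect(functools.partial(startorendrecord, target))
btn.show()
app.exec_()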

class AnkiWindow(QWidget):
    __ocrsettext = pyqtSignal(str)
    refreshhtml = pyqtSignal()
@@ -288,8 +340,47 @@ class AnkiWindow(QWidget):
            getsimpleswitch(globalconfig["ankiconnect"], "ocrcroped"),
        )

        layout.addWidget(QLabel())
        layout.addRow(_TR("录音"), QLabel())
        lb = QLabel()
        lb.setOpenExternalLinks(True)
        lb.setText(
            '<a href="https://github.com/HIllya51/RESOURCES/releases/download/softwares/virtual-audio.zip">virtual-audio-capturer</a>'
        )
        layout.addRow(_TR("安装录音驱动"), lb)
        ffmpegpath = getlineedit(globalconfig, "ffmpeg", readonly=True)

        def selectpath():
            f = QFileDialog.getExistingDirectory()
            if f != "":
                ffmpegpath.setText(f)

        layout.addRow(
            _TR("ffmpeg"),
            getboxlayout(
                [
                    ffmpegpath,
                    getcolorbutton(
                        "",
                        "",
                        selectpath,
                        icon="fa.gear",
                        constcolor="#FF69B4",
                    ),
                ],
                makewidget=True,
            ),
        )

        return wid

    def startorendrecord(self, target: QLineEdit, idx):
        if idx == 1:
            self.recorder = ffmpeg_virtual_audio_capturer()
        else:
            self.recorder.end()
            target.setText(self.recorder.file)

    def createaddtab(self):
        layout = QVBoxLayout()
        wid = QWidget()
@@ -311,6 +402,14 @@ class AnkiWindow(QWidget):
        self.editpath.textChanged.connect(self.wrappedpixmap)
        self.example = QPlainTextEdit()
        self.remarks = QTextEdit()
        recordbtn1 = statusbutton(icons=["fa.microphone", "fa.stop"], colors=[""])
        recordbtn1.statuschanged1.connect(
            functools.partial(self.startorendrecord, self.audiopath)
        )
        recordbtn2 = statusbutton(icons=["fa.microphone", "fa.stop"], colors=[""])
        recordbtn2.statuschanged1.connect(
            functools.partial(self.startorendrecord, self.audiopath_sentence)
        )
        layout.addLayout(
            getboxlayout(
                [
@@ -335,11 +434,14 @@ class AnkiWindow(QWidget):
                    [
                        QLabel(_TR("语音")),
                        self.audiopath,
                        recordbtn1,
                        soundbutton,
                        getcolorbutton(
                            "",
                            "",
                            functools.partial(self.selectaudio),
                            functools.partial(
                                self.selecfile, self.audiopath
                            ),
                            icon="fa.gear",
                            constcolor="#FF69B4",
                        ),
@@ -349,11 +451,14 @@ class AnkiWindow(QWidget):
                    [
                        QLabel(_TR("语音_例句")),
                        self.audiopath_sentence,
                        recordbtn2,
                        soundbutton2,
                        getcolorbutton(
                            "",
                            "",
                            functools.partial(self.selectaudio2),
                            functools.partial(
                                self.selecfile, self.audiopath_sentence
                            ),
                            icon="fa.gear",
                            constcolor="#FF69B4",
                        ),
@@ -367,7 +472,9 @@ class AnkiWindow(QWidget):
                        getcolorbutton(
                            "",
                            "",
                            functools.partial(self.selectimage),
                            functools.partial(
                                self.selecfile, self.editpath
                            ),
                            icon="fa.gear",
                            constcolor="#FF69B4",
                        ),
@@ -406,23 +513,11 @@ class AnkiWindow(QWidget):
        pix = pix.scaled(self.viewimagelabel.size() * rate, Qt.KeepAspectRatio)
        self.viewimagelabel.setPixmap(pix)

    def selectimage(self):
    def selecfile(self, item):
        f = QFileDialog.getOpenFileName()
        res = f[0]
        if res != "":
            self.editpath.setText(res)

    def selectaudio(self):
        f = QFileDialog.getOpenFileName()
        res = f[0]
        if res != "":
            self.audiopath.setText(res)

    def selectaudio2(self):
        f = QFileDialog.getOpenFileName()
        res = f[0]
        if res != "":
            self.audiopath_sentence.setText(res)
            item.setText(res)

    def reset(self, text):
        self.currentword = text
@@ -578,8 +673,8 @@ class searchwordW(closeashidewindow):
        soundbutton.clicked.connect(self.langdu)
        self.searchlayout.addWidget(soundbutton)

        ankiconnect = QPushButton(qtawesome.icon("fa.adn"), "")
        ankiconnect.clicked.connect(self.onceaddankiwindow)
        ankiconnect = statusbutton(icons=["fa.adn"], colors=["", "#FF69B4"])
        ankiconnect.statuschanged2.connect(self.onceaddankiwindow)
        self.searchlayout.addWidget(ankiconnect)

        self.tab = QTabBar(self)
@@ -596,7 +691,6 @@ class searchwordW(closeashidewindow):
        self.textOutput = textOutput
        self.cache_results = {}
        self.hiding = True
        self.addankiwindowidx = 0

        tablayout = QVBoxLayout()
        tablayout.addWidget(self.tab)
@@ -604,16 +698,17 @@ class searchwordW(closeashidewindow):
        tablayout.setContentsMargins(0, 0, 0, 0)
        tablayout.setSpacing(0)
        self.vboxlayout.addLayout(tablayout)
        self.isfirstshowanki = True

    def onceaddankiwindow(self):
        if self.addankiwindowidx == 0:
            self.vboxlayout.addWidget(self.ankiwindow)
        else:
            if self.addankiwindowidx % 2 == 0:
                self.ankiwindow.show()
    def onceaddankiwindow(self, idx):
        if idx == 1:
            if self.isfirstshowanki:
                self.vboxlayout.addWidget(self.ankiwindow)
            else:
                self.ankiwindow.hide()
        self.addankiwindowidx += 1
                self.ankiwindow.show()
        else:
            self.ankiwindow.hide()
        self.isfirstshowanki = False

    def langdu(self):
        if gobject.baseobject.reader:
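Note: a tiny sketch (not part of the commit) of why onceaddankiwindow(idx) now receives alternating 1/0. The ankiconnect statusbutton above is built with colors=["", "#FF69B4"], and statuschanged2 emits idx % len(colors) on each click, so 1 shows the Anki panel and 0 hides it:

colors = ["", "#FF69B4"]
idx = 0
for click in range(4):
    idx += 1
    emitted = idx % len(colors)
    print(emitted, "show" if emitted == 1 else "hide")
# -> 1 show, 0 hide, 1 show, 0 hide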

@@ -399,9 +399,10 @@ def getsimplecombobox(lst, d, k, callback=None):
    return s


def getlineedit(d, key, callback=None):
def getlineedit(d, key, callback=None, readonly=False):
    s = QLineEdit()
    s.setText(d[key])
    s.setReadOnly(readonly)
    s.textChanged.connect(functools.partial(callbackwrap, d, key, callback))
    return s
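Note: a one-line usage sketch (not part of the commit) of the new readonly flag, matching how the Anki settings tab above uses it; setText() still fires textChanged, so programmatic updates keep flowing into the backing dict through callbackwrap:

ffmpegpath = getlineedit(globalconfig, "ffmpeg", readonly=True)  # user can't type; selectpath() can still setText()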

@@ -84,22 +84,6 @@ log.addHandler(logging.NullHandler())


def tts_langs():
    """Languages Google Text-to-Speech supports.

    Returns:
        dict: A dictionary of the type `{ '<lang>': '<name>'}`

        Where `<lang>` is an IETF language tag such as `en` or `zh-TW`,
        and `<name>` is the full English name of the language, such as
        `English` or `Chinese (Mandarin/Taiwan)`.

    The dictionary returned combines languages from two origins:

    - Languages fetched from Google Translate (pre-generated in :mod:`gtts.langs`)
    - Languages that are undocumented variations that were observed to work and
      present different dialects or accents.

    """
    langs = dict()
    langs.update(_main_langs())
    langs.update(_extra_langs())
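Note: a hedged usage sketch (not part of the commit) of the function documented above, assuming the vendored gtts lang module is importable:

langs = tts_langs()
print(langs.get("zh-TW"))  # -> 'Chinese (Mandarin/Taiwan)', one of the _extra_langs() entries below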

@@ -108,16 +92,6 @@ def tts_langs():


def _extra_langs():
    """Define extra languages.

    Returns:
        dict: A dictionary of extra languages manually defined.

        Variations of the ones generated in `_main_langs`,
        observed to provide different dialects or accents or
        just simply accepted by the Google Translate Text-to-Speech API.

    """
    return {
        # Chinese
        "zh-TW": "Chinese (Mandarin/Taiwan)",
@@ -126,25 +100,6 @@ def _extra_langs():


def _fallback_deprecated_lang(lang):
    """Languages Google Text-to-Speech used to support.

    Language tags that don't work anymore, but that can
    fallback to a more general language code to maintain
    compatibility.

    Args:
        lang (string): The language tag.

    Returns:
        string: The language tag, as-is if not deprecated,
        or a fallback if it exits.

    Example:
        ``en-GB`` returns ``en``.
        ``en-gb`` returns ``en``.

    """

    deprecated = {
        # '<fallback>': [<list of deprecated langs>]
        "en": [
@@ -205,51 +160,6 @@ class symbols:


class RegexBuilder:
    r"""Builds regex using arguments passed into a pattern template.

    Builds a regex object for which the pattern is made from an argument
    passed into a template. If more than one argument is passed (iterable),
    each pattern is joined by "|" (regex alternation 'or') to create a
    single pattern.

    Args:
        pattern_args (iteratable): String element(s) to be each passed to
            ``pattern_func`` to create a regex pattern. Each element is
            ``re.escape``'d before being passed.
        pattern_func (callable): A 'template' function that should take a
            string and return a string. It should take an element of
            ``pattern_args`` and return a valid regex pattern group string.
        flags: ``re`` flag(s) to compile with the regex.

    Example:
        To create a simple regex that matches on the characters "a", "b",
        or "c", followed by a period::

            >>> rb = RegexBuilder('abc', lambda x: "{}\.".format(x))

        Looking at ``rb.regex`` we get the following compiled regex::

            >>> print(rb.regex)
            'a\.|b\.|c\.'

        The above is fairly simple, but this class can help in writing more
        complex repetitive regex, making them more readable and easier to
        create by using existing data structures.

    Example:
        To match the character following the words "lorem", "ipsum", "meili"
        or "koda"::

            >>> words = ['lorem', 'ipsum', 'meili', 'koda']
            >>> rb = RegexBuilder(words, lambda x: "(?<={}).".format(x))

        Looking at ``rb.regex`` we get the following compiled regex::

            >>> print(rb.regex)
            '(?<=lorem).|(?<=ipsum).|(?<=meili).|(?<=koda).'

    """

    def __init__(self, pattern_args, pattern_func, flags=0):
        self.pattern_args = pattern_args
        self.pattern_func = pattern_func
@@ -273,49 +183,6 @@ class RegexBuilder:


class PreProcessorRegex:
    r"""Regex-based substitution text pre-processor.

    Runs a series of regex substitutions (``re.sub``) from each ``regex`` of a
    :class:`gtts.tokenizer.core.RegexBuilder` with an extra ``repl``
    replacement parameter.

    Args:
        search_args (iteratable): String element(s) to be each passed to
            ``search_func`` to create a regex pattern. Each element is
            ``re.escape``'d before being passed.
        search_func (callable): A 'template' function that should take a
            string and return a string. It should take an element of
            ``search_args`` and return a valid regex search pattern string.
        repl (string): The common replacement passed to the ``sub`` method for
            each ``regex``. Can be a raw string (the case of a regex
            backreference, for example)
        flags: ``re`` flag(s) to compile with each `regex`.

    Example:
        Add "!" after the words "lorem" or "ipsum", while ignoring case::

            >>> import re
            >>> words = ['lorem', 'ipsum']
            >>> pp = PreProcessorRegex(words,
            ...                        lambda x: "({})".format(x), r'\\1!',
            ...                        re.IGNORECASE)

        In this case, the regex is a group and the replacement uses its
        backreference ``\\1`` (as a raw string). Looking at ``pp`` we get the
        following list of search/replacement pairs::

            >>> print(pp)
            (re.compile('(lorem)', re.IGNORECASE), repl='\1!'),
            (re.compile('(ipsum)', re.IGNORECASE), repl='\1!')

        It can then be run on any string of text::

            >>> pp.run("LOREM ipSuM")
            "LOREM! ipSuM!"

    See :mod:`gtts.tokenizer.pre_processors` for more examples.

    """

    def __init__(self, search_args, search_func, repl, flags=0):
        self.repl = repl
@@ -327,16 +194,6 @@ class PreProcessorRegex:
        self.regexes.append(rb.regex)

    def run(self, text):
        """Run each regex substitution on ``text``.

        Args:
            text (string): the input text.

        Returns:
            string: text after all substitutions have been sequentially
            applied.

        """
        for regex in self.regexes:
            text = regex.sub(self.repl, text)
        return text
@@ -349,39 +206,6 @@ class PreProcessorRegex:


class PreProcessorSub:
    r"""Simple substitution text preprocessor.

    Performs string-for-string substitution from list a find/replace pairs.
    It abstracts :class:`gtts.tokenizer.core.PreProcessorRegex` with a default
    simple substitution regex.

    Args:
        sub_pairs (list): A list of tuples of the style
            ``(<search str>, <replace str>)``
        ignore_case (bool): Ignore case during search. Defaults to ``True``.

    Example:
        Replace all occurences of "Mac" to "PC" and "Firefox" to "Chrome"::

            >>> sub_pairs = [('Mac', 'PC'), ('Firefox', 'Chrome')]
            >>> pp = PreProcessorSub(sub_pairs)

        Looking at the ``pp``, we get the following list of
        search (regex)/replacement pairs::

            >>> print(pp)
            (re.compile('Mac', re.IGNORECASE), repl='PC'),
            (re.compile('Firefox', re.IGNORECASE), repl='Chrome')

        It can then be run on any string of text::

            >>> pp.run("I use firefox on my mac")
            "I use Chrome on my PC"

    See :mod:`gtts.tokenizer.pre_processors` for more examples.

    """

    def __init__(self, sub_pairs, ignore_case=True):
        def search_func(x):
            return "{}".format(x)
@@ -396,16 +220,6 @@ class PreProcessorSub:
        self.pre_processors.append(pp)

    def run(self, text):
        """Run each substitution on ``text``.

        Args:
            text (string): the input text.

        Returns:
            string: text after all substitutions have been sequentially
            applied.

        """
        for pp in self.pre_processors:
            text = pp.run(text)
        return text
@@ -415,80 +229,6 @@ class PreProcessorSub:


class Tokenizer:
    r"""An extensible but simple generic rule-based tokenizer.

    A generic and simple string tokenizer that takes a list of functions
    (called `tokenizer cases`) returning ``regex`` objects and joins them by
    "|" (regex alternation 'or') to create a single regex to use with the
    standard ``regex.split()`` function.

    ``regex_funcs`` is a list of any function that can return a ``regex``
    (from ``re.compile()``) object, such as a
    :class:`gtts.tokenizer.core.RegexBuilder` instance (and its ``regex``
    attribute).

    See the :mod:`gtts.tokenizer.tokenizer_cases` module for examples.

    Args:
        regex_funcs (list): List of compiled ``regex`` objects. Each
            function's pattern will be joined into a single pattern and
            compiled.
        flags: ``re`` flag(s) to compile with the final regex. Defaults to
            ``re.IGNORECASE``

    Note:
        When the ``regex`` objects obtained from ``regex_funcs`` are joined,
        their individual ``re`` flags are ignored in favour of ``flags``.

    Raises:
        TypeError: When an element of ``regex_funcs`` is not a function, or
            a function that does not return a compiled ``regex`` object.

    Warning:
        Joined ``regex`` patterns can easily interfere with one another in
        unexpected ways. It is recommanded that each tokenizer case operate
        on distinct or non-overlapping chracters/sets of characters
        (For example, a tokenizer case for the period (".") should also
        handle not matching/cutting on decimals, instead of making that
        a seperate tokenizer case).

    Example:
        A tokenizer with a two simple case (*Note: these are bad cases to
        tokenize on, this is simply a usage example*)::

            >>> import re, RegexBuilder
            >>>
            >>> def case1():
            ...     return re.compile("\,")
            >>>
            >>> def case2():
            ...     return RegexBuilder('abc', lambda x: "{}\.".format(x)).regex
            >>>
            >>> t = Tokenizer([case1, case2])

        Looking at ``case1().pattern``, we get::

            >>> print(case1().pattern)
            '\\,'

        Looking at ``case2().pattern``, we get::

            >>> print(case2().pattern)
            'a\\.|b\\.|c\\.'

        Finally, looking at ``t``, we get them combined::

            >>> print(t)
            're.compile('\\,|a\\.|b\\.|c\\.', re.IGNORECASE)
             from: [<function case1 at 0x10bbcdd08>, <function case2 at 0x10b5c5e18>]'

        It can then be run on any string of text::

            >>> t.run("Hello, my name is Linda a. Call me Lin, b. I'm your friend")
            ['Hello', ' my name is Linda ', ' Call me Lin', ' ', " I'm your friend"]

    """

    def __init__(self, regex_funcs, flags=re.IGNORECASE):
        self.regex_funcs = regex_funcs
        self.flags = flags
@@ -511,15 +251,6 @@ class Tokenizer:
        return re.compile(pattern, self.flags)

    def run(self, text):
        """Tokenize `text`.

        Args:
            text (string): the input text to tokenize.

        Returns:
            list: A list of strings (token) split according to the tokenizer cases.

        """
        return self.total_regex.split(text)

    def __repr__(self):  # pragma: no cover
@@ -529,51 +260,22 @@ class Tokenizer:
class tokenizer_cases:

    def tone_marks():
        """Keep tone-modifying punctuation by matching following character.

        Assumes the `tone_marks` pre-processor was run for cases where there might
        not be any space after a tone-modifying punctuation mark.
        """
        return RegexBuilder(
            pattern_args=symbols.TONE_MARKS, pattern_func=lambda x: "(?<={}).".format(x)
        ).regex

    def period_comma():
        """Period and comma case.

        Match if not preceded by ".<letter>" and only if followed by space.
        Won't cut in the middle/after dotted abbreviations; won't cut numbers.

        Note:
            Won't match if a dotted abbreviation ends a sentence.

        Note:
            Won't match the end of a sentence if not followed by a space.

        """
        return RegexBuilder(
            pattern_args=symbols.PERIOD_COMMA,
            pattern_func=lambda x: r"(?<!\.[a-z]){} ".format(x),
        ).regex

    def colon():
        """Colon case.

        Match a colon ":" only if not preceded by a digit.
        Mainly to prevent a cut in the middle of time notations e.g. 10:01

        """
        return RegexBuilder(
            pattern_args=symbols.COLON, pattern_func=lambda x: r"(?<!\d){}".format(x)
        ).regex

    def other_punctuation():
        """Match other punctuation.

        Match other punctuation to split on; punctuation that naturally
        inserts a break in speech.

        """
        punc = "".join(
            set(symbols.ALL_PUNC)
            - set(symbols.TONE_MARKS)
@@ -584,11 +286,7 @@ class tokenizer_cases:
            pattern_args=punc, pattern_func=lambda x: "{}".format(x)
        ).regex

    def legacy_all_punctuation():  # pragma: no cover b/c tested but Coveralls: ¯\_(ツ)_/¯
        """Match all punctuation.

        Use as only tokenizer case to mimic gTTS 1.x tokenization.
        """
    def legacy_all_punctuation():
        punc = symbols.ALL_PUNC
        return RegexBuilder(
            pattern_args=punc, pattern_func=lambda x: "{}".format(x)
@@ -598,12 +296,6 @@ class tokenizer_cases:
class pre_processors:

    def tone_marks(text):
        """Add a space after tone-modifying punctuation.

        Because the `tone_marks` tokenizer case will split after a tone-modifying
        punctuation mark, make sure there's whitespace after.

        """
        return PreProcessorRegex(
            search_args=symbols.TONE_MARKS,
            search_func=lambda x: "(?<={})".format(x),
@@ -611,29 +303,11 @@ class pre_processors:
        ).run(text)

    def end_of_line(text):
        """Re-form words cut by end-of-line hyphens.

        Remove "<hyphen><newline>".

        """
        return PreProcessorRegex(
            search_args="-", search_func=lambda x: "{}\n".format(x), repl=""
        ).run(text)

    def abbreviations(text):
        """Remove periods after an abbreviation from a list of known
        abbreviations that can be spoken the same without that period. This
        prevents having to handle tokenization of that period.

        Note:
            Could potentially remove the ending period of a sentence.

        Note:
            Abbreviations that Google Translate can't pronounce without
            (or even with) a period should be added as a word substitution with a
            :class:`PreProcessorSub` pre-processor. Ex.: 'Esq.', 'Esquire'.

        """
        return PreProcessorRegex(
            search_args=symbols.ABBREVIATIONS,
            search_func=lambda x: r"(?<={})(?=\.).".format(x),
@@ -642,7 +316,6 @@ class pre_processors:
        ).run(text)

    def word_sub(text):
        """Word-for-word substitutions."""
        return PreProcessorSub(sub_pairs=symbols.SUB_PAIRS).run(text)


@@ -651,37 +324,8 @@ from string import whitespace as ws
import re

_ALL_PUNC_OR_SPACE = re.compile("^[{}]*$".format(re.escape(punc + ws)))
"""Regex that matches if an entire line is only comprised
of whitespace and punctuation

"""


def _minimize(the_string, delim, max_size):
    """Recursively split a string in the largest chunks
    possible from the highest position of a delimiter all the way
    to a maximum size

    Args:
        the_string (string): The string to split.
        delim (string): The delimiter to split on.
        max_size (int): The maximum size of a chunk.

    Returns:
        list: the minimized string in tokens

    Every chunk size will be at minimum ``the_string[0:idx]`` where ``idx``
    is the highest index of ``delim`` found in ``the_string``; and at maximum
    ``the_string[0:max_size]`` if no ``delim`` was found in ``the_string``.
    In the latter case, the split will occur at ``the_string[max_size]``
    which can be any character. The function runs itself again on the rest of
    ``the_string`` (``the_string[idx:]``) until no chunk is larger than
    ``max_size``.

    """
    # Remove `delim` from start of `the_string`
    # i.e. prevent a recursive infinite loop on `the_string[0:0]`
    # if `the_string` starts with `delim` and is larger than `max_size`
    if the_string.startswith(delim):
        the_string = the_string[len(delim) :]
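Note: a standalone sketch (not part of the commit, and not the vendored implementation) of the chunking idea the docstring above describes: split at the highest delimiter position within max_size, hard-cut if none is found, then recurse on the remainder:

def minimize_sketch(s, delim, max_size):
    # drop a leading delimiter so the recursion cannot loop on an empty chunk
    if s.startswith(delim):
        s = s[len(delim):]
    if len(s) <= max_size:
        return [s]
    idx = s.rfind(delim, 0, max_size)  # highest delimiter position within the size limit
    if idx <= 0:
        idx = max_size                 # no delimiter found: hard cut at max_size
    return [s[:idx]] + minimize_sketch(s[idx:], delim, max_size)

print(minimize_sketch("the quick brown fox jumps", " ", 10))
# -> ['the quick', 'brown fox', 'jumps']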

@@ -701,31 +345,10 @@ def _minimize(the_string, delim, max_size):


def _clean_tokens(tokens):
    """Clean a list of strings

    Args:
        tokens (list): A list of strings (tokens) to clean.

    Returns:
        list: Stripped strings ``tokens`` without the original elements
            that only consisted of whitespace and/or punctuation characters.

    """
    return [t.strip() for t in tokens if not _ALL_PUNC_OR_SPACE.match(t)]


def _translate_url(tld="com", path=""):
    """Generates a Google Translate URL

    Args:
        tld (string): Top-level domain for the Google Translate host,
            i.e ``https://translate.google.<tld>``. Default is ``com``.
        path: (string): A path to append to the Google Translate host,
            i.e ``https://translate.google.com/<path>``. Default is ``""``.

    Returns:
        string: A Google Translate URL `https://translate.google.<tld>/path`
    """
    _GOOGLE_TTS_URL = "https://translate.google.{}/{}"
    return _GOOGLE_TTS_URL.format(tld, path)

@@ -738,76 +361,11 @@ log.addHandler(logging.NullHandler())


class Speed:
    """Read Speed

    The Google TTS Translate API supports two speeds:
        Slow: True
        Normal: None
    """

    SLOW = True
    NORMAL = None


class gTTS:
    """gTTS -- Google Text-to-Speech.

    An interface to Google Translate's Text-to-Speech API.

    Args:
        text (string): The text to be read.
        tld (string): Top-level domain for the Google Translate host,
            i.e `https://translate.google.<tld>`. Different Google domains
            can produce different localized 'accents' for a given
            language. This is also useful when ``google.com`` might be blocked
            within a network but a local or different Google host
            (e.g. ``google.com.hk``) is not. Default is ``com``.
        lang (string, optional): The language (IETF language tag) to
            read the text in. Default is ``en``.
        slow (bool, optional): Reads text more slowly. Defaults to ``False``.
        lang_check (bool, optional): Strictly enforce an existing ``lang``,
            to catch a language error early. If set to ``True``,
            a ``ValueError`` is raised if ``lang`` doesn't exist.
            Setting ``lang_check`` to ``False`` skips Web requests
            (to validate language) and therefore speeds up instantiation.
            Default is ``True``.
        pre_processor_funcs (list): A list of zero or more functions that are
            called to transform (pre-process) text before tokenizing. Those
            functions must take a string and return a string. Defaults to::

                [
                    pre_processors.tone_marks,
                    pre_processors.end_of_line,
                    pre_processors.abbreviations,
                    pre_processors.word_sub
                ]

        tokenizer_func (callable): A function that takes in a string and
            returns a list of string (tokens). Defaults to::

                Tokenizer([
                    tokenizer_cases.tone_marks,
                    tokenizer_cases.period_comma,
                    tokenizer_cases.colon,
                    tokenizer_cases.other_punctuation
                ]).run

        timeout (float or tuple, optional): Seconds to wait for the server to
            send data before giving up, as a float, or a ``(connect timeout,
            read timeout)`` tuple. ``None`` will wait forever (default).

    See Also:
        :doc:`Pre-processing and tokenizing <tokenizer>`

    Raises:
        AssertionError: When ``text`` is ``None`` or empty; when there's nothing
            left to speak after pre-precessing, tokenizing and cleaning.
        ValueError: When ``lang_check`` is ``True`` and ``lang`` is not supported.
        RuntimeError: When ``lang_check`` is ``True`` but there's an error loading
            the languages dictionary.

    """

    GOOGLE_TTS_MAX_CHARS = 100  # Max characters the Google TTS API takes at a time
    GOOGLE_TTS_HEADERS = {
        "Referer": "http://translate.google.com/",
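Note: a hedged usage sketch (not part of the commit) of the class documented above, using only arguments listed in its docstring and the save() method shown further down in this diff; the output path is illustrative:

tts = gTTS("こんにちは", lang="ja", slow=False, lang_check=False)
tts.save("./cache/tts/sample.mp3")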

@@ -913,12 +471,6 @@ class gTTS:
        return tokens

    def _prepare_requests(self):
        """Created the TTS API the request(s) without sending them.

        Returns:
            list: ``requests.PreparedRequests_``. <https://2.python-requests.org/en/master/api/#requests.PreparedRequest>`_``.
        """
        # TTS API URL
        translate_url = _translate_url(
            tld=self.tld, path="_/TranslateWebserverUi/data/batchexecute"
        )
@@ -956,14 +508,6 @@ class gTTS:
        return "f.req={}&".format(urllib.parse.quote(espaced_rpc))

    def stream(self):
        """Do the TTS API request(s) and stream bytes

        Raises:
            :class:`gTTSError`: When there's an error with the API request.

        """
        # When disabling ssl verify in requests (for proxies and firewalls),
        # urllib3 prints an insecure warning on stdout. We disable that.
        try:
            requests.packages.urllib3.disable_warnings(
                requests.packages.urllib3.exceptions.InsecureRequestWarning
@@ -989,16 +533,6 @@ class gTTS:
            log.debug("part-%i created", idx)

    def write_to_fp(self, fp):
        """Do the TTS API request(s) and write bytes to a file-like object.

        Args:
            fp (file object): Any file-like object to write the ``mp3`` to.

        Raises:
            :class:`gTTSError`: When there's an error with the API request.
            TypeError: When ``fp`` is not a file-like object that takes bytes.

        """

        try:
            for idx, decoded in enumerate(self.stream()):
@@ -1010,15 +544,6 @@ class gTTS:
                )

    def save(self, savefile):
        """Do the TTS API request and write result to file.

        Args:
            savefile (string): The path and file name to save the ``mp3`` to.

        Raises:
            :class:`gTTSError`: When there's an error with the API request.

        """
        with open(str(savefile), "wb") as f:
            self.write_to_fp(f)
            f.flush()
@@ -1026,8 +551,6 @@ class gTTS:


class gTTSError(Exception):
    """Exception that uses context to present a meaningful error message"""

    def __init__(self, msg=None, **kwargs):
        self.tts = kwargs.pop("tts", None)
        self.rsp = kwargs.pop("response", None)
@@ -1040,10 +563,6 @@ class gTTSError(Exception):
        super(gTTSError, self).__init__(self.msg)

    def infer_msg(self, tts, rsp=None):
        """Attempt to guess what went wrong by using known
        information (e.g. http response) and observed behaviour

        """
        cause = "Unknown"

        if rsp is None:
@@ -1077,7 +596,7 @@ class gTTSError(Exception):


from tts.basettsclass import TTSbase
from myutils.config import globalconfig, getlangsrc
from myutils.config import getlangsrc


class TTS(TTSbase):

@@ -54,6 +54,7 @@
        "changecharset": false,
        "changecharset_charset": 2
    },
    "ffmpeg": "",
    "requestinterval": 1,
    "keepontop": true,
    "buttonsize": 20,

@@ -788,5 +788,7 @@
    "添加时更新模板": "تحديث القالب عند إضافة",
    "截图后进行OCR": "التعرف الضوئي على الحروف",
    "优先级": "الأولوية",
    "编码": "ترميز"
    "编码": "ترميز",
    "安装录音驱动": "تثبيت برنامج تشغيل التسجيل",
    "录音": "تسجيل صوتي"
}
@@ -788,5 +788,7 @@
    "添加时更新模板": "添加時更新範本",
    "截图后进行OCR": "截圖後進行OCR",
    "优先级": "優先順序",
    "编码": "編碼"
    "编码": "編碼",
    "安装录音驱动": "安裝錄音驅動",
    "录音": "錄音"
}
@@ -788,5 +788,7 @@
    "添加时更新模板": "Update template when adding",
    "截图后进行OCR": "Perform OCR after taking screenshots",
    "优先级": "priority",
    "编码": "coding"
    "编码": "coding",
    "安装录音驱动": "Install recording driver",
    "录音": "tape"
}
@@ -788,5 +788,7 @@
    "添加时更新模板": "Actualizar la plantilla al agregar",
    "截图后进行OCR": "OCR después de la captura de pantalla",
    "优先级": "Prioridad",
    "编码": "Codificación"
    "编码": "Codificación",
    "安装录音驱动": "Instalación de la unidad de grabación",
    "录音": "Grabación"
}
@@ -788,5 +788,7 @@
    "添加时更新模板": "Mettre à jour le modèle lorsque vous l'ajoutez",
    "截图后进行OCR": "OCR après capture d'écran",
    "优先级": "Priorité",
    "编码": "Codage"
    "编码": "Codage",
    "安装录音驱动": "Installer le driver d'enregistrement",
    "录音": "Enregistrement sonore"
}
@@ -788,5 +788,7 @@
    "添加时更新模板": "Aggiorna modello quando aggiungi",
    "截图后进行OCR": "Esegui OCR dopo aver scattato screenshot",
    "优先级": "priorità",
    "编码": "codifica"
    "编码": "codifica",
    "安装录音驱动": "Installa il driver di registrazione",
    "录音": "nastro"
}
@@ -788,5 +788,7 @@
    "添加时更新模板": "追加時にテンプレートを更新する",
    "截图后进行OCR": "スクリーンショット後にOCR",
    "优先级": "優先度",
    "编码": "エンコード"
    "编码": "エンコード",
    "安装录音驱动": "録音ドライブのインストール",
    "录音": "レコーディング"
}
@@ -788,5 +788,7 @@
    "添加时更新模板": "추가 시 템플릿 업데이트",
    "截图后进行OCR": "캡처해서 OCR 진행하도록 하겠습니다.",
    "优先级": "우선 순위",
    "编码": "인코딩"
    "编码": "인코딩",
    "安装录音驱动": "녹음 드라이브 설치",
    "录音": "녹음"
}
@@ -788,5 +788,7 @@
    "添加时更新模板": "Aktualizuj szablon podczas dodawania",
    "截图后进行OCR": "Wykonanie OCR po wykonaniu zrzutów ekranu",
    "优先级": "priorytet",
    "编码": "kodowanie"
    "编码": "kodowanie",
    "安装录音驱动": "Zainstaluj sterownik nagrywania",
    "录音": "taśma"
}
@@ -788,5 +788,7 @@
    "添加时更新模板": "Обновить шаблон при добавлении",
    "截图后进行OCR": "Снимок экрана после OCR",
    "优先级": "Приоритеты",
    "编码": "Код"
    "编码": "Код",
    "安装录音驱动": "Установка привода звукозаписи",
    "录音": "Запись"
}
@@ -788,5 +788,7 @@
    "添加时更新模板": "ปรับปรุงแม่แบบเมื่อคุณเพิ่ม",
    "截图后进行OCR": "ทำ OCR หลังจากจับภาพหน้าจอ",
    "优先级": "ลำดับความสำคัญ",
    "编码": "การเข้ารหัส"
    "编码": "การเข้ารหัส",
    "安装录音驱动": "ติดตั้งไดรฟ์บันทึก",
    "录音": "การบันทึกเสียง"
}
@@ -788,5 +788,7 @@
    "添加时更新模板": "Eklence şablonu güncelle",
    "截图后进行OCR": "Ekran fotoğraflarını aldıktan sonra OCR yap",
    "优先级": "Prioritet",
    "编码": "coding"
    "编码": "coding",
    "安装录音驱动": "Kayıt sürücüsünü kur",
    "录音": "kaset"
}
@@ -788,5 +788,7 @@
    "添加时更新模板": "Оновити шаблон під час додавання",
    "截图后进行OCR": "Виконати OCR після роботи знімків екрана",
    "优先级": "пріоритет",
    "编码": "кодування"
    "编码": "кодування",
    "安装录音驱动": "Встановити драйвер запису",
    "录音": "стрічку"
}
@@ -788,5 +788,7 @@
    "添加时更新模板": "Cập nhật mẫu khi thêm",
    "截图后进行OCR": "OCR sau khi chụp ảnh màn hình",
    "优先级": "Ưu tiên",
    "编码": "Mã hóa"
    "编码": "Mã hóa",
    "安装录音驱动": "Cài đặt Recording Drive",
    "录音": "Ghi âm"
}
@@ -788,5 +788,7 @@
    "添加时更新模板": "",
    "截图后进行OCR": "",
    "优先级": "",
    "编码": ""
    "编码": "",
    "安装录音驱动": "",
    "录音": ""
}
@@ -29,7 +29,7 @@ include(generate_product_version)

set(VERSION_MAJOR 2)
set(VERSION_MINOR 51)
set(VERSION_PATCH 2)
set(VERSION_PATCH 3)

add_library(pch pch.cpp)
target_precompile_headers(pch PUBLIC pch.h)