This commit is contained in:
恍兮惚兮 2024-05-17 13:20:02 +08:00
parent 6f0132a166
commit 220e6e3051
20 changed files with 178 additions and 532 deletions

View File

@ -12,14 +12,14 @@ from PyQt5.QtWidgets import (
QTabWidget, QTabWidget,
QFileDialog, QFileDialog,
QTabBar, QTabBar,
QLabel, QLabel
) )
from PyQt5.QtGui import QPixmap, QImage from PyQt5.QtGui import QPixmap, QImage
from traceback import print_exc from traceback import print_exc
import requests, json import requests, json, subprocess, time
from PyQt5.QtCore import pyqtSignal, Qt from PyQt5.QtCore import pyqtSignal, Qt
import qtawesome, functools, os, base64 import qtawesome, functools, os, base64
import gobject, uuid import gobject, uuid, signal
from myutils.config import globalconfig, _TR, static_data from myutils.config import globalconfig, _TR, static_data
import myutils.ankiconnect as anki import myutils.ankiconnect as anki
from gui.usefulwidget import ( from gui.usefulwidget import (
@ -31,13 +31,65 @@ from gui.usefulwidget import (
getlineedit, getlineedit,
getsimpleswitch, getsimpleswitch,
getcolorbutton, getcolorbutton,
tabadd_lazy, tabadd_lazy
) )
from myutils.wrapper import threader from myutils.wrapper import threader
from myutils.ocrutil import imageCut, ocr_run from myutils.ocrutil import imageCut, ocr_run
from gui.rangeselect import rangeselct_function from gui.rangeselect import rangeselct_function
class ffmpeg_virtual_audio_capturer:
    """Record audio to an mp3 file by driving an ffmpeg child process.

    On construction, ``ffmpeg.exe`` is launched from the directory stored in
    ``globalconfig["ffmpeg"]`` with the DirectShow input
    ``audio=virtual-audio-capturer`` and an output file named after the
    current timestamp inside ``./cache/tts``.

    Attributes:
        file: Absolute path of the mp3 file ffmpeg is asked to write.
        engine: The ``subprocess.Popen`` handle, or ``None`` when ffmpeg
            could not be started.
    """

    def __init__(self):
        os.makedirs("./cache/tts", exist_ok=True)
        self.file = os.path.abspath(
            os.path.join("./cache/tts", str(time.time()) + ".mp3")
        )
        # Set up front so end() can tell "never started" from "running".
        self.engine = None
        try:
            # Pass an argument list instead of one joined string: the
            # executable path and the output path are then quoted correctly
            # even when the configured ffmpeg directory contains spaces.
            self.engine = subprocess.Popen(
                [
                    os.path.join(globalconfig["ffmpeg"], "ffmpeg.exe"),
                    "-f",
                    "dshow",
                    "-i",
                    "audio=virtual-audio-capturer",
                    self.file,
                ],
                stdin=subprocess.PIPE,
            )
        except Exception:
            # Best effort: a missing/misconfigured ffmpeg must not break the
            # caller, but the reason is still reported.
            print_exc()

    def end(self):
        """Ask ffmpeg to stop and give it a moment to finalise the file.

        Does nothing when ffmpeg was never started. Failures while talking
        to the child process (for example because it already exited) are
        ignored, as stopping is best effort.
        """
        if self.engine is None:
            return
        try:
            # ffmpeg stops recording when it reads "q" on stdin.
            self.engine.stdin.write(b"q")
            self.engine.stdin.flush()
            # Wait briefly so the output file is complete before the caller
            # starts using its path; never block indefinitely.
            self.engine.wait(timeout=3)
        except (OSError, ValueError, subprocess.TimeoutExpired):
            pass
class statusbutton(QPushButton):
    """Push button that cycles through icon/colour states on every click.

    An internal counter is advanced each time ``setChecked`` runs (it is
    connected to ``clicked``). The counter, taken modulo the number of icons
    and modulo the number of colours, selects the icon shown and is emitted
    through the two signals below.
    """

    # Emits the new counter modulo len(icons).
    statuschanged1 = pyqtSignal(int)
    # Emits the new counter modulo len(colors).
    statuschanged2 = pyqtSignal(int)

    def __init__(self, icons, colors):
        super().__init__()
        self.idx = 0
        self.icons = icons
        self.colors = colors
        self.clicked.connect(self.setChecked)
        self.seticon()

    def seticon(self):
        """Show the icon and colour that match the current counter."""
        icon_name = self.icons[self.idx % len(self.icons)]
        icon_color = self.colors[self.idx % len(self.colors)]
        self.setIcon(qtawesome.icon(icon_name, color=icon_color))

    def setChecked(self, a0):
        """Forward to Qt, advance the state, notify listeners, redraw."""
        super().setChecked(a0)
        self.idx += 1
        icon_state = self.idx % len(self.icons)
        self.statuschanged1.emit(icon_state)
        color_state = self.idx % len(self.colors)
        self.statuschanged2.emit(color_state)
        self.seticon()
class AnkiWindow(QWidget): class AnkiWindow(QWidget):
__ocrsettext = pyqtSignal(str) __ocrsettext = pyqtSignal(str)
refreshhtml = pyqtSignal() refreshhtml = pyqtSignal()
@ -288,8 +340,47 @@ class AnkiWindow(QWidget):
getsimpleswitch(globalconfig["ankiconnect"], "ocrcroped"), getsimpleswitch(globalconfig["ankiconnect"], "ocrcroped"),
) )
layout.addWidget(QLabel())
layout.addRow(_TR("录音"), QLabel())
lb = QLabel()
lb.setOpenExternalLinks(True)
lb.setText(
'<a href="https://github.com/HIllya51/RESOURCES/releases/download/softwares/virtual-audio.zip">virtual-audio-capturer</a>'
)
layout.addRow(_TR("安装录音驱动"), lb)
ffmpegpath = getlineedit(globalconfig, "ffmpeg", readonly=True)
def selectpath():
f = QFileDialog.getExistingDirectory()
if f != "":
ffmpegpath.setText(f)
layout.addRow(
_TR("ffmpeg"),
getboxlayout(
[
ffmpegpath,
getcolorbutton(
"",
"",
selectpath,
icon="fa.gear",
constcolor="#FF69B4",
),
],
makewidget=True,
),
)
return wid return wid
def startorendrecord(self, target: QLineEdit, idx):
    """Start or stop an audio recording bound to one path field.

    Args:
        target: Line edit that receives the recorded file's path once the
            recording is stopped.
        idx: State index emitted by the record button; ``1`` starts a
            recording, any other value stops it.
    """
    # One recorder per target widget: several record buttons share this
    # method, and a single shared slot would let a second recording orphan
    # the first ffmpeg process and deliver its file to the wrong field.
    # Created lazily because the constructor is outside this method.
    active = getattr(self, "_active_recorders", None)
    if active is None:
        active = self._active_recorders = {}

    if idx == 1:
        recorder = ffmpeg_virtual_audio_capturer()
        active[target] = recorder
        # Kept for code that still reads the most recent recorder.
        self.recorder = recorder
        return

    recorder = active.pop(target, None)
    if recorder is None:
        # Stop requested without a matching start; nothing to finish.
        return
    recorder.end()
    target.setText(recorder.file)
def createaddtab(self): def createaddtab(self):
layout = QVBoxLayout() layout = QVBoxLayout()
wid = QWidget() wid = QWidget()
@ -311,6 +402,14 @@ class AnkiWindow(QWidget):
self.editpath.textChanged.connect(self.wrappedpixmap) self.editpath.textChanged.connect(self.wrappedpixmap)
self.example = QPlainTextEdit() self.example = QPlainTextEdit()
self.remarks = QTextEdit() self.remarks = QTextEdit()
recordbtn1 = statusbutton(icons=["fa.microphone", "fa.stop"], colors=[""])
recordbtn1.statuschanged1.connect(
functools.partial(self.startorendrecord, self.audiopath)
)
recordbtn2 = statusbutton(icons=["fa.microphone", "fa.stop"], colors=[""])
recordbtn2.statuschanged1.connect(
functools.partial(self.startorendrecord, self.audiopath_sentence)
)
layout.addLayout( layout.addLayout(
getboxlayout( getboxlayout(
[ [
@ -335,11 +434,14 @@ class AnkiWindow(QWidget):
[ [
QLabel(_TR("语音")), QLabel(_TR("语音")),
self.audiopath, self.audiopath,
recordbtn1,
soundbutton, soundbutton,
getcolorbutton( getcolorbutton(
"", "",
"", "",
functools.partial(self.selectaudio), functools.partial(
self.selecfile, self.audiopath
),
icon="fa.gear", icon="fa.gear",
constcolor="#FF69B4", constcolor="#FF69B4",
), ),
@ -349,11 +451,14 @@ class AnkiWindow(QWidget):
[ [
QLabel(_TR("语音_例句")), QLabel(_TR("语音_例句")),
self.audiopath_sentence, self.audiopath_sentence,
recordbtn2,
soundbutton2, soundbutton2,
getcolorbutton( getcolorbutton(
"", "",
"", "",
functools.partial(self.selectaudio2), functools.partial(
self.selecfile, self.audiopath_sentence
),
icon="fa.gear", icon="fa.gear",
constcolor="#FF69B4", constcolor="#FF69B4",
), ),
@ -367,7 +472,9 @@ class AnkiWindow(QWidget):
getcolorbutton( getcolorbutton(
"", "",
"", "",
functools.partial(self.selectimage), functools.partial(
self.selecfile, self.editpath
),
icon="fa.gear", icon="fa.gear",
constcolor="#FF69B4", constcolor="#FF69B4",
), ),
@ -406,23 +513,11 @@ class AnkiWindow(QWidget):
pix = pix.scaled(self.viewimagelabel.size() * rate, Qt.KeepAspectRatio) pix = pix.scaled(self.viewimagelabel.size() * rate, Qt.KeepAspectRatio)
self.viewimagelabel.setPixmap(pix) self.viewimagelabel.setPixmap(pix)
def selectimage(self): def selecfile(self, item):
f = QFileDialog.getOpenFileName() f = QFileDialog.getOpenFileName()
res = f[0] res = f[0]
if res != "": if res != "":
self.editpath.setText(res) item.setText(res)
def selectaudio(self):
f = QFileDialog.getOpenFileName()
res = f[0]
if res != "":
self.audiopath.setText(res)
def selectaudio2(self):
f = QFileDialog.getOpenFileName()
res = f[0]
if res != "":
self.audiopath_sentence.setText(res)
def reset(self, text): def reset(self, text):
self.currentword = text self.currentword = text
@ -578,8 +673,8 @@ class searchwordW(closeashidewindow):
soundbutton.clicked.connect(self.langdu) soundbutton.clicked.connect(self.langdu)
self.searchlayout.addWidget(soundbutton) self.searchlayout.addWidget(soundbutton)
ankiconnect = QPushButton(qtawesome.icon("fa.adn"), "") ankiconnect = statusbutton(icons=["fa.adn"], colors=["", "#FF69B4"])
ankiconnect.clicked.connect(self.onceaddankiwindow) ankiconnect.statuschanged2.connect(self.onceaddankiwindow)
self.searchlayout.addWidget(ankiconnect) self.searchlayout.addWidget(ankiconnect)
self.tab = QTabBar(self) self.tab = QTabBar(self)
@ -596,7 +691,6 @@ class searchwordW(closeashidewindow):
self.textOutput = textOutput self.textOutput = textOutput
self.cache_results = {} self.cache_results = {}
self.hiding = True self.hiding = True
self.addankiwindowidx = 0
tablayout = QVBoxLayout() tablayout = QVBoxLayout()
tablayout.addWidget(self.tab) tablayout.addWidget(self.tab)
@ -604,16 +698,17 @@ class searchwordW(closeashidewindow):
tablayout.setContentsMargins(0, 0, 0, 0) tablayout.setContentsMargins(0, 0, 0, 0)
tablayout.setSpacing(0) tablayout.setSpacing(0)
self.vboxlayout.addLayout(tablayout) self.vboxlayout.addLayout(tablayout)
self.isfirstshowanki = True
def onceaddankiwindow(self): def onceaddankiwindow(self, idx):
if self.addankiwindowidx == 0: if idx == 1:
self.vboxlayout.addWidget(self.ankiwindow) if self.isfirstshowanki:
else: self.vboxlayout.addWidget(self.ankiwindow)
if self.addankiwindowidx % 2 == 0:
self.ankiwindow.show()
else: else:
self.ankiwindow.hide() self.ankiwindow.show()
self.addankiwindowidx += 1 else:
self.ankiwindow.hide()
self.isfirstshowanki = False
def langdu(self): def langdu(self):
if gobject.baseobject.reader: if gobject.baseobject.reader:

View File

@ -399,9 +399,10 @@ def getsimplecombobox(lst, d, k, callback=None):
return s return s
def getlineedit(d, key, callback=None): def getlineedit(d, key, callback=None, readonly=False):
s = QLineEdit() s = QLineEdit()
s.setText(d[key]) s.setText(d[key])
s.setReadOnly(readonly)
s.textChanged.connect(functools.partial(callbackwrap, d, key, callback)) s.textChanged.connect(functools.partial(callbackwrap, d, key, callback))
return s return s

View File

@ -84,22 +84,6 @@ log.addHandler(logging.NullHandler())
def tts_langs(): def tts_langs():
"""Languages Google Text-to-Speech supports.
Returns:
dict: A dictionary of the type `{ '<lang>': '<name>'}`
Where `<lang>` is an IETF language tag such as `en` or `zh-TW`,
and `<name>` is the full English name of the language, such as
`English` or `Chinese (Mandarin/Taiwan)`.
The dictionary returned combines languages from two origins:
- Languages fetched from Google Translate (pre-generated in :mod:`gtts.langs`)
- Languages that are undocumented variations that were observed to work and
present different dialects or accents.
"""
langs = dict() langs = dict()
langs.update(_main_langs()) langs.update(_main_langs())
langs.update(_extra_langs()) langs.update(_extra_langs())
@ -108,16 +92,6 @@ def tts_langs():
def _extra_langs(): def _extra_langs():
"""Define extra languages.
Returns:
dict: A dictionary of extra languages manually defined.
Variations of the ones generated in `_main_langs`,
observed to provide different dialects or accents or
just simply accepted by the Google Translate Text-to-Speech API.
"""
return { return {
# Chinese # Chinese
"zh-TW": "Chinese (Mandarin/Taiwan)", "zh-TW": "Chinese (Mandarin/Taiwan)",
@ -126,25 +100,6 @@ def _extra_langs():
def _fallback_deprecated_lang(lang): def _fallback_deprecated_lang(lang):
"""Languages Google Text-to-Speech used to support.
Language tags that don't work anymore, but that can
fallback to a more general language code to maintain
compatibility.
Args:
lang (string): The language tag.
Returns:
string: The language tag, as-is if not deprecated,
or a fallback if it exits.
Example:
``en-GB`` returns ``en``.
``en-gb`` returns ``en``.
"""
deprecated = { deprecated = {
# '<fallback>': [<list of deprecated langs>] # '<fallback>': [<list of deprecated langs>]
"en": [ "en": [
@ -205,51 +160,6 @@ class symbols:
class RegexBuilder: class RegexBuilder:
r"""Builds regex using arguments passed into a pattern template.
Builds a regex object for which the pattern is made from an argument
passed into a template. If more than one argument is passed (iterable),
each pattern is joined by "|" (regex alternation 'or') to create a
single pattern.
Args:
pattern_args (iteratable): String element(s) to be each passed to
``pattern_func`` to create a regex pattern. Each element is
``re.escape``'d before being passed.
pattern_func (callable): A 'template' function that should take a
string and return a string. It should take an element of
``pattern_args`` and return a valid regex pattern group string.
flags: ``re`` flag(s) to compile with the regex.
Example:
To create a simple regex that matches on the characters "a", "b",
or "c", followed by a period::
>>> rb = RegexBuilder('abc', lambda x: "{}\.".format(x))
Looking at ``rb.regex`` we get the following compiled regex::
>>> print(rb.regex)
'a\.|b\.|c\.'
The above is fairly simple, but this class can help in writing more
complex repetitive regex, making them more readable and easier to
create by using existing data structures.
Example:
To match the character following the words "lorem", "ipsum", "meili"
or "koda"::
>>> words = ['lorem', 'ipsum', 'meili', 'koda']
>>> rb = RegexBuilder(words, lambda x: "(?<={}).".format(x))
Looking at ``rb.regex`` we get the following compiled regex::
>>> print(rb.regex)
'(?<=lorem).|(?<=ipsum).|(?<=meili).|(?<=koda).'
"""
def __init__(self, pattern_args, pattern_func, flags=0): def __init__(self, pattern_args, pattern_func, flags=0):
self.pattern_args = pattern_args self.pattern_args = pattern_args
self.pattern_func = pattern_func self.pattern_func = pattern_func
@ -273,49 +183,6 @@ class RegexBuilder:
class PreProcessorRegex: class PreProcessorRegex:
r"""Regex-based substitution text pre-processor.
Runs a series of regex substitutions (``re.sub``) from each ``regex`` of a
:class:`gtts.tokenizer.core.RegexBuilder` with an extra ``repl``
replacement parameter.
Args:
search_args (iteratable): String element(s) to be each passed to
``search_func`` to create a regex pattern. Each element is
``re.escape``'d before being passed.
search_func (callable): A 'template' function that should take a
string and return a string. It should take an element of
``search_args`` and return a valid regex search pattern string.
repl (string): The common replacement passed to the ``sub`` method for
each ``regex``. Can be a raw string (the case of a regex
backreference, for example)
flags: ``re`` flag(s) to compile with each `regex`.
Example:
Add "!" after the words "lorem" or "ipsum", while ignoring case::
>>> import re
>>> words = ['lorem', 'ipsum']
>>> pp = PreProcessorRegex(words,
... lambda x: "({})".format(x), r'\\1!',
... re.IGNORECASE)
In this case, the regex is a group and the replacement uses its
backreference ``\\1`` (as a raw string). Looking at ``pp`` we get the
following list of search/replacement pairs::
>>> print(pp)
(re.compile('(lorem)', re.IGNORECASE), repl='\1!'),
(re.compile('(ipsum)', re.IGNORECASE), repl='\1!')
It can then be run on any string of text::
>>> pp.run("LOREM ipSuM")
"LOREM! ipSuM!"
See :mod:`gtts.tokenizer.pre_processors` for more examples.
"""
def __init__(self, search_args, search_func, repl, flags=0): def __init__(self, search_args, search_func, repl, flags=0):
self.repl = repl self.repl = repl
@ -327,16 +194,6 @@ class PreProcessorRegex:
self.regexes.append(rb.regex) self.regexes.append(rb.regex)
def run(self, text): def run(self, text):
"""Run each regex substitution on ``text``.
Args:
text (string): the input text.
Returns:
string: text after all substitutions have been sequentially
applied.
"""
for regex in self.regexes: for regex in self.regexes:
text = regex.sub(self.repl, text) text = regex.sub(self.repl, text)
return text return text
@ -349,39 +206,6 @@ class PreProcessorRegex:
class PreProcessorSub: class PreProcessorSub:
r"""Simple substitution text preprocessor.
Performs string-for-string substitution from list a find/replace pairs.
It abstracts :class:`gtts.tokenizer.core.PreProcessorRegex` with a default
simple substitution regex.
Args:
sub_pairs (list): A list of tuples of the style
``(<search str>, <replace str>)``
ignore_case (bool): Ignore case during search. Defaults to ``True``.
Example:
Replace all occurences of "Mac" to "PC" and "Firefox" to "Chrome"::
>>> sub_pairs = [('Mac', 'PC'), ('Firefox', 'Chrome')]
>>> pp = PreProcessorSub(sub_pairs)
Looking at the ``pp``, we get the following list of
search (regex)/replacement pairs::
>>> print(pp)
(re.compile('Mac', re.IGNORECASE), repl='PC'),
(re.compile('Firefox', re.IGNORECASE), repl='Chrome')
It can then be run on any string of text::
>>> pp.run("I use firefox on my mac")
"I use Chrome on my PC"
See :mod:`gtts.tokenizer.pre_processors` for more examples.
"""
def __init__(self, sub_pairs, ignore_case=True): def __init__(self, sub_pairs, ignore_case=True):
def search_func(x): def search_func(x):
return "{}".format(x) return "{}".format(x)
@ -396,16 +220,6 @@ class PreProcessorSub:
self.pre_processors.append(pp) self.pre_processors.append(pp)
def run(self, text): def run(self, text):
"""Run each substitution on ``text``.
Args:
text (string): the input text.
Returns:
string: text after all substitutions have been sequentially
applied.
"""
for pp in self.pre_processors: for pp in self.pre_processors:
text = pp.run(text) text = pp.run(text)
return text return text
@ -415,80 +229,6 @@ class PreProcessorSub:
class Tokenizer: class Tokenizer:
r"""An extensible but simple generic rule-based tokenizer.
A generic and simple string tokenizer that takes a list of functions
(called `tokenizer cases`) returning ``regex`` objects and joins them by
"|" (regex alternation 'or') to create a single regex to use with the
standard ``regex.split()`` function.
``regex_funcs`` is a list of any function that can return a ``regex``
(from ``re.compile()``) object, such as a
:class:`gtts.tokenizer.core.RegexBuilder` instance (and its ``regex``
attribute).
See the :mod:`gtts.tokenizer.tokenizer_cases` module for examples.
Args:
regex_funcs (list): List of compiled ``regex`` objects. Each
function's pattern will be joined into a single pattern and
compiled.
flags: ``re`` flag(s) to compile with the final regex. Defaults to
``re.IGNORECASE``
Note:
When the ``regex`` objects obtained from ``regex_funcs`` are joined,
their individual ``re`` flags are ignored in favour of ``flags``.
Raises:
TypeError: When an element of ``regex_funcs`` is not a function, or
a function that does not return a compiled ``regex`` object.
Warning:
Joined ``regex`` patterns can easily interfere with one another in
unexpected ways. It is recommanded that each tokenizer case operate
on distinct or non-overlapping chracters/sets of characters
(For example, a tokenizer case for the period (".") should also
handle not matching/cutting on decimals, instead of making that
a seperate tokenizer case).
Example:
A tokenizer with a two simple case (*Note: these are bad cases to
tokenize on, this is simply a usage example*)::
>>> import re, RegexBuilder
>>>
>>> def case1():
... return re.compile("\,")
>>>
>>> def case2():
... return RegexBuilder('abc', lambda x: "{}\.".format(x)).regex
>>>
>>> t = Tokenizer([case1, case2])
Looking at ``case1().pattern``, we get::
>>> print(case1().pattern)
'\\,'
Looking at ``case2().pattern``, we get::
>>> print(case2().pattern)
'a\\.|b\\.|c\\.'
Finally, looking at ``t``, we get them combined::
>>> print(t)
're.compile('\\,|a\\.|b\\.|c\\.', re.IGNORECASE)
from: [<function case1 at 0x10bbcdd08>, <function case2 at 0x10b5c5e18>]'
It can then be run on any string of text::
>>> t.run("Hello, my name is Linda a. Call me Lin, b. I'm your friend")
['Hello', ' my name is Linda ', ' Call me Lin', ' ', " I'm your friend"]
"""
def __init__(self, regex_funcs, flags=re.IGNORECASE): def __init__(self, regex_funcs, flags=re.IGNORECASE):
self.regex_funcs = regex_funcs self.regex_funcs = regex_funcs
self.flags = flags self.flags = flags
@ -511,15 +251,6 @@ class Tokenizer:
return re.compile(pattern, self.flags) return re.compile(pattern, self.flags)
def run(self, text): def run(self, text):
"""Tokenize `text`.
Args:
text (string): the input text to tokenize.
Returns:
list: A list of strings (token) split according to the tokenizer cases.
"""
return self.total_regex.split(text) return self.total_regex.split(text)
def __repr__(self): # pragma: no cover def __repr__(self): # pragma: no cover
@ -529,51 +260,22 @@ class Tokenizer:
class tokenizer_cases: class tokenizer_cases:
def tone_marks(): def tone_marks():
"""Keep tone-modifying punctuation by matching following character.
Assumes the `tone_marks` pre-processor was run for cases where there might
not be any space after a tone-modifying punctuation mark.
"""
return RegexBuilder( return RegexBuilder(
pattern_args=symbols.TONE_MARKS, pattern_func=lambda x: "(?<={}).".format(x) pattern_args=symbols.TONE_MARKS, pattern_func=lambda x: "(?<={}).".format(x)
).regex ).regex
def period_comma(): def period_comma():
"""Period and comma case.
Match if not preceded by ".<letter>" and only if followed by space.
Won't cut in the middle/after dotted abbreviations; won't cut numbers.
Note:
Won't match if a dotted abbreviation ends a sentence.
Note:
Won't match the end of a sentence if not followed by a space.
"""
return RegexBuilder( return RegexBuilder(
pattern_args=symbols.PERIOD_COMMA, pattern_args=symbols.PERIOD_COMMA,
pattern_func=lambda x: r"(?<!\.[a-z]){} ".format(x), pattern_func=lambda x: r"(?<!\.[a-z]){} ".format(x),
).regex ).regex
def colon(): def colon():
"""Colon case.
Match a colon ":" only if not preceded by a digit.
Mainly to prevent a cut in the middle of time notations e.g. 10:01
"""
return RegexBuilder( return RegexBuilder(
pattern_args=symbols.COLON, pattern_func=lambda x: r"(?<!\d){}".format(x) pattern_args=symbols.COLON, pattern_func=lambda x: r"(?<!\d){}".format(x)
).regex ).regex
def other_punctuation(): def other_punctuation():
"""Match other punctuation.
Match other punctuation to split on; punctuation that naturally
inserts a break in speech.
"""
punc = "".join( punc = "".join(
set(symbols.ALL_PUNC) set(symbols.ALL_PUNC)
- set(symbols.TONE_MARKS) - set(symbols.TONE_MARKS)
@ -584,11 +286,7 @@ class tokenizer_cases:
pattern_args=punc, pattern_func=lambda x: "{}".format(x) pattern_args=punc, pattern_func=lambda x: "{}".format(x)
).regex ).regex
def legacy_all_punctuation(): # pragma: no cover b/c tested but Coveralls: ¯\_(ツ)_/¯ def legacy_all_punctuation():
"""Match all punctuation.
Use as only tokenizer case to mimic gTTS 1.x tokenization.
"""
punc = symbols.ALL_PUNC punc = symbols.ALL_PUNC
return RegexBuilder( return RegexBuilder(
pattern_args=punc, pattern_func=lambda x: "{}".format(x) pattern_args=punc, pattern_func=lambda x: "{}".format(x)
@ -598,12 +296,6 @@ class tokenizer_cases:
class pre_processors: class pre_processors:
def tone_marks(text): def tone_marks(text):
"""Add a space after tone-modifying punctuation.
Because the `tone_marks` tokenizer case will split after a tone-modifying
punctuation mark, make sure there's whitespace after.
"""
return PreProcessorRegex( return PreProcessorRegex(
search_args=symbols.TONE_MARKS, search_args=symbols.TONE_MARKS,
search_func=lambda x: "(?<={})".format(x), search_func=lambda x: "(?<={})".format(x),
@ -611,29 +303,11 @@ class pre_processors:
).run(text) ).run(text)
def end_of_line(text): def end_of_line(text):
"""Re-form words cut by end-of-line hyphens.
Remove "<hyphen><newline>".
"""
return PreProcessorRegex( return PreProcessorRegex(
search_args="-", search_func=lambda x: "{}\n".format(x), repl="" search_args="-", search_func=lambda x: "{}\n".format(x), repl=""
).run(text) ).run(text)
def abbreviations(text): def abbreviations(text):
"""Remove periods after an abbreviation from a list of known
abbreviations that can be spoken the same without that period. This
prevents having to handle tokenization of that period.
Note:
Could potentially remove the ending period of a sentence.
Note:
Abbreviations that Google Translate can't pronounce without
(or even with) a period should be added as a word substitution with a
:class:`PreProcessorSub` pre-processor. Ex.: 'Esq.', 'Esquire'.
"""
return PreProcessorRegex( return PreProcessorRegex(
search_args=symbols.ABBREVIATIONS, search_args=symbols.ABBREVIATIONS,
search_func=lambda x: r"(?<={})(?=\.).".format(x), search_func=lambda x: r"(?<={})(?=\.).".format(x),
@ -642,7 +316,6 @@ class pre_processors:
).run(text) ).run(text)
def word_sub(text): def word_sub(text):
"""Word-for-word substitutions."""
return PreProcessorSub(sub_pairs=symbols.SUB_PAIRS).run(text) return PreProcessorSub(sub_pairs=symbols.SUB_PAIRS).run(text)
@ -651,37 +324,8 @@ from string import whitespace as ws
import re import re
_ALL_PUNC_OR_SPACE = re.compile("^[{}]*$".format(re.escape(punc + ws))) _ALL_PUNC_OR_SPACE = re.compile("^[{}]*$".format(re.escape(punc + ws)))
"""Regex that matches if an entire line is only comprised
of whitespace and punctuation
"""
def _minimize(the_string, delim, max_size): def _minimize(the_string, delim, max_size):
"""Recursively split a string in the largest chunks
possible from the highest position of a delimiter all the way
to a maximum size
Args:
the_string (string): The string to split.
delim (string): The delimiter to split on.
max_size (int): The maximum size of a chunk.
Returns:
list: the minimized string in tokens
Every chunk size will be at minimum ``the_string[0:idx]`` where ``idx``
is the highest index of ``delim`` found in ``the_string``; and at maximum
``the_string[0:max_size]`` if no ``delim`` was found in ``the_string``.
In the latter case, the split will occur at ``the_string[max_size]``
which can be any character. The function runs itself again on the rest of
``the_string`` (``the_string[idx:]``) until no chunk is larger than
``max_size``.
"""
# Remove `delim` from start of `the_string`
# i.e. prevent a recursive infinite loop on `the_string[0:0]`
# if `the_string` starts with `delim` and is larger than `max_size`
if the_string.startswith(delim): if the_string.startswith(delim):
the_string = the_string[len(delim) :] the_string = the_string[len(delim) :]
@ -701,31 +345,10 @@ def _minimize(the_string, delim, max_size):
def _clean_tokens(tokens): def _clean_tokens(tokens):
"""Clean a list of strings
Args:
tokens (list): A list of strings (tokens) to clean.
Returns:
list: Stripped strings ``tokens`` without the original elements
that only consisted of whitespace and/or punctuation characters.
"""
return [t.strip() for t in tokens if not _ALL_PUNC_OR_SPACE.match(t)] return [t.strip() for t in tokens if not _ALL_PUNC_OR_SPACE.match(t)]
def _translate_url(tld="com", path=""): def _translate_url(tld="com", path=""):
"""Generates a Google Translate URL
Args:
tld (string): Top-level domain for the Google Translate host,
i.e ``https://translate.google.<tld>``. Default is ``com``.
path: (string): A path to append to the Google Translate host,
i.e ``https://translate.google.com/<path>``. Default is ``""``.
Returns:
string: A Google Translate URL `https://translate.google.<tld>/path`
"""
_GOOGLE_TTS_URL = "https://translate.google.{}/{}" _GOOGLE_TTS_URL = "https://translate.google.{}/{}"
return _GOOGLE_TTS_URL.format(tld, path) return _GOOGLE_TTS_URL.format(tld, path)
@ -738,76 +361,11 @@ log.addHandler(logging.NullHandler())
class Speed: class Speed:
"""Read Speed
The Google TTS Translate API supports two speeds:
Slow: True
Normal: None
"""
SLOW = True SLOW = True
NORMAL = None NORMAL = None
class gTTS: class gTTS:
"""gTTS -- Google Text-to-Speech.
An interface to Google Translate's Text-to-Speech API.
Args:
text (string): The text to be read.
tld (string): Top-level domain for the Google Translate host,
i.e `https://translate.google.<tld>`. Different Google domains
can produce different localized 'accents' for a given
language. This is also useful when ``google.com`` might be blocked
within a network but a local or different Google host
(e.g. ``google.com.hk``) is not. Default is ``com``.
lang (string, optional): The language (IETF language tag) to
read the text in. Default is ``en``.
slow (bool, optional): Reads text more slowly. Defaults to ``False``.
lang_check (bool, optional): Strictly enforce an existing ``lang``,
to catch a language error early. If set to ``True``,
a ``ValueError`` is raised if ``lang`` doesn't exist.
Setting ``lang_check`` to ``False`` skips Web requests
(to validate language) and therefore speeds up instantiation.
Default is ``True``.
pre_processor_funcs (list): A list of zero or more functions that are
called to transform (pre-process) text before tokenizing. Those
functions must take a string and return a string. Defaults to::
[
pre_processors.tone_marks,
pre_processors.end_of_line,
pre_processors.abbreviations,
pre_processors.word_sub
]
tokenizer_func (callable): A function that takes in a string and
returns a list of string (tokens). Defaults to::
Tokenizer([
tokenizer_cases.tone_marks,
tokenizer_cases.period_comma,
tokenizer_cases.colon,
tokenizer_cases.other_punctuation
]).run
timeout (float or tuple, optional): Seconds to wait for the server to
send data before giving up, as a float, or a ``(connect timeout,
read timeout)`` tuple. ``None`` will wait forever (default).
See Also:
:doc:`Pre-processing and tokenizing <tokenizer>`
Raises:
AssertionError: When ``text`` is ``None`` or empty; when there's nothing
left to speak after pre-precessing, tokenizing and cleaning.
ValueError: When ``lang_check`` is ``True`` and ``lang`` is not supported.
RuntimeError: When ``lang_check`` is ``True`` but there's an error loading
the languages dictionary.
"""
GOOGLE_TTS_MAX_CHARS = 100 # Max characters the Google TTS API takes at a time GOOGLE_TTS_MAX_CHARS = 100 # Max characters the Google TTS API takes at a time
GOOGLE_TTS_HEADERS = { GOOGLE_TTS_HEADERS = {
"Referer": "http://translate.google.com/", "Referer": "http://translate.google.com/",
@ -913,12 +471,6 @@ class gTTS:
return tokens return tokens
def _prepare_requests(self): def _prepare_requests(self):
"""Created the TTS API the request(s) without sending them.
Returns:
list: ``requests.PreparedRequests_``. <https://2.python-requests.org/en/master/api/#requests.PreparedRequest>`_``.
"""
# TTS API URL
translate_url = _translate_url( translate_url = _translate_url(
tld=self.tld, path="_/TranslateWebserverUi/data/batchexecute" tld=self.tld, path="_/TranslateWebserverUi/data/batchexecute"
) )
@ -956,14 +508,6 @@ class gTTS:
return "f.req={}&".format(urllib.parse.quote(espaced_rpc)) return "f.req={}&".format(urllib.parse.quote(espaced_rpc))
def stream(self): def stream(self):
"""Do the TTS API request(s) and stream bytes
Raises:
:class:`gTTSError`: When there's an error with the API request.
"""
# When disabling ssl verify in requests (for proxies and firewalls),
# urllib3 prints an insecure warning on stdout. We disable that.
try: try:
requests.packages.urllib3.disable_warnings( requests.packages.urllib3.disable_warnings(
requests.packages.urllib3.exceptions.InsecureRequestWarning requests.packages.urllib3.exceptions.InsecureRequestWarning
@ -989,16 +533,6 @@ class gTTS:
log.debug("part-%i created", idx) log.debug("part-%i created", idx)
def write_to_fp(self, fp): def write_to_fp(self, fp):
"""Do the TTS API request(s) and write bytes to a file-like object.
Args:
fp (file object): Any file-like object to write the ``mp3`` to.
Raises:
:class:`gTTSError`: When there's an error with the API request.
TypeError: When ``fp`` is not a file-like object that takes bytes.
"""
try: try:
for idx, decoded in enumerate(self.stream()): for idx, decoded in enumerate(self.stream()):
@ -1010,15 +544,6 @@ class gTTS:
) )
def save(self, savefile): def save(self, savefile):
"""Do the TTS API request and write result to file.
Args:
savefile (string): The path and file name to save the ``mp3`` to.
Raises:
:class:`gTTSError`: When there's an error with the API request.
"""
with open(str(savefile), "wb") as f: with open(str(savefile), "wb") as f:
self.write_to_fp(f) self.write_to_fp(f)
f.flush() f.flush()
@ -1026,8 +551,6 @@ class gTTS:
class gTTSError(Exception): class gTTSError(Exception):
"""Exception that uses context to present a meaningful error message"""
def __init__(self, msg=None, **kwargs): def __init__(self, msg=None, **kwargs):
self.tts = kwargs.pop("tts", None) self.tts = kwargs.pop("tts", None)
self.rsp = kwargs.pop("response", None) self.rsp = kwargs.pop("response", None)
@ -1040,10 +563,6 @@ class gTTSError(Exception):
super(gTTSError, self).__init__(self.msg) super(gTTSError, self).__init__(self.msg)
def infer_msg(self, tts, rsp=None): def infer_msg(self, tts, rsp=None):
"""Attempt to guess what went wrong by using known
information (e.g. http response) and observed behaviour
"""
cause = "Unknown" cause = "Unknown"
if rsp is None: if rsp is None:
@ -1077,7 +596,7 @@ class gTTSError(Exception):
from tts.basettsclass import TTSbase from tts.basettsclass import TTSbase
from myutils.config import globalconfig, getlangsrc from myutils.config import getlangsrc
class TTS(TTSbase): class TTS(TTSbase):

View File

@ -54,6 +54,7 @@
"changecharset": false, "changecharset": false,
"changecharset_charset": 2 "changecharset_charset": 2
}, },
"ffmpeg": "",
"requestinterval": 1, "requestinterval": 1,
"keepontop": true, "keepontop": true,
"buttonsize": 20, "buttonsize": 20,

View File

@ -788,5 +788,7 @@
"添加时更新模板": "تحديث القالب عند إضافة", "添加时更新模板": "تحديث القالب عند إضافة",
"截图后进行OCR": "التعرف الضوئي على الحروف", "截图后进行OCR": "التعرف الضوئي على الحروف",
"优先级": "الأولوية", "优先级": "الأولوية",
"编码": "ترميز" "编码": "ترميز",
"安装录音驱动": "تثبيت برنامج تشغيل التسجيل",
"录音": "تسجيل صوتي"
} }

View File

@ -788,5 +788,7 @@
"添加时更新模板": "添加時更新範本", "添加时更新模板": "添加時更新範本",
"截图后进行OCR": "截圖後進行OCR", "截图后进行OCR": "截圖後進行OCR",
"优先级": "優先順序", "优先级": "優先順序",
"编码": "編碼" "编码": "編碼",
"安装录音驱动": "安裝錄音驅動",
"录音": "錄音"
} }

View File

@ -788,5 +788,7 @@
"添加时更新模板": "Update template when adding", "添加时更新模板": "Update template when adding",
"截图后进行OCR": "Perform OCR after taking screenshots", "截图后进行OCR": "Perform OCR after taking screenshots",
"优先级": "priority", "优先级": "priority",
"编码": "coding" "编码": "coding",
"安装录音驱动": "Install recording driver",
"录音": "tape"
} }

View File

@ -788,5 +788,7 @@
"添加时更新模板": "Actualizar la plantilla al agregar", "添加时更新模板": "Actualizar la plantilla al agregar",
"截图后进行OCR": "OCR después de la captura de pantalla", "截图后进行OCR": "OCR después de la captura de pantalla",
"优先级": "Prioridad", "优先级": "Prioridad",
"编码": "Codificación" "编码": "Codificación",
"安装录音驱动": "Instalación de la unidad de grabación",
"录音": "Grabación"
} }

View File

@ -788,5 +788,7 @@
"添加时更新模板": "Mettre à jour le modèle lorsque vous l'ajoutez", "添加时更新模板": "Mettre à jour le modèle lorsque vous l'ajoutez",
"截图后进行OCR": "OCR après capture d'écran", "截图后进行OCR": "OCR après capture d'écran",
"优先级": "Priorité", "优先级": "Priorité",
"编码": "Codage" "编码": "Codage",
"安装录音驱动": "Installer le driver d'enregistrement",
"录音": "Enregistrement sonore"
} }

View File

@ -788,5 +788,7 @@
"添加时更新模板": "Aggiorna modello quando aggiungi", "添加时更新模板": "Aggiorna modello quando aggiungi",
"截图后进行OCR": "Esegui OCR dopo aver scattato screenshot", "截图后进行OCR": "Esegui OCR dopo aver scattato screenshot",
"优先级": "priorità", "优先级": "priorità",
"编码": "codifica" "编码": "codifica",
"安装录音驱动": "Installa il driver di registrazione",
"录音": "nastro"
} }

View File

@ -788,5 +788,7 @@
"添加时更新模板": "追加時にテンプレートを更新する", "添加时更新模板": "追加時にテンプレートを更新する",
"截图后进行OCR": "スクリーンショット後にOCR", "截图后进行OCR": "スクリーンショット後にOCR",
"优先级": "優先度", "优先级": "優先度",
"编码": "エンコード" "编码": "エンコード",
"安装录音驱动": "録音ドライブのインストール",
"录音": "レコーディング"
} }

View File

@ -788,5 +788,7 @@
"添加时更新模板": "추가 시 템플릿 업데이트", "添加时更新模板": "추가 시 템플릿 업데이트",
"截图后进行OCR": "캡처해서 OCR 진행하도록 하겠습니다.", "截图后进行OCR": "캡처해서 OCR 진행하도록 하겠습니다.",
"优先级": "우선 순위", "优先级": "우선 순위",
"编码": "인코딩" "编码": "인코딩",
"安装录音驱动": "녹음 드라이브 설치",
"录音": "녹음"
} }

View File

@ -788,5 +788,7 @@
"添加时更新模板": "Aktualizuj szablon podczas dodawania", "添加时更新模板": "Aktualizuj szablon podczas dodawania",
"截图后进行OCR": "Wykonanie OCR po wykonaniu zrzutów ekranu", "截图后进行OCR": "Wykonanie OCR po wykonaniu zrzutów ekranu",
"优先级": "priorytet", "优先级": "priorytet",
"编码": "kodowanie" "编码": "kodowanie",
"安装录音驱动": "Zainstaluj sterownik nagrywania",
"录音": "taśma"
} }

View File

@ -788,5 +788,7 @@
"添加时更新模板": "Обновить шаблон при добавлении", "添加时更新模板": "Обновить шаблон при добавлении",
"截图后进行OCR": "Снимок экрана после OCR", "截图后进行OCR": "Снимок экрана после OCR",
"优先级": "Приоритеты", "优先级": "Приоритеты",
"编码": "Код" "编码": "Код",
"安装录音驱动": "Установка привода звукозаписи",
"录音": "Запись"
} }

View File

@ -788,5 +788,7 @@
"添加时更新模板": "ปรับปรุงแม่แบบเมื่อคุณเพิ่ม", "添加时更新模板": "ปรับปรุงแม่แบบเมื่อคุณเพิ่ม",
"截图后进行OCR": "ทำ OCR หลังจากจับภาพหน้าจอ", "截图后进行OCR": "ทำ OCR หลังจากจับภาพหน้าจอ",
"优先级": "ลำดับความสำคัญ", "优先级": "ลำดับความสำคัญ",
"编码": "การเข้ารหัส" "编码": "การเข้ารหัส",
"安装录音驱动": "ติดตั้งไดรฟ์บันทึก",
"录音": "การบันทึกเสียง"
} }

View File

@ -788,5 +788,7 @@
"添加时更新模板": "Eklence şablonu güncelle", "添加时更新模板": "Eklence şablonu güncelle",
"截图后进行OCR": "Ekran fotoğraflarını aldıktan sonra OCR yap", "截图后进行OCR": "Ekran fotoğraflarını aldıktan sonra OCR yap",
"优先级": "Prioritet", "优先级": "Prioritet",
"编码": "coding" "编码": "coding",
"安装录音驱动": "Kayıt sürücüsünü kur",
"录音": "kaset"
} }

View File

@ -788,5 +788,7 @@
"添加时更新模板": "Оновити шаблон під час додавання", "添加时更新模板": "Оновити шаблон під час додавання",
"截图后进行OCR": "Виконати OCR після роботи знімків екрана", "截图后进行OCR": "Виконати OCR після роботи знімків екрана",
"优先级": "пріоритет", "优先级": "пріоритет",
"编码": "кодування" "编码": "кодування",
"安装录音驱动": "Встановити драйвер запису",
"录音": "стрічку"
} }

View File

@ -788,5 +788,7 @@
"添加时更新模板": "Cập nhật mẫu khi thêm", "添加时更新模板": "Cập nhật mẫu khi thêm",
"截图后进行OCR": "OCR sau khi chụp ảnh màn hình", "截图后进行OCR": "OCR sau khi chụp ảnh màn hình",
"优先级": "Ưu tiên", "优先级": "Ưu tiên",
"编码": "Mã hóa" "编码": "Mã hóa",
"安装录音驱动": "Cài đặt Recording Drive",
"录音": "Ghi âm"
} }

View File

@ -788,5 +788,7 @@
"添加时更新模板": "", "添加时更新模板": "",
"截图后进行OCR": "", "截图后进行OCR": "",
"优先级": "", "优先级": "",
"编码": "" "编码": "",
"安装录音驱动": "",
"录音": ""
} }

View File

@ -29,7 +29,7 @@ include(generate_product_version)
set(VERSION_MAJOR 2) set(VERSION_MAJOR 2)
set(VERSION_MINOR 51) set(VERSION_MINOR 51)
set(VERSION_PATCH 2) set(VERSION_PATCH 3)
add_library(pch pch.cpp) add_library(pch pch.cpp)
target_precompile_headers(pch PUBLIC pch.h) target_precompile_headers(pch PUBLIC pch.h)