mirror of
https://github.com/HIllya51/LunaTranslator.git
synced 2024-12-29 16:44:13 +08:00
repair
This commit is contained in:
parent
f104630ccb
commit
6213fddc0e
@ -3,6 +3,25 @@ import os
|
|||||||
|
|
||||||
from hiraparse.basehira import basehira
|
from hiraparse.basehira import basehira
|
||||||
|
|
||||||
|
# # 2.1.2 src schema
|
||||||
|
# UnidicFeatures17 = namedtuple('UnidicFeatures17',
|
||||||
|
# ('pos1 pos2 pos3 pos4 cType cForm lForm lemma orth pron '
|
||||||
|
# 'orthBase pronBase goshu iType iForm fType fForm').split(' '))
|
||||||
|
|
||||||
|
# # 2.1.2 bin schema
|
||||||
|
# # The unidic-mecab-2.1.2_bin distribution adds kana accent fields.
|
||||||
|
# UnidicFeatures26 = namedtuple('UnidicFeatures26',
|
||||||
|
# ('pos1 pos2 pos3 pos4 cType cForm lForm lemma orth pron '
|
||||||
|
# 'orthBase pronBase goshu iType iForm fType fForm '
|
||||||
|
# 'kana kanaBase form formBase iConType fConType aType '
|
||||||
|
# 'aConType aModeType').split(' '))
|
||||||
|
|
||||||
|
# # schema used in 2.2.0, 2.3.0
|
||||||
|
# UnidicFeatures29 = namedtuple('UnidicFeatures29', 'pos1 pos2 pos3 pos4 cType '
|
||||||
|
# 'cForm lForm lemma orth pron orthBase pronBase goshu iType iForm fType '
|
||||||
|
# 'fForm iConType fConType type kana kanaBase form formBase aType aConType '
|
||||||
|
# 'aModType lid lemma_id'.split(' '))
|
||||||
|
|
||||||
|
|
||||||
class mecab(basehira):
|
class mecab(basehira):
|
||||||
def init(self) -> None:
|
def init(self) -> None:
|
||||||
@ -20,34 +39,26 @@ class mecab(basehira):
|
|||||||
text, codec
|
text, codec
|
||||||
): # self.kks.parseToNodeList(text):
|
): # self.kks.parseToNodeList(text):
|
||||||
kana = ""
|
kana = ""
|
||||||
pos1 = ""
|
origorig = ""
|
||||||
origorig = None
|
|
||||||
if len(fields):
|
|
||||||
pos1 = fields[0]
|
pos1 = fields[0]
|
||||||
if len(fields) > 29:
|
if len(fields) == 26:
|
||||||
kana = fields[22]
|
|
||||||
elif len(fields) == 29:
|
|
||||||
kana = fields[20]
|
|
||||||
elif 29 > len(fields) >= 26:
|
|
||||||
kana = fields[17]
|
kana = fields[17]
|
||||||
origorig = fields[7]
|
origorig = fields[7]
|
||||||
elif len(fields) > 9:
|
elif len(fields) == 29:
|
||||||
kana = fields[9] # 无kana,用lform代替
|
kana = fields[20]
|
||||||
|
origorig = fields[7]
|
||||||
|
elif len(fields) == 17:
|
||||||
|
kana = fields[9]
|
||||||
|
origorig = fields[7]
|
||||||
elif len(fields) == 9:
|
elif len(fields) == 9:
|
||||||
kana = fields[8] # 7/8均可,issues/514
|
kana = fields[8]
|
||||||
else:
|
origorig = fields[7]
|
||||||
kana = ""
|
|
||||||
if len(fields) >= 8:
|
|
||||||
origorig = fields[7] # unsafe
|
|
||||||
l = 0
|
l = 0
|
||||||
if text[start] == "\n":
|
|
||||||
start += 1
|
|
||||||
while str(node) not in text[start : start + l]:
|
while str(node) not in text[start : start + l]:
|
||||||
l += 1
|
l += 1
|
||||||
orig = text[start : start + l]
|
orig = text[start : start + l]
|
||||||
if origorig is None:
|
|
||||||
origorig = orig
|
|
||||||
|
|
||||||
start += l
|
start += l
|
||||||
hira = kana # .translate(self.h2k)
|
hira = kana # .translate(self.h2k)
|
||||||
|
|
||||||
@ -66,4 +77,9 @@ class mecab(basehira):
|
|||||||
result.append(
|
result.append(
|
||||||
{"orig": orig, "hira": hira, "cixing": pos1, "origorig": origorig}
|
{"orig": orig, "hira": hira, "cixing": pos1, "origorig": origorig}
|
||||||
)
|
)
|
||||||
|
extras=text[start :]
|
||||||
|
if len(extras):
|
||||||
|
result.append(
|
||||||
|
{"orig": extras, "hira": extras, "cixing": '', "origorig": extras}
|
||||||
|
)
|
||||||
return result
|
return result
|
||||||
|
@ -22,7 +22,7 @@ from ctypes import (
|
|||||||
)
|
)
|
||||||
from ctypes.wintypes import WORD, HANDLE, HWND, LONG, DWORD
|
from ctypes.wintypes import WORD, HANDLE, HWND, LONG, DWORD
|
||||||
from windows import WINDOWPLACEMENT
|
from windows import WINDOWPLACEMENT
|
||||||
import gobject
|
import gobject, csv
|
||||||
|
|
||||||
utilsdll = CDLL(gobject.GetDllpath(("winsharedutils32.dll", "winsharedutils64.dll")))
|
utilsdll = CDLL(gobject.GetDllpath(("winsharedutils32.dll", "winsharedutils64.dll")))
|
||||||
|
|
||||||
@ -157,7 +157,7 @@ class mecabwrap:
|
|||||||
res = []
|
res = []
|
||||||
for i in range(num.value):
|
for i in range(num.value):
|
||||||
f = feature[i]
|
f = feature[i]
|
||||||
fields = f.decode(codec).split(",")
|
fields = list(csv.reader([f.decode(codec)]))[0]
|
||||||
res.append((surface[i].decode(codec), fields))
|
res.append((surface[i].decode(codec), fields))
|
||||||
_freestringlist(feature, num.value)
|
_freestringlist(feature, num.value)
|
||||||
_freestringlist(surface, num.value)
|
_freestringlist(surface, num.value)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user