Fix empty sentence. (#839)

Properly handle empty sentences when splitting the original text.
This commit is contained in:
chaihahaha 2024-06-22 04:32:27 +08:00 committed by GitHub
parent cb418e4f7f
commit 345a288f74
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -256,13 +256,16 @@ class TS(basetrans):
return output_ids_py return output_ids_py
def translate(self, content): def translate(self, content):
delimiters = ['.','','\n',':','','?','','!','','……','',''] delimiters = ['.','','\n',':','','?','','!','','','','',]
content_split = [i for i in re.split('(['+''.join(delimiters)+'])', content) if i] raw_split = [i.strip() for i in re.split('(['+''.join(delimiters)+'])', content)]
content_split = [i for i in raw_split if i]
translated_list = [] translated_list = []
i = 0 i = 0
while i < len(content_split): while i < len(content_split):
sentence = content_split[i] sentence = content_split[i]
if i+1 < len(content_split): while i + 1 < len(content_split):
if content_split[i+1] not in delimiters:
break
i += 1 i += 1
sentence += content_split[i] sentence += content_split[i]
input_ids_py = self.encode_as_ids(sentence) input_ids_py = self.encode_as_ids(sentence)