Add EOS token at end of sentence to prevent repetition. (#490)

Fix repetition at the end of a sentence by appending an <EOS> token id to the input ids.
This commit is contained in:
chaihahaha 2024-01-12 22:43:43 +08:00 committed by GitHub
parent 734e6782d7
commit f06244e125
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -158,6 +158,7 @@ class TS(basetrans):
) )
input_ids_len = n_tokens.value input_ids_len = n_tokens.value
input_ids_py = [token_ids[i] for i in range(input_ids_len)] input_ids_py = [token_ids[i] for i in range(input_ids_len)]
input_ids_py += [1] # add EOS token to notify the end of sentence and prevent repetition
return input_ids_py return input_ids_py
def decode_from_ids(self, output_ids_py): def decode_from_ids(self, output_ids_py):