|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import unittest
|
|
from transformers import AutoTokenizer
|
|
from .configuration_sapnous import SapnousT1Config
|
|
from .tokenization_sapnous import SapnousT1Tokenizer
|
|
|
|
class TestSapnousTokenizer(unittest.TestCase):
    """Smoke tests for ``SapnousT1Tokenizer``.

    One tokenizer instance is built in ``setUpClass`` and shared (read-only)
    by the tests that do not load their own.
    """

    @classmethod
    def setUpClass(cls):
        """Create the tokenizer shared by the tests in this class.

        NOTE(review): ``vocab.json`` and ``merges.txt`` are relative paths, so
        they are presumably resolved against the current working directory --
        confirm where this suite is expected to be run from.
        """
        cls.tokenizer = SapnousT1Tokenizer(
            vocab_file="vocab.json",
            merges_file="merges.txt",
        )

    def test_tokenizer_from_pretrained(self):
        """``AutoTokenizer`` resolves the published repo to our tokenizer class.

        NOTE(review): loading by repo id presumably needs network access (or a
        populated local cache) -- confirm this is acceptable for CI.
        """
        tokenizer = AutoTokenizer.from_pretrained(
            "Sapnous-AI/Sapnous-VR-6B",
            trust_remote_code=True,
        )
        self.assertIsInstance(tokenizer, SapnousT1Tokenizer)

    def test_save_load_pretrained(self):
        """The vocabulary is non-empty and survives a save/load round trip."""
        # Local import: only this test needs it, and it keeps the block
        # self-contained without touching the file-level import section.
        import tempfile

        vocab = self.tokenizer.get_vocab()
        self.assertIsInstance(vocab, dict)
        self.assertGreater(len(vocab), 0)

        # Actually exercise save + load, which is what the test name promises.
        # Assumes SapnousT1Tokenizer exposes the standard transformers
        # ``save_pretrained`` / ``from_pretrained`` API -- TODO confirm.
        with tempfile.TemporaryDirectory() as tmp_dir:
            self.tokenizer.save_pretrained(tmp_dir)
            reloaded = SapnousT1Tokenizer.from_pretrained(tmp_dir)

        self.assertIsInstance(reloaded, SapnousT1Tokenizer)
        self.assertEqual(reloaded.get_vocab(), vocab)

    def test_tokenization(self):
        """Tokenizing a short sentence yields a non-empty list of tokens."""
        text = "Hello, world!"
        tokens = self.tokenizer.tokenize(text)
        self.assertIsInstance(tokens, list)
        self.assertGreater(len(tokens), 0)

    def test_special_tokens(self):
        """The unk, bos and eos special tokens are all set on the tokenizer."""
        self.assertIsNotNone(self.tokenizer.unk_token)
        self.assertIsNotNone(self.tokenizer.bos_token)
        self.assertIsNotNone(self.tokenizer.eos_token)
|
|
|
|
# Allow running this test module directly as a script.
# NOTE(review): the file uses package-relative imports, so direct execution
# presumably only works via ``python -m <package>.<module>`` -- confirm.
if __name__ == '__main__':
    unittest.main()