remove log from tokenizer

#12 by zhouzaida - opened
Files changed (1):
  1. tokenization_moonshot.py +0 -10
tokenization_moonshot.py CHANGED
@@ -94,12 +94,6 @@ class TikTokenTokenizer(PreTrainedTokenizer):
             i: added_tokens_decoder[i].content for i in added_tokens_decoder
         }
 
-        special_tokens = (
-            [str(bos_token), str(eos_token)]
-            + additional_special_tokens
-            + [str(unk_token), str(pad_token)]
-        )
-
         self.vocab_file = vocab_file
         mergeable_ranks = load_tiktoken_bpe(vocab_file)
         num_base_tokens = len(mergeable_ranks)
@@ -116,15 +110,11 @@ class TikTokenTokenizer(PreTrainedTokenizer):
             mergeable_ranks=mergeable_ranks,
             special_tokens=self.special_tokens,
         )
-        logger.info(f"Reloaded tiktoken model from {vocab_file}")
 
         self.n_words: int = self.model.n_vocab
         # BOS / EOS token IDs
         self.bos_id: int = self.special_tokens[str(bos_token)]
         self.eos_id: int = self.special_tokens[str(eos_token)]
-        logger.info(
-            f"#words: {self.n_words} - BOS ID: {self.bos_id} - EOS ID: {self.eos_id}"
-        )
 
         self.pad_id: int = self.special_tokens[str(pad_token)]
         self.unk_id: int = self.special_tokens[str(unk_token)]
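The diff removes the local special_tokens list construction and the two logger.info calls from TikTokenTokenizer.__init__. The values those log lines reported are still stored as attributes, so callers can print or log them on their own. A minimal sketch of that, assuming the tokenizer is loaded through AutoTokenizer with trust_remote_code=True; the repo id below is a placeholder, not taken from this PR:

    # Placeholder repo id for illustration only; substitute the repository this file ships with.
    from transformers import AutoTokenizer

    tok = AutoTokenizer.from_pretrained("moonshotai/<repo-id>", trust_remote_code=True)

    # The removed logger.info calls printed exactly these values; __init__ still
    # sets them as attributes, so they remain inspectable on demand.
    print(f"Reloaded tiktoken model from {tok.vocab_file}")
    print(f"#words: {tok.n_words} - BOS ID: {tok.bos_id} - EOS ID: {tok.eos_id}")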
 