mirror of
https://github.com/karpathy/nanochat.git
synced 2025-12-06 12:22:18 +00:00
Merge 2f4f20862d into 4a87a0d19f
This commit is contained in:
commit
52c7d23a63
|
|
@ -77,8 +77,7 @@ vocab_size = tokenizer.get_vocab_size()
|
||||||
special_set = set(tokenizer.get_special_tokens())
|
special_set = set(tokenizer.get_special_tokens())
|
||||||
token_strings = [tokenizer.decode([token_id]) for token_id in range(vocab_size)]
|
token_strings = [tokenizer.decode([token_id]) for token_id in range(vocab_size)]
|
||||||
token_bytes = []
|
token_bytes = []
|
||||||
for token_id in range(vocab_size):
|
for token_str in token_strings: # the Python string representation of this token
|
||||||
token_str = token_strings[token_id] # the Python string representation of this token
|
|
||||||
if token_str in special_set:
|
if token_str in special_set:
|
||||||
token_bytes.append(0) # special characters are not counted
|
token_bytes.append(0) # special characters are not counted
|
||||||
else:
|
else:
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue
Block a user