add back comment

commit 2f4f20862d
parent 1d719a7c94
Author: Kian Kyars
Date:   2025-11-23 08:09:28 -07:00


@@ -77,7 +77,7 @@ vocab_size = tokenizer.get_vocab_size()
 special_set = set(tokenizer.get_special_tokens())
 token_strings = [tokenizer.decode([token_id]) for token_id in range(vocab_size)]
 token_bytes = []
-for token_str in token_strings:
+for token_str in token_strings: # the Python string representation of this token
     if token_str in special_set:
         token_bytes.append(0) # special characters are not counted
     else:
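
For context, here is a minimal sketch of the full loop this hunk touches. It assumes the tokenizer API visible in the diff (get_vocab_size, get_special_tokens, decode); the else branch shown here is an assumption (counting the UTF-8 byte length of the decoded token), since the real else body falls outside the hunk, and the wrapper function name is hypothetical.

def compute_token_bytes(tokenizer):
    # Per-token byte counts: special tokens contribute 0; ordinary tokens
    # are assumed to contribute the UTF-8 byte length of their decoded string
    # (the else body is outside the hunk, so this is a guess).
    vocab_size = tokenizer.get_vocab_size()
    special_set = set(tokenizer.get_special_tokens())
    token_strings = [tokenizer.decode([token_id]) for token_id in range(vocab_size)]
    token_bytes = []
    for token_str in token_strings: # the Python string representation of this token
        if token_str in special_set:
            token_bytes.append(0) # special characters are not counted
        else:
            token_bytes.append(len(token_str.encode("utf-8"))) # assumed byte count
    return token_bytes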