Merge 4715fdcf52 into 4a87a0d19f

Merge pull request #299 from samjabrahams/rotary_embedding_head_dim_comment_cleanup
Fix comment: rotary embeddings final dimension size
2025-12-06 04:12:13 +00:00 · 2025-11-18 03:15:16 +03:00 · 2025-11-17 13:29:21 -08:00 · 2025-11-17 11:32:56 -05:00 · 2025-10-29 09:40:24 +01:00 · 2025-10-19 09:40:54 +03:30
4 changed files with 10 additions and 8 deletions
--- a/nanochat/gpt.py
+++ b/nanochat/gpt.py
@@ -244,7 +244,7 @@ class GPT(nn.Module):
    def forward(self, idx, targets=None, kv_cache=None, loss_reduction='mean'):
        B, T = idx.size()
-        # Grab the rotary embeddings for the current sequence length (they are of shape (1, seq_len, 1, head_dim))
+        # Grab the rotary embeddings for the current sequence length (they are of shape (1, seq_len, 1, head_dim/2))
        assert T <= self.cos.size(1), f"Sequence length grew beyond the rotary embeddings cache: {T} > {self.cos.size(1)}"
        assert idx.device == self.cos.device, f"Rotary embeddings and idx are on different devices: {idx.device} != {self.cos.device}"
        assert self.cos.dtype == torch.bfloat16, "Rotary embeddings must be in bfloat16"
--- a/rustbpe/Cargo.toml
+++ b/rustbpe/Cargo.toml
@@ -13,3 +13,4 @@ pyo3-log = "0.12.4"
 ahash = "0.8.12"
 rayon = "1.11.0"
 compact_str = "0.9.0"
+num-format = "0.4"
--- a/rustbpe/src/lib.rs
+++ b/rustbpe/src/lib.rs
@@ -6,6 +6,7 @@ use fancy_regex::Regex;
 use pyo3::prelude::*;
 use ahash::{AHashMap, AHashSet};
+use num_format::{Locale, ToFormattedString};
 use compact_str::CompactString;
 use rayon::prelude::*;
@@ -164,15 +165,15 @@ impl Tokenizer {
    fn train_core_incremental(&mut self, mut words: Vec<Word>, counts: Vec<i32>, vocab_size: u32) {
        assert!(vocab_size >= 256, "vocab_size must be at least 256");
        let num_merges = vocab_size - 256;
-        log::info!("Starting BPE training: {} merges to compute", num_merges);
+        log::info!("Starting BPE training: {} merges to compute", num_merges.to_formatted_string(&Locale::en));
        self.merges.clear();
        // ---- Initial pair_counts and where_to_update (parallel) ----
-        log::info!("Computing initial pair counts from {} unique sequences", words.len());
+        log::info!("Computing initial pair counts from {} unique sequences", words.len().to_formatted_string(&Locale::en));
        let (mut pair_counts, mut where_to_update) = count_pairs_parallel(&words, &counts);
        // ---- Build heap ----
-        log::info!("Building heap with {} unique pairs", pair_counts.len());
+        log::info!("Building heap with {} unique pairs", pair_counts.len().to_formatted_string(&Locale::en));
        let mut heap = OctonaryHeap::with_capacity(pair_counts.len());
        for (pair, pos) in where_to_update.drain() {
            let c = *pair_counts.get(&pair).unwrap_or(&0);
@@ -375,7 +376,7 @@ impl Tokenizer {
                break;
            }
        }
-        log::info!("Processed {} sequences total, {} unique", total_sequences, counts.len());
+        log::info!("Processed {} sequences total, {} unique", total_sequences.to_formatted_string(&Locale::en), counts.len().to_formatted_string(&Locale::en));
        // Materialize words & counts
        let mut words = Vec::with_capacity(counts.len());
--- a/scripts/tok_eval.py
+++ b/scripts/tok_eval.py
@@ -196,9 +196,9 @@ RESET = '\033[0m'
 # Print vocab sizes
 print(f"\nVocab sizes:")
-print(f"GPT-2: {vocab_sizes['gpt2']}")
+print(f"GPT-2: {vocab_sizes['gpt2']:,}")
-print(f"GPT-4: {vocab_sizes['gpt4']}")
+print(f"GPT-4: {vocab_sizes['gpt4']:,}")
-print(f"Ours: {vocab_sizes['ours']}")
+print(f"Ours: {vocab_sizes['ours']:,}")
 def print_comparison(baseline_name, baseline_results, ours_results, all_text):
    """Print comparison table between baseline tokenizer and ours."""
Author	SHA1	Message	Date
Hossein-Lakzaei	de354b6fad	Merge `4715fdcf52` into `4a87a0d19f`	2025-11-18 03:15:16 +03:00
Andrej	4a87a0d19f	Merge pull request #299 from samjabrahams/rotary_embedding_head_dim_comment_cleanup Fix comment: rotary embeddings final dimension size	2025-11-17 13:29:21 -08:00
Sam Abrahams	11e68bf442	Fix comment: rotary embeddings final dimension size	2025-11-17 11:32:56 -05:00
svlandeg	4715fdcf52	Merge branch 'master' into master_hoslak	2025-10-29 09:40:24 +01:00
Hossein-Lakzaei	1a428f2b0b	Fix typo in README.md for "modern" and improve clarity in the description of the nanochat d32 model.	2025-10-19 09:40:54 +03:30
Hossein-Lakzaei	520bdfe081	Merge branch 'master' of https://github.com/HosLak/nanochat	2025-10-17 00:26:24 +03:30
Hossein-Lakzaei	355cc60089	Update logging to use formatted strings for better readability and add num-format dependency and Fix typo in comment	2025-10-17 00:25:16 +03:30
Hossein-Lakzaei	671e8d9fc9	Merge branch 'karpathy:master' into master	2025-10-16 23:47:32 +03:30
Hossein-Lakzaei	52382d58c5	Update logging to use formatted strings for better readability and add num-format dependency	2025-10-16 23:33:01 +03:30