From 02b22a5a13919d19223ecd57c17323b1e2dd5c8a Mon Sep 17 00:00:00 2001
From: Anton Chechetka
Date: Sun, 23 Nov 2025 17:51:18 +0100
Subject: [PATCH] Fix relative difference sign in scripts/tok_eval.py

---
 scripts/tok_eval.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/scripts/tok_eval.py b/scripts/tok_eval.py
index 9233d71..c4e35a6 100644
--- a/scripts/tok_eval.py
+++ b/scripts/tok_eval.py
@@ -212,9 +212,9 @@ def print_comparison(baseline_name, baseline_results, ours_results, all_text):
         baseline_data = baseline_results[name]
         ours_data = ours_results[name]
 
-        # Calculate relative difference (positive means ours is better, negative means worse)
-        # Using tokens: fewer tokens is better, so we calculate (baseline_tokens - ours_tokens) / baseline_tokens
-        relative_diff = ((baseline_data['tokens'] - ours_data['tokens']) / baseline_data['tokens']) * 100
+        # Calculate relative difference in total tokens (negative means ours is better, positive means worse)
+        # We calculate (ours_tokens - baseline_tokens) / baseline_tokens
+        relative_diff = ((ours_data['tokens'] - baseline_data['tokens']) / baseline_data['tokens']) * 100
 
         # Determine which has better compression (higher ratio = better)
         if baseline_data['ratio'] > ours_data['ratio']:
@@ -256,7 +256,7 @@ for baseline_name in ["GPT-2", "GPT-4"]:
     for name, text in all_text:
         baseline_data = baseline_results[name]
         ours_data = ours_results[name]
-        relative_diff = ((baseline_data['tokens'] - ours_data['tokens']) / baseline_data['tokens']) * 100
+        relative_diff = ((ours_data['tokens'] - baseline_data['tokens']) / baseline_data['tokens']) * 100
         lines.append(f"| {name} | {baseline_data['bytes']} | {baseline_data['tokens']} | {baseline_data['ratio']:.2f} | {ours_data['tokens']} | {ours_data['ratio']:.2f} | {relative_diff:+.1f}% |")
     lines.append("")
 report_markdown = "\n".join(lines)