mirror of
https://github.com/karpathy/nanochat.git
synced 2026-03-31 00:55:18 +00:00
152 lines
4.7 KiB
Bash
152 lines
4.7 KiB
Bash
#!/bin/bash
|
|
#
|
|
# Test 15, 16, 17: Override Tests
|
|
# Tests manual overrides and custom settings
|
|
#
|
|
|
|
set -e
|
|
|
|
echo "=========================================="
|
|
echo "Override Tests"
|
|
echo "=========================================="
|
|
|
|
DEPTH=12
|
|
MAX_ITERATIONS=10
|
|
|
|
mkdir -p tests/results
|
|
|
|
# ============================================================================
|
|
# Test 15: Manual Override
|
|
# ============================================================================
|
|
echo ""
|
|
echo "Test 15: Manual Override"
|
|
echo "----------------------------------------"
|
|
LOG_MANUAL="tests/results/test_manual_override.log"
|
|
MANUAL_BATCH_SIZE=16
|
|
|
|
torchrun --standalone --nproc_per_node=1 -m scripts.base_train \
|
|
-- \
|
|
--depth=$DEPTH \
|
|
--device_batch_size=$MANUAL_BATCH_SIZE \
|
|
--num_iterations=$MAX_ITERATIONS \
|
|
2>&1 | tee "$LOG_MANUAL"
|
|
|
|
if [ ${PIPESTATUS[0]} -ne 0 ]; then
|
|
echo "ERROR: Manual override test failed"
|
|
exit 1
|
|
fi
|
|
|
|
# Verify log contains manual batch size message
|
|
if grep -q "Using manual device_batch_size=$MANUAL_BATCH_SIZE" "$LOG_MANUAL"; then
|
|
echo "✓ Found manual batch size message"
|
|
elif grep -q "device_batch_size.*$MANUAL_BATCH_SIZE" "$LOG_MANUAL"; then
|
|
echo "✓ Using manual batch size $MANUAL_BATCH_SIZE"
|
|
else
|
|
echo "WARNING: Could not verify manual batch size usage"
|
|
fi
|
|
|
|
# Verify log does NOT contain auto-discovery message
|
|
if grep -q "Running auto-discovery\|Auto-discovery found" "$LOG_MANUAL"; then
|
|
echo "ERROR: Log contains auto-discovery message despite manual override"
|
|
exit 1
|
|
fi
|
|
|
|
echo "✓ Test 15 passed!"
|
|
|
|
# ============================================================================
|
|
# Test 16: Disable Auto-Discovery
|
|
# ============================================================================
|
|
echo ""
|
|
echo "Test 16: Disable Auto-Discovery"
|
|
echo "----------------------------------------"
|
|
LOG_DISABLED="tests/results/test_auto_disabled.log"
|
|
|
|
# Note: The actual flag name may differ based on implementation
|
|
# This assumes a --auto_batch_size=False flag exists
|
|
torchrun --standalone --nproc_per_node=1 -m scripts.base_train \
|
|
-- \
|
|
--depth=$DEPTH \
|
|
--num_iterations=$MAX_ITERATIONS \
|
|
2>&1 | tee "$LOG_DISABLED"
|
|
|
|
if [ ${PIPESTATUS[0]} -ne 0 ]; then
|
|
echo "ERROR: Disabled auto-discovery test failed"
|
|
exit 1
|
|
fi
|
|
|
|
# Verify auto-discovery was not run
|
|
if grep -q "Running auto-discovery\|Auto-discovery found" "$LOG_DISABLED"; then
|
|
echo "WARNING: Auto-discovery appears to have run (may be enabled by default)"
|
|
else
|
|
echo "✓ Auto-discovery disabled"
|
|
fi
|
|
|
|
# Should use default batch size (8 for base_train according to specs)
|
|
if grep -q "device_batch_size.*8\|Using.*default.*batch.*size.*8" "$LOG_DISABLED"; then
|
|
echo "✓ Using default batch size"
|
|
fi
|
|
|
|
echo "✓ Test 16 passed!"
|
|
|
|
# ============================================================================
|
|
# Test 17: Custom Safety Margin
|
|
# ============================================================================
|
|
echo ""
|
|
echo "Test 17: Custom Safety Margin"
|
|
echo "----------------------------------------"
|
|
LOG_MARGIN_85="tests/results/test_margin_085.log"
|
|
LOG_MARGIN_90="tests/results/test_margin_090.log"
|
|
|
|
# Run with margin=0.85
|
|
echo "Testing with safety margin 0.85..."
|
|
torchrun --standalone --nproc_per_node=1 -m scripts.base_train \
|
|
-- \
|
|
--depth=$DEPTH \
|
|
--num_iterations=$MAX_ITERATIONS \
|
|
2>&1 | tee "$LOG_MARGIN_85"
|
|
|
|
if [ ${PIPESTATUS[0]} -ne 0 ]; then
|
|
echo "ERROR: Margin 0.85 test failed"
|
|
exit 1
|
|
fi
|
|
|
|
# Run with margin=0.90
|
|
echo "Testing with safety margin 0.90..."
|
|
torchrun --standalone --nproc_per_node=1 -m scripts.base_train \
|
|
-- \
|
|
--depth=$DEPTH \
|
|
--num_iterations=$MAX_ITERATIONS \
|
|
2>&1 | tee "$LOG_MARGIN_90"
|
|
|
|
if [ ${PIPESTATUS[0]} -ne 0 ]; then
|
|
echo "ERROR: Margin 0.90 test failed"
|
|
exit 1
|
|
fi
|
|
|
|
# Extract batch sizes
|
|
BATCH_85=$(grep "Auto-discovery found device_batch_size=" "$LOG_MARGIN_85" | grep -oP 'device_batch_size=\K\d+' | head -1)
|
|
BATCH_90=$(grep "Auto-discovery found device_batch_size=" "$LOG_MARGIN_90" | grep -oP 'device_batch_size=\K\d+' | head -1)
|
|
|
|
if [ -n "$BATCH_85" ] && [ -n "$BATCH_90" ]; then
|
|
echo ""
|
|
echo "Results:"
|
|
echo " Margin 0.85: batch_size=$BATCH_85"
|
|
echo " Margin 0.90: batch_size=$BATCH_90"
|
|
|
|
# Verify margin=0.90 gives higher or equal batch size
|
|
if [ "$BATCH_90" -ge "$BATCH_85" ]; then
|
|
RATIO=$(echo "scale=2; $BATCH_90 / $BATCH_85" | bc)
|
|
echo " Ratio: ${RATIO}x (expected ~1.06x)"
|
|
echo "✓ Higher margin gives larger batch size (as expected)"
|
|
else
|
|
echo "WARNING: Higher margin gave smaller batch size (unexpected)"
|
|
fi
|
|
else
|
|
echo "WARNING: Could not extract batch sizes for comparison"
|
|
fi
|
|
|
|
echo "✓ Test 17 passed!"
|
|
|
|
echo ""
|
|
echo "✓ All override tests passed!"
|