nanochat/tests/integration/test_throughput_comparison.sh

#!/bin/bash
#
# Test 10: Throughput Comparison
# Compares training throughput between a manually chosen device batch size
# and the batch size found by auto-discovery.
#

# Stop at the first failing command (subject to the usual `set -e` exceptions).
set -e

printf '%s\n' \
    "==========================================" \
    "Test 10: Throughput Comparison" \
    "=========================================="

# Workload parameters passed to the training runs below.
DEPTH=12
MAX_ITERATIONS=100
MANUAL_BATCH_SIZE=8

# Output locations, relative to the current working directory.
mkdir -p tests/results
LOG_MANUAL="tests/results/throughput_manual.log"
LOG_AUTO="tests/results/throughput_auto.log"
RESULTS_FILE="tests/results/throughput_comparison.json"

# Run 1: Manual batch size
echo ""
echo "Run 1: Manual batch size = $MANUAL_BATCH_SIZE"
echo "----------------------------------------"
START_MANUAL=$(date +%s)

# Output (stdout and stderr) is shown live and also written to $LOG_MANUAL.
torchrun --standalone --nproc_per_node=1 -m scripts.base_train \
    -- \
    --depth="$DEPTH" \
    --device_batch_size="$MANUAL_BATCH_SIZE" \
    --num_iterations="$MAX_ITERATIONS" \
    2>&1 | tee "$LOG_MANUAL"
# The pipeline's exit status is tee's, so torchrun's status has to be read from
# PIPESTATUS. It must be captured on the very next command: every later
# command, including a plain assignment, overwrites PIPESTATUS.
STATUS_MANUAL=${PIPESTATUS[0]}

END_MANUAL=$(date +%s)
DURATION_MANUAL=$((END_MANUAL - START_MANUAL))

if [ "$STATUS_MANUAL" -ne 0 ]; then
    echo "ERROR: Manual run failed"
    exit 1
fi

# Run 2: Auto discovery
echo ""
echo "Run 2: Auto-discovery"
echo "----------------------------------------"
START_AUTO=$(date +%s)

# No --device_batch_size is passed for this run.
# Output (stdout and stderr) is shown live and also written to $LOG_AUTO.
torchrun --standalone --nproc_per_node=1 -m scripts.base_train \
    -- \
    --depth="$DEPTH" \
    --num_iterations="$MAX_ITERATIONS" \
    2>&1 | tee "$LOG_AUTO"
# The pipeline's exit status is tee's, so torchrun's status has to be read from
# PIPESTATUS. It must be captured on the very next command: every later
# command, including a plain assignment, overwrites PIPESTATUS.
STATUS_AUTO=${PIPESTATUS[0]}

END_AUTO=$(date +%s)
DURATION_AUTO=$((END_AUTO - START_AUTO))

if [ "$STATUS_AUTO" -ne 0 ]; then
    echo "ERROR: Auto-discovery run failed"
    exit 1
fi

# Print NUM / DEN as a fixed-point decimal with SCALE fractional digits.
# Arguments: $1 - numerator, $2 - denominator, $3 - number of fractional digits
# Outputs:   the quotient on stdout, always with a leading integer digit
#            (e.g. "0.9500"); prints zero when the denominator is zero or empty
# Returns:   always 0, so it is safe inside $(...) under `set -e`
# Note: awk's printf rounds the last digit, whereas `bc` with `scale` truncates.
safe_divide() {
    awk -v num="$1" -v den="$2" -v scale="$3" 'BEGIN {
        fmt = "%." (scale + 0) "f\n"
        if (den + 0 == 0) {
            printf fmt, 0
        } else {
            printf fmt, num / den
        }
    }'
}

# Extract batch sizes
AUTO_BATCH_SIZE=$(grep "Auto-discovery found device_batch_size=" "$LOG_AUTO" | grep -oP 'device_batch_size=\K\d+' | head -1)
if [ -z "$AUTO_BATCH_SIZE" ]; then
    # Keep the JSON report well-formed when the log line is missing.
    echo "WARNING: auto-discovered batch size not found in $LOG_AUTO" >&2
    AUTO_BATCH_SIZE=null
fi

# `date +%s` has one-second resolution, so a very short run can measure 0s.
if [ "${DURATION_MANUAL:-0}" -le 0 ] || [ "${DURATION_AUTO:-0}" -le 0 ]; then
    echo "WARNING: a run took less than one second; its throughput is reported as 0" >&2
fi

# Calculate throughput (iterations per second)
# Note: This is approximate since it includes discovery time
THROUGHPUT_MANUAL=$(safe_divide "$MAX_ITERATIONS" "$DURATION_MANUAL" 4)
THROUGHPUT_AUTO=$(safe_divide "$MAX_ITERATIONS" "$DURATION_AUTO" 4)

# Calculate speedup ratio
SPEEDUP=$(safe_divide "$THROUGHPUT_AUTO" "$THROUGHPUT_MANUAL" 2)

# Print $1 as a JSON number, or `null` when it cannot be represented as one.
# Arguments: $1 - candidate value (may be empty)
# Outputs:   a valid JSON number or the literal null on stdout
# Returns:   always 0
# `bc` prints fractions without a leading zero (".95"), which JSON rejects,
# so a missing leading zero is restored before validation.
json_number() {
    local value=$1
    case "$value" in
        .*) value="0$value" ;;
        -.*) value="-0${value#-}" ;;
    esac
    if [[ "$value" =~ ^-?(0|[1-9][0-9]*)(\.[0-9]+)?$ ]]; then
        printf '%s\n' "$value"
    else
        printf 'null\n'
    fi
}

echo ""
echo "Results:"
echo "  Manual batch size: $MANUAL_BATCH_SIZE"
echo "  Auto-discovered batch size: $AUTO_BATCH_SIZE"
echo "  Manual duration: ${DURATION_MANUAL}s"
echo "  Auto duration: ${DURATION_AUTO}s"
echo "  Manual throughput: ${THROUGHPUT_MANUAL} iter/s"
echo "  Auto throughput: ${THROUGHPUT_AUTO} iter/s"
echo "  Speedup ratio: ${SPEEDUP}x"

# Save results to JSON
# Measured values go through json_number so that an empty or oddly formatted
# value cannot make the report unparseable.
cat > "$RESULTS_FILE" << EOF
{
  "timestamp": "$(date -u +%Y-%m-%dT%H:%M:%SZ)",
  "depth": $DEPTH,
  "max_iterations": $MAX_ITERATIONS,
  "manual": {
    "batch_size": $MANUAL_BATCH_SIZE,
    "duration_seconds": $(json_number "$DURATION_MANUAL"),
    "throughput_iter_per_sec": $(json_number "$THROUGHPUT_MANUAL")
  },
  "auto": {
    "batch_size": $(json_number "$AUTO_BATCH_SIZE"),
    "duration_seconds": $(json_number "$DURATION_AUTO"),
    "throughput_iter_per_sec": $(json_number "$THROUGHPUT_AUTO")
  },
  "speedup_ratio": $(json_number "$SPEEDUP")
}
EOF

echo ""
echo "Results saved to: $RESULTS_FILE"

# Succeed (status 0) when decimal $1 is strictly less than decimal $2.
# Arguments: $1, $2 - decimal numbers; a missing leading zero (".95") is fine
# Returns:   0 if $1 < $2, non-zero otherwise (use only as a condition)
is_less_than() {
    awk -v lhs="$1" -v rhs="$2" 'BEGIN { exit !((lhs + 0) < (rhs + 0)) }'
}

# Verify speedup is reasonable (allowing some margin)
# Target is 1.5-3x, but we'll accept >= 1.3x considering overhead
SPEEDUP_THRESHOLD="1.3"

if ! [[ "$SPEEDUP" =~ ^[0-9]*\.?[0-9]+$ ]]; then
    # Nothing meaningful to compare; report it instead of failing inside `[`.
    echo "WARNING: Speedup ratio ($SPEEDUP) is not a number; skipping speedup checks"
else
    # Compare as decimals: taking the integer part with `cut` yields an empty
    # string for values such as ".95", which are exactly the ones to flag.
    if is_less_than "$SPEEDUP" 1; then
        echo "WARNING: Speedup ratio ($SPEEDUP) is less than 1.0"
        echo "         Auto-discovery may not be providing benefit"
        # Don't fail the test, as this could be due to discovery overhead
    fi

    # Check for minimum speedup of 1.3x (allowing for overhead)
    if is_less_than "$SPEEDUP" "$SPEEDUP_THRESHOLD"; then
        echo "WARNING: Speedup ratio ($SPEEDUP) is below threshold ($SPEEDUP_THRESHOLD)"
        echo "         This may be acceptable if discovery overhead is high"
    fi
fi

# Closing summary, printed once the script reaches this point.
printf '%s\n' \
    "" \
    "✓ Test passed!" \
    "  - Both runs completed successfully" \
    "  - Throughput measured and compared" \
    "  - Results saved for analysis"