| .. |
|
__init__.py
|
initial commit
|
2025-10-13 06:49:24 -07:00 |
|
adamw.py
|
fix: remove unnecessary tensor allocation in DistAdamW optimizer
|
2025-10-20 12:03:26 +03:00 |
|
checkpoint_manager.py
|
ready to run
|
2026-01-08 13:34:34 +00:00 |
|
common.py
|
align the upstream design
|
2026-01-06 05:50:48 +00:00 |
|
configurator.py
|
initial commit
|
2025-10-13 06:49:24 -07:00 |
|
core_eval.py
|
initial commit
|
2025-10-13 06:49:24 -07:00 |
|
dataloader.py
|
align the upstream design
|
2026-01-06 05:50:48 +00:00 |
|
dataset.py
|
align the upstream design
|
2026-01-06 05:50:48 +00:00 |
|
engine.py
|
align the upstream design
|
2026-01-06 05:50:48 +00:00 |
|
execution.py
|
nit delete redundant catch/raise in execute
|
2025-10-29 08:10:03 -07:00 |
|
gpt.py
|
align the upstream design
|
2026-01-06 05:50:48 +00:00 |
|
logo.svg
|
initial commit
|
2025-10-13 06:49:24 -07:00 |
|
loss_eval.py
|
align the upstream design
|
2026-01-06 05:50:48 +00:00 |
|
manager.py
|
align the upstream design
|
2026-01-06 05:50:48 +00:00 |
|
muon.py
|
initial commit
|
2025-10-13 06:49:24 -07:00 |
|
report.py
|
align the upstream design
|
2026-01-06 05:50:48 +00:00 |
|
to_hf.py
|
to_hf adjusted to current imple
|
2026-01-06 06:34:46 +00:00 |
|
tokenizer.py
|
align the upstream design
|
2026-01-06 05:50:48 +00:00 |
|
ui.html
|
fix(ui): prevent iOS Safari toolbar from covering input on initial load
|
2025-10-21 17:34:40 -07:00 |