diff --git a/nanochat/tokenizer.py b/nanochat/tokenizer.py index a2146c2..77d5404 100644 --- a/nanochat/tokenizer.py +++ b/nanochat/tokenizer.py @@ -284,6 +284,8 @@ class RustBPETokenizer: # some conversation surgery is necessary here for now... conversation = copy.deepcopy(conversation) # avoid mutating the original messages = conversation["messages"] + if len(messages) < 2: + raise ValueError("System message must be followed by a user message") assert messages[1]["role"] == "user", "System message must be followed by a user message" messages[1]["content"] = messages[0]["content"] + "\n\n" + messages[1]["content"] messages = messages[1:]