From 4040d31aab1faf52417f4abc1a57bd522f673a60 Mon Sep 17 00:00:00 2001 From: Qubitium Date: Tue, 21 Oct 2025 02:58:12 +0000 Subject: [PATCH] use non-deprecated thread/gil apis from pyo3 --- rustbpe/src/lib.rs | 4 ++-- tests/test_gsm8k.py | 9 ++++++++- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/rustbpe/src/lib.rs b/rustbpe/src/lib.rs index 273d7f2..e3f0ab2 100644 --- a/rustbpe/src/lib.rs +++ b/rustbpe/src/lib.rs @@ -307,7 +307,7 @@ impl Tokenizer { // Helper: refill `buf` with up to `buffer_size` strings from the Python iterator. // Returns Ok(true) if the iterator is exhausted, Ok(false) otherwise. let refill = |buf: &mut Vec| -> PyResult { - pyo3::Python::with_gil(|py| { + pyo3::Python::attach(|py| { buf.clear(); let it = py_iter.bind(py); loop { @@ -345,7 +345,7 @@ impl Tokenizer { total_sequences += buf.len() as u64; let pattern = self.compiled_pattern.clone(); - let local: AHashMap = py.allow_threads(|| { + let local: AHashMap = py.detach(|| { buf.par_iter() .map(|s| { let mut m: AHashMap = AHashMap::new(); diff --git a/tests/test_gsm8k.py b/tests/test_gsm8k.py index ab58f2f..7dbfc68 100644 --- a/tests/test_gsm8k.py +++ b/tests/test_gsm8k.py @@ -1,6 +1,13 @@ +import sys +from pathlib import Path + import pytest -from tasks.gsm8k import DATASET_CONFIGS, GSM8K +# Ensure the repository root (which contains the ``nanochat`` package) is on sys.path +if "nanochat" not in sys.modules: + sys.path.insert(0, str(Path(__file__).resolve().parents[1])) + +from nanochat.tasks.gsm8k import DATASET_CONFIGS, GSM8K # Simple test to check we are getting the correct rows from the gsm8k datasets. # It does not verify the actual content of the dataset itself.