diff --git a/scripts/chat_web.py b/scripts/chat_web.py
index cae577d..24258a2 100644
--- a/scripts/chat_web.py
+++ b/scripts/chat_web.py
@@ -257,6 +257,11 @@ async def generate_stream(
     assistant_end = worker.tokenizer.encode_special("<|assistant_end|>")
     bos = worker.tokenizer.get_bos_token_id()

+    # Accumulate tokens to properly handle multi-byte UTF-8 characters (like emojis)
+    accumulated_tokens = []
+    # Track the last complete UTF-8 string (without replacement characters)
+    last_clean_text = ""
+
     with worker.autocast_ctx:
         for token_column, token_masks in worker.engine.generate(
             tokens,
@@ -267,11 +272,23 @@
         ):
             token = token_column[0]

+            # Stopping criteria
             if token == assistant_end or token == bos:
                 break

-            token_text = worker.tokenizer.decode([token])
-            yield f"data: {json.dumps({'token': token_text, 'gpu': worker.gpu_id})}\n\n"
+            # Append the token to the sequence
+            accumulated_tokens.append(token)
+            # Decode all accumulated tokens to get proper UTF-8 handling
+            # Note that decode is quite an efficient operation, basically a table lookup and string concat
+            current_text = worker.tokenizer.decode(accumulated_tokens)
+            # Only emit text if it doesn't end with a replacement character
+            # This ensures we don't emit incomplete UTF-8 sequences
+            if not current_text.endswith('�'):
+                # Extract only the new text since the last clean decode
+                new_text = current_text[len(last_clean_text):]
+                if new_text:  # Only yield if there's new content
+                    yield f"data: {json.dumps({'token': new_text, 'gpu': worker.gpu_id}, ensure_ascii=False)}\n\n"
+                last_clean_text = current_text

     yield f"data: {json.dumps({'done': True})}\n\n"
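
For reference, the incremental-decode trick in the hunk above can be exercised outside the server. The following is a minimal, self-contained sketch of the same idea, not the actual chat_web.py code: a toy byte-level decode stands in for worker.tokenizer.decode, tokens are accumulated, the whole prefix is re-decoded on each step, and output is held back whenever the text still ends in U+FFFD, so a multi-byte character (such as an emoji) is only emitted once all of its bytes have arrived.

```python
# Standalone sketch of the UTF-8-safe streaming decode shown in the diff.
# Assumption: a toy byte-level "tokenizer" where each token ID is one UTF-8 byte;
# in chat_web.py the same role is played by worker.tokenizer.decode.

def decode(tokens: list[int]) -> str:
    # Incomplete or invalid sequences turn into U+FFFD, mimicking a lossy tokenizer decode.
    return bytes(tokens).decode("utf-8", errors="replace")

def stream_clean_text(token_stream):
    accumulated_tokens = []   # all tokens seen so far
    last_clean_text = ""      # longest prefix already emitted without replacement chars
    for token in token_stream:
        accumulated_tokens.append(token)
        current_text = decode(accumulated_tokens)
        # Hold back output while the tail is an incomplete UTF-8 sequence.
        if current_text.endswith("\ufffd"):
            continue
        new_text = current_text[len(last_clean_text):]
        if new_text:
            yield new_text
        last_clean_text = current_text

if __name__ == "__main__":
    # "hi 👋": the emoji is four UTF-8 bytes, arriving one "token" at a time.
    tokens = list("hi 👋".encode("utf-8"))
    print(list(stream_clean_text(tokens)))  # ['h', 'i', ' ', '👋']
```

Re-decoding the full accumulated prefix each step keeps the logic simple, and, as the diff's own comment notes, decode is cheap enough for this to be acceptable per token. One edge case of the endswith check: if the model itself emits U+FFFD as the current final character, that character is withheld until further text arrives.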