From 5b27c0c59e5f926f5b8ac17ead63955bca022df3 Mon Sep 17 00:00:00 2001
From: Dustin Loring <Dustinwloring1988@gmail.com>
Date: Fri, 6 Mar 2026 11:20:10 -0500
Subject: [PATCH] Create convert_to_sharegpt.py

added a convert script for convert the current format of the idenitiy conversation for mid training to be compatiable with huggingface so there will be no need for the s3 one anymore
---
 dev/convert_to_sharegpt.py | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)
 create mode 100644 dev/convert_to_sharegpt.py

diff --git a/dev/convert_to_sharegpt.py b/dev/convert_to_sharegpt.py
new file mode 100644
index 0000000..365afce
--- /dev/null
+++ b/dev/convert_to_sharegpt.py
@@ -0,0 +1,24 @@
+import json
+import os
+
+os.chdir(r'C:\tmp')
+
+input_file = 'identity_conversations.jsonl'
+output_file = 'identity_conversations_sharegpt.jsonl'
+
+count = 0
+with open(input_file, 'r', encoding='utf-8') as f_in, open(output_file, 'w', encoding='utf-8') as f_out:
+    for line in f_in:
+        messages = json.loads(line.strip())
+        converted = {'conversations': []}
+        for msg in messages:
+            role = 'human' if msg['role'] == 'user' else 'gpt'
+            converted['conversations'].append({
+                'from': role,
+                'value': msg['content']
+            })
+        f_out.write(json.dumps(converted, ensure_ascii=False) + '\n')
+        count += 1
+
+print(f'Converted {count} conversations to ShareGPT format')
+print(f'Output saved to: {output_file}')
\ No newline at end of file